blob: 88d4f9774fa1b8c0d23f66c611d7888a23a4b6b7 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016#define IS_BASESTRING(o) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000017 PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000019static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000020static PyObject *dialects; /* Dialect registry */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000021static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000022
/* States of the per-character parser implemented by parse_process_char. */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting the start of a field */
    ESCAPED_CHAR,               /* previous char was the escapechar (unquoted) */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* previous char was the escapechar (quoted) */
    QUOTE_IN_QUOTED_FIELD,      /* saw a quotechar while inside a quoted field */
    EAT_CRNL                    /* consuming the line terminator after a record */
} ParserState;
28
/* Quoting policies selectable through a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;           /* points at a string literal; never written */
} StyleDesc;

/* Table of every valid quoting policy, terminated by an entry whose
 * name is NULL (dialect_check_quoting stops on that sentinel). */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
45
46typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000049 int doublequote; /* is " represented by ""? */
50 Py_UNICODE delimiter; /* field separator */
51 Py_UNICODE quotechar; /* quote character */
52 Py_UNICODE escapechar; /* escape character */
53 int skipinitialspace; /* ignore spaces following delimiter? */
54 PyObject *lineterminator; /* string to write between records */
55 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000058} DialectObj;
59
Neal Norwitz227b5332006-03-22 09:28:35 +000060static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000061
62typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 PyObject *fields; /* field list for current record */
70 ParserState state; /* current CSV parse state */
71 Py_UNICODE *field; /* build current field in here */
Antoine Pitrou40455752010-08-15 18:51:10 +000072 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 Py_ssize_t field_len; /* length of current field */
74 int numeric_field; /* treat field as numeric */
75 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000076} ReaderObj;
77
Neal Norwitz227b5332006-03-22 09:28:35 +000078static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000079
Christian Heimes90aa7642007-12-19 02:45:37 +000080#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000081
82typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 Py_UNICODE *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +000090 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_ssize_t rec_len; /* length of record */
92 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000093} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
97/*
98 * DIALECT class
99 */
100
101static PyObject *
102get_dialect_from_registry(PyObject * name_obj)
103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 dialect_obj = PyDict_GetItem(dialects, name_obj);
107 if (dialect_obj == NULL) {
108 if (!PyErr_Occurred())
109 PyErr_Format(error_obj, "unknown dialect");
110 }
111 else
112 Py_INCREF(dialect_obj);
113 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114}
115
Skip Montanarob4a04172003-03-20 23:29:12 +0000116static PyObject *
117get_string(PyObject *str)
118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_XINCREF(str);
120 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000121}
122
Skip Montanarob4a04172003-03-20 23:29:12 +0000123static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000124get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 if (c == '\0') {
127 Py_INCREF(Py_None);
128 return Py_None;
129 }
130 else
131 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000132}
133
Skip Montanarob4a04172003-03-20 23:29:12 +0000134static PyObject *
135Dialect_get_lineterminator(DialectObj *self)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000141Dialect_get_delimiter(DialectObj *self)
142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000144}
145
146static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000147Dialect_get_escapechar(DialectObj *self)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000150}
151
Andrew McNamara1196cf12005-01-07 04:42:45 +0000152static PyObject *
153Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000156}
157
158static PyObject *
159Dialect_get_quoting(DialectObj *self)
160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
164static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000165_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (src == NULL)
168 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200169 else {
170 int b = PyObject_IsTrue(src);
171 if (b < 0)
172 return -1;
173 *target = b;
174 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static int
179_set_int(const char *name, int *target, PyObject *src, int dflt)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 if (src == NULL)
182 *target = dflt;
183 else {
184 long value;
185 if (!PyLong_CheckExact(src)) {
186 PyErr_Format(PyExc_TypeError,
187 "\"%s\" must be an integer", name);
188 return -1;
189 }
190 value = PyLong_AsLong(src);
191 if (value == -1 && PyErr_Occurred())
192 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000193#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 if (value > INT_MAX || value < INT_MIN) {
195 PyErr_Format(PyExc_ValueError,
196 "integer out of range for \"%s\"", name);
197 return -1;
198 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000199#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 *target = (int)value;
201 }
202 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203}
204
205static int
Guido van Rossum46264582007-08-06 19:32:18 +0000206_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 if (src == NULL)
209 *target = dflt;
210 else {
211 *target = '\0';
212 if (src != Py_None) {
213 Py_UNICODE *buf;
214 Py_ssize_t len;
215 buf = PyUnicode_AsUnicode(src);
216 len = PyUnicode_GetSize(src);
217 if (buf == NULL || len > 1) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an 1-character string",
220 name);
221 return -1;
222 }
223 if (len > 0)
224 *target = buf[0];
225 }
226 }
227 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000228}
229
230static int
231_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
232{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000233 if (src == NULL)
234 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
235 else {
236 if (src == Py_None)
237 *target = NULL;
238 else if (!IS_BASESTRING(src)) {
239 PyErr_Format(PyExc_TypeError,
240 "\"%s\" must be a string", name);
241 return -1;
242 }
243 else {
244 Py_XDECREF(*target);
245 Py_INCREF(src);
246 *target = src;
247 }
248 }
249 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000250}
251
252static int
253dialect_check_quoting(int quoting)
254{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000255 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (qs = quote_styles; qs->name; qs++) {
258 if (qs->style == quoting)
259 return 0;
260 }
261 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
262 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000263}
Skip Montanarob4a04172003-03-20 23:29:12 +0000264
265#define D_OFF(x) offsetof(DialectObj, x)
266
267static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
269 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
270 { "strict", T_INT, D_OFF(strict), READONLY },
271 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000272};
273
274static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 { "delimiter", (getter)Dialect_get_delimiter},
276 { "escapechar", (getter)Dialect_get_escapechar},
277 { "lineterminator", (getter)Dialect_get_lineterminator},
278 { "quotechar", (getter)Dialect_get_quotechar},
279 { "quoting", (getter)Dialect_get_quoting},
280 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000281};
282
283static void
284Dialect_dealloc(DialectObj *self)
285{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 Py_XDECREF(self->lineterminator);
287 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000288}
289
/* Keyword names accepted by dialect_new.  The order must match the order
 * of the output pointers passed to PyArg_ParseTupleAndKeywords there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
302
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000303static PyObject *
304dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000305{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 DialectObj *self;
307 PyObject *ret = NULL;
308 PyObject *dialect = NULL;
309 PyObject *delimiter = NULL;
310 PyObject *doublequote = NULL;
311 PyObject *escapechar = NULL;
312 PyObject *lineterminator = NULL;
313 PyObject *quotechar = NULL;
314 PyObject *quoting = NULL;
315 PyObject *skipinitialspace = NULL;
316 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
319 "|OOOOOOOOO", dialect_kws,
320 &dialect,
321 &delimiter,
322 &doublequote,
323 &escapechar,
324 &lineterminator,
325 &quotechar,
326 &quoting,
327 &skipinitialspace,
328 &strict))
329 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 if (dialect != NULL) {
332 if (IS_BASESTRING(dialect)) {
333 dialect = get_dialect_from_registry(dialect);
334 if (dialect == NULL)
335 return NULL;
336 }
337 else
338 Py_INCREF(dialect);
339 /* Can we reuse this instance? */
340 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
341 delimiter == 0 &&
342 doublequote == 0 &&
343 escapechar == 0 &&
344 lineterminator == 0 &&
345 quotechar == 0 &&
346 quoting == 0 &&
347 skipinitialspace == 0 &&
348 strict == 0)
349 return dialect;
350 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 self = (DialectObj *)type->tp_alloc(type, 0);
353 if (self == NULL) {
354 Py_XDECREF(dialect);
355 return NULL;
356 }
357 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 Py_XINCREF(delimiter);
360 Py_XINCREF(doublequote);
361 Py_XINCREF(escapechar);
362 Py_XINCREF(lineterminator);
363 Py_XINCREF(quotechar);
364 Py_XINCREF(quoting);
365 Py_XINCREF(skipinitialspace);
366 Py_XINCREF(strict);
367 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000368#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 if (v == NULL) \
370 v = PyObject_GetAttrString(dialect, n)
371 DIALECT_GETATTR(delimiter, "delimiter");
372 DIALECT_GETATTR(doublequote, "doublequote");
373 DIALECT_GETATTR(escapechar, "escapechar");
374 DIALECT_GETATTR(lineterminator, "lineterminator");
375 DIALECT_GETATTR(quotechar, "quotechar");
376 DIALECT_GETATTR(quoting, "quoting");
377 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
378 DIALECT_GETATTR(strict, "strict");
379 PyErr_Clear();
380 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000382 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000383#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 if (meth(name, target, src, dflt)) \
385 goto err
386 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
387 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
388 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
389 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
390 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
391 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
392 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
393 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 /* validate options */
396 if (dialect_check_quoting(self->quoting))
397 goto err;
398 if (self->delimiter == 0) {
399 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
400 goto err;
401 }
402 if (quotechar == Py_None && quoting == NULL)
403 self->quoting = QUOTE_NONE;
404 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
405 PyErr_SetString(PyExc_TypeError,
406 "quotechar must be set if quoting enabled");
407 goto err;
408 }
409 if (self->lineterminator == 0) {
410 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
411 goto err;
412 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 ret = (PyObject *)self;
415 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000416err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 Py_XDECREF(self);
418 Py_XDECREF(dialect);
419 Py_XDECREF(delimiter);
420 Py_XDECREF(doublequote);
421 Py_XDECREF(escapechar);
422 Py_XDECREF(lineterminator);
423 Py_XDECREF(quotechar);
424 Py_XDECREF(quoting);
425 Py_XDECREF(skipinitialspace);
426 Py_XDECREF(strict);
427 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000428}
429
430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000432"CSV dialect\n"
433"\n"
434"The Dialect type records CSV parsing and generation options.\n");
435
436static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000437 PyVarObject_HEAD_INIT(NULL, 0)
438 "_csv.Dialect", /* tp_name */
439 sizeof(DialectObj), /* tp_basicsize */
440 0, /* tp_itemsize */
441 /* methods */
442 (destructor)Dialect_dealloc, /* tp_dealloc */
443 (printfunc)0, /* tp_print */
444 (getattrfunc)0, /* tp_getattr */
445 (setattrfunc)0, /* tp_setattr */
446 0, /* tp_reserved */
447 (reprfunc)0, /* tp_repr */
448 0, /* tp_as_number */
449 0, /* tp_as_sequence */
450 0, /* tp_as_mapping */
451 (hashfunc)0, /* tp_hash */
452 (ternaryfunc)0, /* tp_call */
453 (reprfunc)0, /* tp_str */
454 0, /* tp_getattro */
455 0, /* tp_setattro */
456 0, /* tp_as_buffer */
457 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
458 Dialect_Type_doc, /* tp_doc */
459 0, /* tp_traverse */
460 0, /* tp_clear */
461 0, /* tp_richcompare */
462 0, /* tp_weaklistoffset */
463 0, /* tp_iter */
464 0, /* tp_iternext */
465 0, /* tp_methods */
466 Dialect_memberlist, /* tp_members */
467 Dialect_getsetlist, /* tp_getset */
468 0, /* tp_base */
469 0, /* tp_dict */
470 0, /* tp_descr_get */
471 0, /* tp_descr_set */
472 0, /* tp_dictoffset */
473 0, /* tp_init */
474 0, /* tp_alloc */
475 dialect_new, /* tp_new */
476 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000477};
478
Andrew McNamara91b97462005-01-11 01:07:23 +0000479/*
480 * Return an instance of the dialect type, given a Python instance or kwarg
481 * description of the dialect
482 */
483static PyObject *
484_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
485{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000486 PyObject *ctor_args;
487 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
490 if (ctor_args == NULL)
491 return NULL;
492 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
493 Py_DECREF(ctor_args);
494 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000495}
496
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000497/*
498 * READER
499 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000500static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000501parse_save_field(ReaderObj *self)
502{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 field = PyUnicode_FromUnicode(self->field, self->field_len);
506 if (field == NULL)
507 return -1;
508 self->field_len = 0;
509 if (self->numeric_field) {
510 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 self->numeric_field = 0;
513 tmp = PyNumber_Float(field);
514 if (tmp == NULL) {
515 Py_DECREF(field);
516 return -1;
517 }
518 Py_DECREF(field);
519 field = tmp;
520 }
521 PyList_Append(self->fields, field);
522 Py_DECREF(field);
523 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000524}
525
526static int
527parse_grow_buff(ReaderObj *self)
528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 if (self->field_size == 0) {
530 self->field_size = 4096;
531 if (self->field != NULL)
532 PyMem_Free(self->field);
533 self->field = PyMem_New(Py_UNICODE, self->field_size);
534 }
535 else {
Antoine Pitrou40455752010-08-15 18:51:10 +0000536 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 PyErr_NoMemory();
538 return 0;
539 }
540 self->field_size *= 2;
541 self->field = PyMem_Resize(self->field, Py_UNICODE,
542 self->field_size);
543 }
544 if (self->field == NULL) {
545 PyErr_NoMemory();
546 return 0;
547 }
548 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000549}
550
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000551static int
Guido van Rossum46264582007-08-06 19:32:18 +0000552parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000553{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 if (self->field_len >= field_limit) {
555 PyErr_Format(error_obj, "field larger than field limit (%ld)",
556 field_limit);
557 return -1;
558 }
559 if (self->field_len == self->field_size && !parse_grow_buff(self))
560 return -1;
561 self->field[self->field_len++] = c;
562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000565static int
Guido van Rossum46264582007-08-06 19:32:18 +0000566parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 switch (self->state) {
571 case START_RECORD:
572 /* start of record */
573 if (c == '\0')
574 /* empty line - return [] */
575 break;
576 else if (c == '\n' || c == '\r') {
577 self->state = EAT_CRNL;
578 break;
579 }
580 /* normal character - handle as START_FIELD */
581 self->state = START_FIELD;
582 /* fallthru */
583 case START_FIELD:
584 /* expecting field */
585 if (c == '\n' || c == '\r' || c == '\0') {
586 /* save empty field - return [fields] */
587 if (parse_save_field(self) < 0)
588 return -1;
589 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
590 }
591 else if (c == dialect->quotechar &&
592 dialect->quoting != QUOTE_NONE) {
593 /* start quoted field */
594 self->state = IN_QUOTED_FIELD;
595 }
596 else if (c == dialect->escapechar) {
597 /* possible escaped character */
598 self->state = ESCAPED_CHAR;
599 }
600 else if (c == ' ' && dialect->skipinitialspace)
601 /* ignore space at start of field */
602 ;
603 else if (c == dialect->delimiter) {
604 /* save empty field */
605 if (parse_save_field(self) < 0)
606 return -1;
607 }
608 else {
609 /* begin new unquoted field */
610 if (dialect->quoting == QUOTE_NONNUMERIC)
611 self->numeric_field = 1;
612 if (parse_add_char(self, c) < 0)
613 return -1;
614 self->state = IN_FIELD;
615 }
616 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 case ESCAPED_CHAR:
619 if (c == '\0')
620 c = '\n';
621 if (parse_add_char(self, c) < 0)
622 return -1;
623 self->state = IN_FIELD;
624 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 case IN_FIELD:
627 /* in unquoted field */
628 if (c == '\n' || c == '\r' || c == '\0') {
629 /* end of line - return [fields] */
630 if (parse_save_field(self) < 0)
631 return -1;
632 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == dialect->delimiter) {
639 /* save field - wait for new field */
640 if (parse_save_field(self) < 0)
641 return -1;
642 self->state = START_FIELD;
643 }
644 else {
645 /* normal character - save in field */
646 if (parse_add_char(self, c) < 0)
647 return -1;
648 }
649 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 case IN_QUOTED_FIELD:
652 /* in quoted field */
653 if (c == '\0')
654 ;
655 else if (c == dialect->escapechar) {
656 /* Possible escape character */
657 self->state = ESCAPE_IN_QUOTED_FIELD;
658 }
659 else if (c == dialect->quotechar &&
660 dialect->quoting != QUOTE_NONE) {
661 if (dialect->doublequote) {
662 /* doublequote; " represented by "" */
663 self->state = QUOTE_IN_QUOTED_FIELD;
664 }
665 else {
666 /* end of quote part of field */
667 self->state = IN_FIELD;
668 }
669 }
670 else {
671 /* normal character - save in field */
672 if (parse_add_char(self, c) < 0)
673 return -1;
674 }
675 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 case ESCAPE_IN_QUOTED_FIELD:
678 if (c == '\0')
679 c = '\n';
680 if (parse_add_char(self, c) < 0)
681 return -1;
682 self->state = IN_QUOTED_FIELD;
683 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 case QUOTE_IN_QUOTED_FIELD:
686 /* doublequote - seen a quote in an quoted field */
687 if (dialect->quoting != QUOTE_NONE &&
688 c == dialect->quotechar) {
689 /* save "" as " */
690 if (parse_add_char(self, c) < 0)
691 return -1;
692 self->state = IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->delimiter) {
695 /* save field - wait for new field */
696 if (parse_save_field(self) < 0)
697 return -1;
698 self->state = START_FIELD;
699 }
700 else if (c == '\n' || c == '\r' || c == '\0') {
701 /* end of line - return [fields] */
702 if (parse_save_field(self) < 0)
703 return -1;
704 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
705 }
706 else if (!dialect->strict) {
707 if (parse_add_char(self, c) < 0)
708 return -1;
709 self->state = IN_FIELD;
710 }
711 else {
712 /* illegal */
713 PyErr_Format(error_obj, "'%c' expected after '%c'",
714 dialect->delimiter,
715 dialect->quotechar);
716 return -1;
717 }
718 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 case EAT_CRNL:
721 if (c == '\n' || c == '\r')
722 ;
723 else if (c == '\0')
724 self->state = START_RECORD;
725 else {
726 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
727 return -1;
728 }
729 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 }
732 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733}
734
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000735static int
736parse_reset(ReaderObj *self)
737{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 Py_XDECREF(self->fields);
739 self->fields = PyList_New(0);
740 if (self->fields == NULL)
741 return -1;
742 self->field_len = 0;
743 self->state = START_RECORD;
744 self->numeric_field = 0;
745 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000746}
Skip Montanarob4a04172003-03-20 23:29:12 +0000747
748static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000749Reader_iternext(ReaderObj *self)
750{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 PyObject *lineobj;
752 PyObject *fields = NULL;
753 Py_UNICODE *line, c;
754 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 if (parse_reset(self) < 0)
757 return NULL;
758 do {
759 lineobj = PyIter_Next(self->input_iter);
760 if (lineobj == NULL) {
761 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700762 if (!PyErr_Occurred() && (self->field_len != 0 ||
763 self->state == IN_QUOTED_FIELD)) {
764 if (self->dialect->strict)
765 PyErr_SetString(error_obj, "unexpected end of data");
766 else if (parse_save_field(self) >= 0)
767 break;
768 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 return NULL;
770 }
771 if (!PyUnicode_Check(lineobj)) {
772 PyErr_Format(error_obj,
773 "iterator should return strings, "
774 "not %.200s "
775 "(did you open the file in text mode?)",
776 lineobj->ob_type->tp_name
777 );
778 Py_DECREF(lineobj);
779 return NULL;
780 }
781 ++self->line_num;
782 line = PyUnicode_AsUnicode(lineobj);
783 linelen = PyUnicode_GetSize(lineobj);
784 if (line == NULL || linelen < 0) {
785 Py_DECREF(lineobj);
786 return NULL;
787 }
788 while (linelen--) {
789 c = *line++;
790 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000791 Py_DECREF(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 PyErr_Format(error_obj,
793 "line contains NULL byte");
794 goto err;
795 }
796 if (parse_process_char(self, c) < 0) {
797 Py_DECREF(lineobj);
798 goto err;
799 }
800 }
801 Py_DECREF(lineobj);
802 if (parse_process_char(self, 0) < 0)
803 goto err;
804 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000805
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000806 fields = self->fields;
807 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000808err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000810}
811
812static void
813Reader_dealloc(ReaderObj *self)
814{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 PyObject_GC_UnTrack(self);
816 Py_XDECREF(self->dialect);
817 Py_XDECREF(self->input_iter);
818 Py_XDECREF(self->fields);
819 if (self->field != NULL)
820 PyMem_Free(self->field);
821 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000822}
823
824static int
825Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
826{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 Py_VISIT(self->dialect);
828 Py_VISIT(self->input_iter);
829 Py_VISIT(self->fields);
830 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000831}
832
833static int
834Reader_clear(ReaderObj *self)
835{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 Py_CLEAR(self->dialect);
837 Py_CLEAR(self->input_iter);
838 Py_CLEAR(self->fields);
839 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000840}
841
842PyDoc_STRVAR(Reader_Type_doc,
843"CSV reader\n"
844"\n"
845"Reader objects are responsible for reading and parsing tabular data\n"
846"in CSV format.\n"
847);
848
849static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000851};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000852#define R_OFF(x) offsetof(ReaderObj, x)
853
854static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
856 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
857 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000858};
859
Skip Montanarob4a04172003-03-20 23:29:12 +0000860
861static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 PyVarObject_HEAD_INIT(NULL, 0)
863 "_csv.reader", /*tp_name*/
864 sizeof(ReaderObj), /*tp_basicsize*/
865 0, /*tp_itemsize*/
866 /* methods */
867 (destructor)Reader_dealloc, /*tp_dealloc*/
868 (printfunc)0, /*tp_print*/
869 (getattrfunc)0, /*tp_getattr*/
870 (setattrfunc)0, /*tp_setattr*/
871 0, /*tp_reserved*/
872 (reprfunc)0, /*tp_repr*/
873 0, /*tp_as_number*/
874 0, /*tp_as_sequence*/
875 0, /*tp_as_mapping*/
876 (hashfunc)0, /*tp_hash*/
877 (ternaryfunc)0, /*tp_call*/
878 (reprfunc)0, /*tp_str*/
879 0, /*tp_getattro*/
880 0, /*tp_setattro*/
881 0, /*tp_as_buffer*/
882 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
883 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
884 Reader_Type_doc, /*tp_doc*/
885 (traverseproc)Reader_traverse, /*tp_traverse*/
886 (inquiry)Reader_clear, /*tp_clear*/
887 0, /*tp_richcompare*/
888 0, /*tp_weaklistoffset*/
889 PyObject_SelfIter, /*tp_iter*/
890 (getiterfunc)Reader_iternext, /*tp_iternext*/
891 Reader_methods, /*tp_methods*/
892 Reader_memberlist, /*tp_members*/
893 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000894
895};
896
897static PyObject *
898csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
899{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 PyObject * iterator, * dialect = NULL;
901 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 if (!self)
904 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000905
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 self->dialect = NULL;
907 self->fields = NULL;
908 self->input_iter = NULL;
909 self->field = NULL;
910 self->field_size = 0;
911 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000912
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 if (parse_reset(self) < 0) {
914 Py_DECREF(self);
915 return NULL;
916 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000917
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000918 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
919 Py_DECREF(self);
920 return NULL;
921 }
922 self->input_iter = PyObject_GetIter(iterator);
923 if (self->input_iter == NULL) {
924 PyErr_SetString(PyExc_TypeError,
925 "argument 1 must be an iterator");
926 Py_DECREF(self);
927 return NULL;
928 }
929 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
930 if (self->dialect == NULL) {
931 Py_DECREF(self);
932 return NULL;
933 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 PyObject_GC_Track(self);
936 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000937}
938
939/*
940 * WRITER
941 */
942/* ---------------------------------------------------------------- */
943static void
944join_reset(WriterObj *self)
945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 self->rec_len = 0;
947 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000948}
949
/* Granularity used when sizing the writer's record buffer: capacities are
 * always a multiple of this many characters. */
#define MEM_INCR 32768
951
952/* Calculate new record length or append field to record. Return new
953 * record length.
954 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000955static Py_ssize_t
Guido van Rossum46264582007-08-06 19:32:18 +0000956join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
957 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000958{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 DialectObj *dialect = self->dialect;
960 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000961 Py_ssize_t rec_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000963
964#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000965 do {\
966 if (copy_phase) \
967 self->rec[rec_len] = c;\
968 rec_len++;\
969 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
972 if (lineterm == NULL)
973 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 /* If this is not the first field we need a field separator */
978 if (self->num_fields > 0)
979 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 /* Handle preceding quote */
982 if (copy_phase && *quoted)
983 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000984
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 /* Copy/count field data */
986 /* If field is null just pass over */
987 for (i = 0; field; i++) {
988 Py_UNICODE c = field[i];
989 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 if (c == '\0')
992 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000993
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 if (c == dialect->delimiter ||
995 c == dialect->escapechar ||
996 c == dialect->quotechar ||
997 Py_UNICODE_strchr(lineterm, c)) {
998 if (dialect->quoting == QUOTE_NONE)
999 want_escape = 1;
1000 else {
1001 if (c == dialect->quotechar) {
1002 if (dialect->doublequote)
1003 ADDCH(dialect->quotechar);
1004 else
1005 want_escape = 1;
1006 }
1007 if (!want_escape)
1008 *quoted = 1;
1009 }
1010 if (want_escape) {
1011 if (!dialect->escapechar) {
1012 PyErr_Format(error_obj,
1013 "need to escape, but no escapechar set");
1014 return -1;
1015 }
1016 ADDCH(dialect->escapechar);
1017 }
1018 }
1019 /* Copy field character into record buffer.
1020 */
1021 ADDCH(c);
1022 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 /* If field is empty check if it needs to be quoted.
1025 */
1026 if (i == 0 && quote_empty) {
1027 if (dialect->quoting == QUOTE_NONE) {
1028 PyErr_Format(error_obj,
1029 "single empty field record must be quoted");
1030 return -1;
1031 }
1032 else
1033 *quoted = 1;
1034 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 if (*quoted) {
1037 if (copy_phase)
1038 ADDCH(dialect->quotechar);
1039 else
1040 rec_len += 2;
1041 }
1042 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001043#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001044}
1045
1046static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001047join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001048{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001049
Antoine Pitrou40455752010-08-15 18:51:10 +00001050 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 PyErr_NoMemory();
1052 return 0;
1053 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 if (rec_len > self->rec_size) {
1056 if (self->rec_size == 0) {
1057 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1058 if (self->rec != NULL)
1059 PyMem_Free(self->rec);
1060 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
1061 }
1062 else {
1063 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001065 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1066 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1067 self->rec_size);
1068 if (self->rec == NULL)
1069 PyMem_Free(old_rec);
1070 }
1071 if (self->rec == NULL) {
1072 PyErr_NoMemory();
1073 return 0;
1074 }
1075 }
1076 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001077}
1078
1079static int
Guido van Rossum46264582007-08-06 19:32:18 +00001080join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001081{
Antoine Pitrou40455752010-08-15 18:51:10 +00001082 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1085 if (rec_len < 0)
1086 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 /* grow record buffer if necessary */
1089 if (!join_check_rec_size(self, rec_len))
1090 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1093 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001096}
1097
1098static int
1099join_append_lineterminator(WriterObj *self)
1100{
Antoine Pitrou40455752010-08-15 18:51:10 +00001101 Py_ssize_t terminator_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
1105 if (terminator_len == -1)
1106 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 /* grow record buffer if necessary */
1109 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1110 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001111
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
1113 if (terminator == NULL)
1114 return 0;
1115 memmove(self->rec + self->rec_len, terminator,
1116 sizeof(Py_UNICODE)*terminator_len);
1117 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001120}
1121
1122PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001123"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001124"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001125"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001126"elements will be converted to string.");
1127
1128static PyObject *
1129csv_writerow(WriterObj *self, PyObject *seq)
1130{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001132 Py_ssize_t len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 if (!PySequence_Check(seq))
1135 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 len = PySequence_Length(seq);
1138 if (len < 0)
1139 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* Join all fields in internal buffer.
1142 */
1143 join_reset(self);
1144 for (i = 0; i < len; i++) {
1145 PyObject *field;
1146 int append_ok;
1147 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 field = PySequence_GetItem(seq, i);
1150 if (field == NULL)
1151 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 switch (dialect->quoting) {
1154 case QUOTE_NONNUMERIC:
1155 quoted = !PyNumber_Check(field);
1156 break;
1157 case QUOTE_ALL:
1158 quoted = 1;
1159 break;
1160 default:
1161 quoted = 0;
1162 break;
1163 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 if (PyUnicode_Check(field)) {
1166 append_ok = join_append(self,
1167 PyUnicode_AS_UNICODE(field),
Guido van Rossum46264582007-08-06 19:32:18 +00001168 &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 Py_DECREF(field);
1170 }
1171 else if (field == Py_None) {
1172 append_ok = join_append(self, NULL,
1173 &quoted, len == 1);
1174 Py_DECREF(field);
1175 }
1176 else {
1177 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 str = PyObject_Str(field);
1180 Py_DECREF(field);
1181 if (str == NULL)
1182 return NULL;
1183 append_ok = join_append(self,
1184 PyUnicode_AS_UNICODE(str),
1185 &quoted, len == 1);
1186 Py_DECREF(str);
1187 }
1188 if (!append_ok)
1189 return NULL;
1190 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 /* Add line terminator.
1193 */
1194 if (!join_append_lineterminator(self))
1195 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 return PyObject_CallFunction(self->writeline,
1198 "(u#)", self->rec,
1199 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001200}
1201
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001202PyDoc_STRVAR(csv_writerows_doc,
1203"writerows(sequence of sequences)\n"
1204"\n"
1205"Construct and write a series of sequences to a csv file. Non-string\n"
1206"elements will be converted to string.");
1207
Skip Montanarob4a04172003-03-20 23:29:12 +00001208static PyObject *
1209csv_writerows(WriterObj *self, PyObject *seqseq)
1210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 row_iter = PyObject_GetIter(seqseq);
1214 if (row_iter == NULL) {
1215 PyErr_SetString(PyExc_TypeError,
1216 "writerows() argument must be iterable");
1217 return NULL;
1218 }
1219 while ((row_obj = PyIter_Next(row_iter))) {
1220 result = csv_writerow(self, row_obj);
1221 Py_DECREF(row_obj);
1222 if (!result) {
1223 Py_DECREF(row_iter);
1224 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001225 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 else
1227 Py_DECREF(result);
1228 }
1229 Py_DECREF(row_iter);
1230 if (PyErr_Occurred())
1231 return NULL;
1232 Py_INCREF(Py_None);
1233 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001234}
1235
1236static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1238 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1239 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001240};
1241
1242#define W_OFF(x) offsetof(WriterObj, x)
1243
1244static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1246 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001247};
1248
1249static void
1250Writer_dealloc(WriterObj *self)
1251{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 PyObject_GC_UnTrack(self);
1253 Py_XDECREF(self->dialect);
1254 Py_XDECREF(self->writeline);
1255 if (self->rec != NULL)
1256 PyMem_Free(self->rec);
1257 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001258}
1259
1260static int
1261Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1262{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 Py_VISIT(self->dialect);
1264 Py_VISIT(self->writeline);
1265 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001266}
1267
1268static int
1269Writer_clear(WriterObj *self)
1270{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 Py_CLEAR(self->dialect);
1272 Py_CLEAR(self->writeline);
1273 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001274}
1275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001277"CSV writer\n"
1278"\n"
1279"Writer objects are responsible for generating tabular data\n"
1280"in CSV format from sequence input.\n"
1281);
1282
1283static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 PyVarObject_HEAD_INIT(NULL, 0)
1285 "_csv.writer", /*tp_name*/
1286 sizeof(WriterObj), /*tp_basicsize*/
1287 0, /*tp_itemsize*/
1288 /* methods */
1289 (destructor)Writer_dealloc, /*tp_dealloc*/
1290 (printfunc)0, /*tp_print*/
1291 (getattrfunc)0, /*tp_getattr*/
1292 (setattrfunc)0, /*tp_setattr*/
1293 0, /*tp_reserved*/
1294 (reprfunc)0, /*tp_repr*/
1295 0, /*tp_as_number*/
1296 0, /*tp_as_sequence*/
1297 0, /*tp_as_mapping*/
1298 (hashfunc)0, /*tp_hash*/
1299 (ternaryfunc)0, /*tp_call*/
1300 (reprfunc)0, /*tp_str*/
1301 0, /*tp_getattro*/
1302 0, /*tp_setattro*/
1303 0, /*tp_as_buffer*/
1304 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1305 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1306 Writer_Type_doc,
1307 (traverseproc)Writer_traverse, /*tp_traverse*/
1308 (inquiry)Writer_clear, /*tp_clear*/
1309 0, /*tp_richcompare*/
1310 0, /*tp_weaklistoffset*/
1311 (getiterfunc)0, /*tp_iter*/
1312 (getiterfunc)0, /*tp_iternext*/
1313 Writer_methods, /*tp_methods*/
1314 Writer_memberlist, /*tp_members*/
1315 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001316};
1317
1318static PyObject *
1319csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1320{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 PyObject * output_file, * dialect = NULL;
1322 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001323
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 if (!self)
1325 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 self->dialect = NULL;
1328 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 self->rec = NULL;
1331 self->rec_size = 0;
1332 self->rec_len = 0;
1333 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001334
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1336 Py_DECREF(self);
1337 return NULL;
1338 }
1339 self->writeline = PyObject_GetAttrString(output_file, "write");
1340 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1341 PyErr_SetString(PyExc_TypeError,
1342 "argument 1 must have a \"write\" method");
1343 Py_DECREF(self);
1344 return NULL;
1345 }
1346 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1347 if (self->dialect == NULL) {
1348 Py_DECREF(self);
1349 return NULL;
1350 }
1351 PyObject_GC_Track(self);
1352 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001353}
1354
1355/*
1356 * DIALECT REGISTRY
1357 */
1358static PyObject *
1359csv_list_dialects(PyObject *module, PyObject *args)
1360{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001362}
1363
1364static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001365csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 PyObject *name_obj, *dialect_obj = NULL;
1368 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1371 return NULL;
1372 if (!IS_BASESTRING(name_obj)) {
1373 PyErr_SetString(PyExc_TypeError,
1374 "dialect name must be a string or unicode");
1375 return NULL;
1376 }
1377 dialect = _call_dialect(dialect_obj, kwargs);
1378 if (dialect == NULL)
1379 return NULL;
1380 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1381 Py_DECREF(dialect);
1382 return NULL;
1383 }
1384 Py_DECREF(dialect);
1385 Py_INCREF(Py_None);
1386 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001387}
1388
1389static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001390csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 if (PyDict_DelItem(dialects, name_obj) < 0)
1393 return PyErr_Format(error_obj, "unknown dialect");
1394 Py_INCREF(Py_None);
1395 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001396}
1397
1398static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001399csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001400{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001402}
1403
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001404static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001405csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001406{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 PyObject *new_limit = NULL;
1408 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1411 return NULL;
1412 if (new_limit != NULL) {
1413 if (!PyLong_CheckExact(new_limit)) {
1414 PyErr_Format(PyExc_TypeError,
1415 "limit must be an integer");
1416 return NULL;
1417 }
1418 field_limit = PyLong_AsLong(new_limit);
1419 if (field_limit == -1 && PyErr_Occurred()) {
1420 field_limit = old_limit;
1421 return NULL;
1422 }
1423 }
1424 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001425}
1426
Skip Montanarob4a04172003-03-20 23:29:12 +00001427/*
1428 * MODULE
1429 */
1430
1431PyDoc_STRVAR(csv_module_doc,
1432"CSV parsing and writing.\n"
1433"\n"
1434"This module provides classes that assist in the reading and writing\n"
1435"of Comma Separated Value (CSV) files, and implements the interface\n"
1436"described by PEP 305. Although many CSV files are simple to parse,\n"
1437"the format is not formally defined by a stable specification and\n"
1438"is subtle enough that parsing lines of a CSV file with something\n"
1439"like line.split(\",\") is bound to fail. The module supports three\n"
1440"basic APIs: reading, writing, and registration of dialects.\n"
1441"\n"
1442"\n"
1443"DIALECT REGISTRATION:\n"
1444"\n"
1445"Readers and writers support a dialect argument, which is a convenient\n"
1446"handle on a group of settings. When the dialect argument is a string,\n"
1447"it identifies one of the dialects previously registered with the module.\n"
1448"If it is a class or instance, the attributes of the argument are used as\n"
1449"the settings for the reader or writer:\n"
1450"\n"
1451" class excel:\n"
1452" delimiter = ','\n"
1453" quotechar = '\"'\n"
1454" escapechar = None\n"
1455" doublequote = True\n"
1456" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001457" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001458" quoting = QUOTE_MINIMAL\n"
1459"\n"
1460"SETTINGS:\n"
1461"\n"
1462" * quotechar - specifies a one-character string to use as the \n"
1463" quoting character. It defaults to '\"'.\n"
1464" * delimiter - specifies a one-character string to use as the \n"
1465" field separator. It defaults to ','.\n"
1466" * skipinitialspace - specifies how to interpret whitespace which\n"
1467" immediately follows a delimiter. It defaults to False, which\n"
1468" means that whitespace immediately following a delimiter is part\n"
1469" of the following field.\n"
1470" * lineterminator - specifies the character sequence which should \n"
1471" terminate rows.\n"
1472" * quoting - controls when quotes should be generated by the writer.\n"
1473" It can take on any of the following module constants:\n"
1474"\n"
1475" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1476" field contains either the quotechar or the delimiter\n"
1477" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1478" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001479" fields which do not parse as integers or floating point\n"
1480" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001481" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1482" * escapechar - specifies a one-character string used to escape \n"
1483" the delimiter when quoting is set to QUOTE_NONE.\n"
1484" * doublequote - controls the handling of quotes inside fields. When\n"
1485" True, two consecutive quotes are interpreted as one during read,\n"
1486" and when writing, each quote character embedded in the data is\n"
1487" written as two quotes\n");
1488
1489PyDoc_STRVAR(csv_reader_doc,
1490" csv_reader = reader(iterable [, dialect='excel']\n"
1491" [optional keyword args])\n"
1492" for row in csv_reader:\n"
1493" process(row)\n"
1494"\n"
1495"The \"iterable\" argument can be any object that returns a line\n"
1496"of input for each iteration, such as a file object or a list. The\n"
1497"optional \"dialect\" parameter is discussed below. The function\n"
1498"also accepts optional keyword arguments which override settings\n"
1499"provided by the dialect.\n"
1500"\n"
1501"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001502"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001503
1504PyDoc_STRVAR(csv_writer_doc,
1505" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1506" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001507" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001508" csv_writer.writerow(row)\n"
1509"\n"
1510" [or]\n"
1511"\n"
1512" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1513" [optional keyword args])\n"
1514" csv_writer.writerows(rows)\n"
1515"\n"
1516"The \"fileobj\" argument can be any object that supports the file API.\n");
1517
1518PyDoc_STRVAR(csv_list_dialects_doc,
1519"Return a list of all know dialect names.\n"
1520" names = csv.list_dialects()");
1521
1522PyDoc_STRVAR(csv_get_dialect_doc,
1523"Return the dialect instance associated with name.\n"
1524" dialect = csv.get_dialect(name)");
1525
1526PyDoc_STRVAR(csv_register_dialect_doc,
1527"Create a mapping from a string name to a dialect class.\n"
1528" dialect = csv.register_dialect(name, dialect)");
1529
1530PyDoc_STRVAR(csv_unregister_dialect_doc,
1531"Delete the name/dialect mapping associated with a string name.\n"
1532" csv.unregister_dialect(name)");
1533
Andrew McNamara31d88962005-01-12 03:45:10 +00001534PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001535"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001536" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001537"\n"
1538"Returns old limit. If limit is not given, no new limit is set and\n"
1539"the old limit is returned");
1540
Skip Montanarob4a04172003-03-20 23:29:12 +00001541static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001542 { "reader", (PyCFunction)csv_reader,
1543 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1544 { "writer", (PyCFunction)csv_writer,
1545 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1546 { "list_dialects", (PyCFunction)csv_list_dialects,
1547 METH_NOARGS, csv_list_dialects_doc},
1548 { "register_dialect", (PyCFunction)csv_register_dialect,
1549 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1550 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1551 METH_O, csv_unregister_dialect_doc},
1552 { "get_dialect", (PyCFunction)csv_get_dialect,
1553 METH_O, csv_get_dialect_doc},
1554 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1555 METH_VARARGS, csv_field_size_limit_doc},
1556 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001557};
1558
Martin v. Löwis1a214512008-06-11 05:26:20 +00001559
1560static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001561 PyModuleDef_HEAD_INIT,
1562 "_csv",
1563 csv_module_doc,
1564 -1,
1565 csv_methods,
1566 NULL,
1567 NULL,
1568 NULL,
1569 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001570};
1571
Skip Montanarob4a04172003-03-20 23:29:12 +00001572PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001573PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001574{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 PyObject *module;
1576 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 if (PyType_Ready(&Dialect_Type) < 0)
1579 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001580
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001581 if (PyType_Ready(&Reader_Type) < 0)
1582 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001583
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001584 if (PyType_Ready(&Writer_Type) < 0)
1585 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001586
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001587 /* Create the module and add the functions */
1588 module = PyModule_Create(&_csvmodule);
1589 if (module == NULL)
1590 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001591
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001592 /* Add version to the module. */
1593 if (PyModule_AddStringConstant(module, "__version__",
1594 MODULE_VERSION) == -1)
1595 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 /* Add _dialects dictionary */
1598 dialects = PyDict_New();
1599 if (dialects == NULL)
1600 return NULL;
1601 if (PyModule_AddObject(module, "_dialects", dialects))
1602 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001603
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 /* Add quote styles into dictionary */
1605 for (style = quote_styles; style->name; style++) {
1606 if (PyModule_AddIntConstant(module, style->name,
1607 style->style) == -1)
1608 return NULL;
1609 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 /* Add the Dialect type */
1612 Py_INCREF(&Dialect_Type);
1613 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1614 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 /* Add the CSV exception object to the module. */
1617 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1618 if (error_obj == NULL)
1619 return NULL;
1620 PyModule_AddObject(module, "Error", error_obj);
1621 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001622}