blob: 8da728c67693c81f681d957bf451f5849f022b1b [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
Skip Montanarob4a04172003-03-20 23:29:12 +00009*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
/* Non-zero when `o` passes PyUnicode_Check(), i.e. is a str object. */
#define IS_BASESTRING(o) PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000019static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000020static PyObject *dialects; /* Dialect registry */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000021static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000022
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
28
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
32
33typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 QuoteStyle style;
35 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000036} StyleDesc;
37
38static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
40 { QUOTE_ALL, "QUOTE_ALL" },
41 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
42 { QUOTE_NONE, "QUOTE_NONE" },
43 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000044};
45
46typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000049 int doublequote; /* is " represented by ""? */
50 Py_UNICODE delimiter; /* field separator */
51 Py_UNICODE quotechar; /* quote character */
52 Py_UNICODE escapechar; /* escape character */
53 int skipinitialspace; /* ignore spaces following delimiter? */
54 PyObject *lineterminator; /* string to write between records */
55 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000058} DialectObj;
59
Neal Norwitz227b5332006-03-22 09:28:35 +000060static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000061
62typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 PyObject *fields; /* field list for current record */
70 ParserState state; /* current CSV parse state */
71 Py_UNICODE *field; /* build current field in here */
Antoine Pitrou40455752010-08-15 18:51:10 +000072 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 Py_ssize_t field_len; /* length of current field */
74 int numeric_field; /* treat field as numeric */
75 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000076} ReaderObj;
77
Neal Norwitz227b5332006-03-22 09:28:35 +000078static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000079
Christian Heimes90aa7642007-12-19 02:45:37 +000080#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000081
82typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 Py_UNICODE *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +000090 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_ssize_t rec_len; /* length of record */
92 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000093} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
97/*
98 * DIALECT class
99 */
100
101static PyObject *
102get_dialect_from_registry(PyObject * name_obj)
103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 dialect_obj = PyDict_GetItem(dialects, name_obj);
107 if (dialect_obj == NULL) {
108 if (!PyErr_Occurred())
109 PyErr_Format(error_obj, "unknown dialect");
110 }
111 else
112 Py_INCREF(dialect_obj);
113 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114}
115
Skip Montanarob4a04172003-03-20 23:29:12 +0000116static PyObject *
117get_string(PyObject *str)
118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_XINCREF(str);
120 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000121}
122
Skip Montanarob4a04172003-03-20 23:29:12 +0000123static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000124get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 if (c == '\0') {
127 Py_INCREF(Py_None);
128 return Py_None;
129 }
130 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200131 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000132}
133
Skip Montanarob4a04172003-03-20 23:29:12 +0000134static PyObject *
135Dialect_get_lineterminator(DialectObj *self)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000141Dialect_get_delimiter(DialectObj *self)
142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000144}
145
146static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000147Dialect_get_escapechar(DialectObj *self)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000150}
151
Andrew McNamara1196cf12005-01-07 04:42:45 +0000152static PyObject *
153Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000156}
157
158static PyObject *
159Dialect_get_quoting(DialectObj *self)
160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
164static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000165_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (src == NULL)
168 *target = dflt;
169 else
170 *target = PyObject_IsTrue(src);
171 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000172}
173
Andrew McNamara1196cf12005-01-07 04:42:45 +0000174static int
175_set_int(const char *name, int *target, PyObject *src, int dflt)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 if (src == NULL)
178 *target = dflt;
179 else {
180 long value;
181 if (!PyLong_CheckExact(src)) {
182 PyErr_Format(PyExc_TypeError,
183 "\"%s\" must be an integer", name);
184 return -1;
185 }
186 value = PyLong_AsLong(src);
187 if (value == -1 && PyErr_Occurred())
188 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000189#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 if (value > INT_MAX || value < INT_MIN) {
191 PyErr_Format(PyExc_ValueError,
192 "integer out of range for \"%s\"", name);
193 return -1;
194 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000195#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 *target = (int)value;
197 }
198 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000199}
200
201static int
Guido van Rossum46264582007-08-06 19:32:18 +0000202_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (src == NULL)
205 *target = dflt;
206 else {
207 *target = '\0';
208 if (src != Py_None) {
209 Py_UNICODE *buf;
210 Py_ssize_t len;
211 buf = PyUnicode_AsUnicode(src);
212 len = PyUnicode_GetSize(src);
213 if (buf == NULL || len > 1) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an 1-character string",
216 name);
217 return -1;
218 }
219 if (len > 0)
220 *target = buf[0];
221 }
222 }
223 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000224}
225
226static int
227_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 if (src == NULL)
230 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
231 else {
232 if (src == Py_None)
233 *target = NULL;
234 else if (!IS_BASESTRING(src)) {
235 PyErr_Format(PyExc_TypeError,
236 "\"%s\" must be a string", name);
237 return -1;
238 }
239 else {
240 Py_XDECREF(*target);
241 Py_INCREF(src);
242 *target = src;
243 }
244 }
245 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000246}
247
248static int
249dialect_check_quoting(int quoting)
250{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000251 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 for (qs = quote_styles; qs->name; qs++) {
254 if (qs->style == quoting)
255 return 0;
256 }
257 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
258 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000259}
Skip Montanarob4a04172003-03-20 23:29:12 +0000260
261#define D_OFF(x) offsetof(DialectObj, x)
262
263static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
265 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
266 { "strict", T_INT, D_OFF(strict), READONLY },
267 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000268};
269
270static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 { "delimiter", (getter)Dialect_get_delimiter},
272 { "escapechar", (getter)Dialect_get_escapechar},
273 { "lineterminator", (getter)Dialect_get_lineterminator},
274 { "quotechar", (getter)Dialect_get_quotechar},
275 { "quoting", (getter)Dialect_get_quoting},
276 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000277};
278
279static void
280Dialect_dealloc(DialectObj *self)
281{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000282 Py_XDECREF(self->lineterminator);
283 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000284}
285
/* Keyword names accepted by dialect_new, in argument order. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL    /* terminator */
};
298
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000299static PyObject *
300dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000301{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 DialectObj *self;
303 PyObject *ret = NULL;
304 PyObject *dialect = NULL;
305 PyObject *delimiter = NULL;
306 PyObject *doublequote = NULL;
307 PyObject *escapechar = NULL;
308 PyObject *lineterminator = NULL;
309 PyObject *quotechar = NULL;
310 PyObject *quoting = NULL;
311 PyObject *skipinitialspace = NULL;
312 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000313
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
315 "|OOOOOOOOO", dialect_kws,
316 &dialect,
317 &delimiter,
318 &doublequote,
319 &escapechar,
320 &lineterminator,
321 &quotechar,
322 &quoting,
323 &skipinitialspace,
324 &strict))
325 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 if (dialect != NULL) {
328 if (IS_BASESTRING(dialect)) {
329 dialect = get_dialect_from_registry(dialect);
330 if (dialect == NULL)
331 return NULL;
332 }
333 else
334 Py_INCREF(dialect);
335 /* Can we reuse this instance? */
336 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
337 delimiter == 0 &&
338 doublequote == 0 &&
339 escapechar == 0 &&
340 lineterminator == 0 &&
341 quotechar == 0 &&
342 quoting == 0 &&
343 skipinitialspace == 0 &&
344 strict == 0)
345 return dialect;
346 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000347
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000348 self = (DialectObj *)type->tp_alloc(type, 0);
349 if (self == NULL) {
350 Py_XDECREF(dialect);
351 return NULL;
352 }
353 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000355 Py_XINCREF(delimiter);
356 Py_XINCREF(doublequote);
357 Py_XINCREF(escapechar);
358 Py_XINCREF(lineterminator);
359 Py_XINCREF(quotechar);
360 Py_XINCREF(quoting);
361 Py_XINCREF(skipinitialspace);
362 Py_XINCREF(strict);
363 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000364#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 if (v == NULL) \
366 v = PyObject_GetAttrString(dialect, n)
367 DIALECT_GETATTR(delimiter, "delimiter");
368 DIALECT_GETATTR(doublequote, "doublequote");
369 DIALECT_GETATTR(escapechar, "escapechar");
370 DIALECT_GETATTR(lineterminator, "lineterminator");
371 DIALECT_GETATTR(quotechar, "quotechar");
372 DIALECT_GETATTR(quoting, "quoting");
373 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
374 DIALECT_GETATTR(strict, "strict");
375 PyErr_Clear();
376 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000379#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 if (meth(name, target, src, dflt)) \
381 goto err
382 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
383 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
384 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
385 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
386 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
387 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
388 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
389 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 /* validate options */
392 if (dialect_check_quoting(self->quoting))
393 goto err;
394 if (self->delimiter == 0) {
395 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
396 goto err;
397 }
398 if (quotechar == Py_None && quoting == NULL)
399 self->quoting = QUOTE_NONE;
400 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
401 PyErr_SetString(PyExc_TypeError,
402 "quotechar must be set if quoting enabled");
403 goto err;
404 }
405 if (self->lineterminator == 0) {
406 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
407 goto err;
408 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 ret = (PyObject *)self;
411 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000412err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 Py_XDECREF(self);
414 Py_XDECREF(dialect);
415 Py_XDECREF(delimiter);
416 Py_XDECREF(doublequote);
417 Py_XDECREF(escapechar);
418 Py_XDECREF(lineterminator);
419 Py_XDECREF(quotechar);
420 Py_XDECREF(quoting);
421 Py_XDECREF(skipinitialspace);
422 Py_XDECREF(strict);
423 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000424}
425
426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000428"CSV dialect\n"
429"\n"
430"The Dialect type records CSV parsing and generation options.\n");
431
432static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 PyVarObject_HEAD_INIT(NULL, 0)
434 "_csv.Dialect", /* tp_name */
435 sizeof(DialectObj), /* tp_basicsize */
436 0, /* tp_itemsize */
437 /* methods */
438 (destructor)Dialect_dealloc, /* tp_dealloc */
439 (printfunc)0, /* tp_print */
440 (getattrfunc)0, /* tp_getattr */
441 (setattrfunc)0, /* tp_setattr */
442 0, /* tp_reserved */
443 (reprfunc)0, /* tp_repr */
444 0, /* tp_as_number */
445 0, /* tp_as_sequence */
446 0, /* tp_as_mapping */
447 (hashfunc)0, /* tp_hash */
448 (ternaryfunc)0, /* tp_call */
449 (reprfunc)0, /* tp_str */
450 0, /* tp_getattro */
451 0, /* tp_setattro */
452 0, /* tp_as_buffer */
453 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
454 Dialect_Type_doc, /* tp_doc */
455 0, /* tp_traverse */
456 0, /* tp_clear */
457 0, /* tp_richcompare */
458 0, /* tp_weaklistoffset */
459 0, /* tp_iter */
460 0, /* tp_iternext */
461 0, /* tp_methods */
462 Dialect_memberlist, /* tp_members */
463 Dialect_getsetlist, /* tp_getset */
464 0, /* tp_base */
465 0, /* tp_dict */
466 0, /* tp_descr_get */
467 0, /* tp_descr_set */
468 0, /* tp_dictoffset */
469 0, /* tp_init */
470 0, /* tp_alloc */
471 dialect_new, /* tp_new */
472 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000473};
474
Andrew McNamara91b97462005-01-11 01:07:23 +0000475/*
476 * Return an instance of the dialect type, given a Python instance or kwarg
477 * description of the dialect
478 */
479static PyObject *
480_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
481{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 PyObject *ctor_args;
483 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
486 if (ctor_args == NULL)
487 return NULL;
488 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
489 Py_DECREF(ctor_args);
490 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000491}
492
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000493/*
494 * READER
495 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000496static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000497parse_save_field(ReaderObj *self)
498{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000499 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000500
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 field = PyUnicode_FromUnicode(self->field, self->field_len);
502 if (field == NULL)
503 return -1;
504 self->field_len = 0;
505 if (self->numeric_field) {
506 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000507
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000508 self->numeric_field = 0;
509 tmp = PyNumber_Float(field);
510 if (tmp == NULL) {
511 Py_DECREF(field);
512 return -1;
513 }
514 Py_DECREF(field);
515 field = tmp;
516 }
517 PyList_Append(self->fields, field);
518 Py_DECREF(field);
519 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000520}
521
522static int
523parse_grow_buff(ReaderObj *self)
524{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 if (self->field_size == 0) {
526 self->field_size = 4096;
527 if (self->field != NULL)
528 PyMem_Free(self->field);
529 self->field = PyMem_New(Py_UNICODE, self->field_size);
530 }
531 else {
Benjamin Peterson17a332a2011-10-06 17:06:25 -0400532 Py_UNICODE *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000533 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 PyErr_NoMemory();
535 return 0;
536 }
537 self->field_size *= 2;
Benjamin Peterson17a332a2011-10-06 17:06:25 -0400538 self->field = PyMem_Resize(field, Py_UNICODE, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 }
540 if (self->field == NULL) {
541 PyErr_NoMemory();
542 return 0;
543 }
544 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000545}
546
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000547static int
Guido van Rossum46264582007-08-06 19:32:18 +0000548parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000549{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 if (self->field_len >= field_limit) {
551 PyErr_Format(error_obj, "field larger than field limit (%ld)",
552 field_limit);
553 return -1;
554 }
555 if (self->field_len == self->field_size && !parse_grow_buff(self))
556 return -1;
557 self->field[self->field_len++] = c;
558 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559}
560
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000561static int
Guido van Rossum46264582007-08-06 19:32:18 +0000562parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 switch (self->state) {
567 case START_RECORD:
568 /* start of record */
569 if (c == '\0')
570 /* empty line - return [] */
571 break;
572 else if (c == '\n' || c == '\r') {
573 self->state = EAT_CRNL;
574 break;
575 }
576 /* normal character - handle as START_FIELD */
577 self->state = START_FIELD;
578 /* fallthru */
579 case START_FIELD:
580 /* expecting field */
581 if (c == '\n' || c == '\r' || c == '\0') {
582 /* save empty field - return [fields] */
583 if (parse_save_field(self) < 0)
584 return -1;
585 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
586 }
587 else if (c == dialect->quotechar &&
588 dialect->quoting != QUOTE_NONE) {
589 /* start quoted field */
590 self->state = IN_QUOTED_FIELD;
591 }
592 else if (c == dialect->escapechar) {
593 /* possible escaped character */
594 self->state = ESCAPED_CHAR;
595 }
596 else if (c == ' ' && dialect->skipinitialspace)
597 /* ignore space at start of field */
598 ;
599 else if (c == dialect->delimiter) {
600 /* save empty field */
601 if (parse_save_field(self) < 0)
602 return -1;
603 }
604 else {
605 /* begin new unquoted field */
606 if (dialect->quoting == QUOTE_NONNUMERIC)
607 self->numeric_field = 1;
608 if (parse_add_char(self, c) < 0)
609 return -1;
610 self->state = IN_FIELD;
611 }
612 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 case ESCAPED_CHAR:
615 if (c == '\0')
616 c = '\n';
617 if (parse_add_char(self, c) < 0)
618 return -1;
619 self->state = IN_FIELD;
620 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000621
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000622 case IN_FIELD:
623 /* in unquoted field */
624 if (c == '\n' || c == '\r' || c == '\0') {
625 /* end of line - return [fields] */
626 if (parse_save_field(self) < 0)
627 return -1;
628 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
629 }
630 else if (c == dialect->escapechar) {
631 /* possible escaped character */
632 self->state = ESCAPED_CHAR;
633 }
634 else if (c == dialect->delimiter) {
635 /* save field - wait for new field */
636 if (parse_save_field(self) < 0)
637 return -1;
638 self->state = START_FIELD;
639 }
640 else {
641 /* normal character - save in field */
642 if (parse_add_char(self, c) < 0)
643 return -1;
644 }
645 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 case IN_QUOTED_FIELD:
648 /* in quoted field */
649 if (c == '\0')
650 ;
651 else if (c == dialect->escapechar) {
652 /* Possible escape character */
653 self->state = ESCAPE_IN_QUOTED_FIELD;
654 }
655 else if (c == dialect->quotechar &&
656 dialect->quoting != QUOTE_NONE) {
657 if (dialect->doublequote) {
658 /* doublequote; " represented by "" */
659 self->state = QUOTE_IN_QUOTED_FIELD;
660 }
661 else {
662 /* end of quote part of field */
663 self->state = IN_FIELD;
664 }
665 }
666 else {
667 /* normal character - save in field */
668 if (parse_add_char(self, c) < 0)
669 return -1;
670 }
671 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 case ESCAPE_IN_QUOTED_FIELD:
674 if (c == '\0')
675 c = '\n';
676 if (parse_add_char(self, c) < 0)
677 return -1;
678 self->state = IN_QUOTED_FIELD;
679 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 case QUOTE_IN_QUOTED_FIELD:
682 /* doublequote - seen a quote in an quoted field */
683 if (dialect->quoting != QUOTE_NONE &&
684 c == dialect->quotechar) {
685 /* save "" as " */
686 if (parse_add_char(self, c) < 0)
687 return -1;
688 self->state = IN_QUOTED_FIELD;
689 }
690 else if (c == dialect->delimiter) {
691 /* save field - wait for new field */
692 if (parse_save_field(self) < 0)
693 return -1;
694 self->state = START_FIELD;
695 }
696 else if (c == '\n' || c == '\r' || c == '\0') {
697 /* end of line - return [fields] */
698 if (parse_save_field(self) < 0)
699 return -1;
700 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
701 }
702 else if (!dialect->strict) {
703 if (parse_add_char(self, c) < 0)
704 return -1;
705 self->state = IN_FIELD;
706 }
707 else {
708 /* illegal */
709 PyErr_Format(error_obj, "'%c' expected after '%c'",
710 dialect->delimiter,
711 dialect->quotechar);
712 return -1;
713 }
714 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 case EAT_CRNL:
717 if (c == '\n' || c == '\r')
718 ;
719 else if (c == '\0')
720 self->state = START_RECORD;
721 else {
722 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
723 return -1;
724 }
725 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 }
728 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000729}
730
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000731static int
732parse_reset(ReaderObj *self)
733{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 Py_XDECREF(self->fields);
735 self->fields = PyList_New(0);
736 if (self->fields == NULL)
737 return -1;
738 self->field_len = 0;
739 self->state = START_RECORD;
740 self->numeric_field = 0;
741 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000742}
Skip Montanarob4a04172003-03-20 23:29:12 +0000743
744static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000745Reader_iternext(ReaderObj *self)
746{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 PyObject *lineobj;
748 PyObject *fields = NULL;
749 Py_UNICODE *line, c;
750 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 if (parse_reset(self) < 0)
753 return NULL;
754 do {
755 lineobj = PyIter_Next(self->input_iter);
756 if (lineobj == NULL) {
757 /* End of input OR exception */
758 if (!PyErr_Occurred() && self->field_len != 0)
759 PyErr_Format(error_obj,
760 "newline inside string");
761 return NULL;
762 }
763 if (!PyUnicode_Check(lineobj)) {
764 PyErr_Format(error_obj,
765 "iterator should return strings, "
766 "not %.200s "
767 "(did you open the file in text mode?)",
768 lineobj->ob_type->tp_name
769 );
770 Py_DECREF(lineobj);
771 return NULL;
772 }
773 ++self->line_num;
774 line = PyUnicode_AsUnicode(lineobj);
775 linelen = PyUnicode_GetSize(lineobj);
776 if (line == NULL || linelen < 0) {
777 Py_DECREF(lineobj);
778 return NULL;
779 }
780 while (linelen--) {
781 c = *line++;
782 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000783 Py_DECREF(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 PyErr_Format(error_obj,
785 "line contains NULL byte");
786 goto err;
787 }
788 if (parse_process_char(self, c) < 0) {
789 Py_DECREF(lineobj);
790 goto err;
791 }
792 }
793 Py_DECREF(lineobj);
794 if (parse_process_char(self, 0) < 0)
795 goto err;
796 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000797
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 fields = self->fields;
799 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000800err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000802}
803
804static void
805Reader_dealloc(ReaderObj *self)
806{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 PyObject_GC_UnTrack(self);
808 Py_XDECREF(self->dialect);
809 Py_XDECREF(self->input_iter);
810 Py_XDECREF(self->fields);
811 if (self->field != NULL)
812 PyMem_Free(self->field);
813 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000814}
815
816static int
817Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
818{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 Py_VISIT(self->dialect);
820 Py_VISIT(self->input_iter);
821 Py_VISIT(self->fields);
822 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000823}
824
825static int
826Reader_clear(ReaderObj *self)
827{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 Py_CLEAR(self->dialect);
829 Py_CLEAR(self->input_iter);
830 Py_CLEAR(self->fields);
831 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000832}
833
834PyDoc_STRVAR(Reader_Type_doc,
835"CSV reader\n"
836"\n"
837"Reader objects are responsible for reading and parsing tabular data\n"
838"in CSV format.\n"
839);
840
841static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000843};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000844#define R_OFF(x) offsetof(ReaderObj, x)
845
846static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
848 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
849 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000850};
851
Skip Montanarob4a04172003-03-20 23:29:12 +0000852
853static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 PyVarObject_HEAD_INIT(NULL, 0)
855 "_csv.reader", /*tp_name*/
856 sizeof(ReaderObj), /*tp_basicsize*/
857 0, /*tp_itemsize*/
858 /* methods */
859 (destructor)Reader_dealloc, /*tp_dealloc*/
860 (printfunc)0, /*tp_print*/
861 (getattrfunc)0, /*tp_getattr*/
862 (setattrfunc)0, /*tp_setattr*/
863 0, /*tp_reserved*/
864 (reprfunc)0, /*tp_repr*/
865 0, /*tp_as_number*/
866 0, /*tp_as_sequence*/
867 0, /*tp_as_mapping*/
868 (hashfunc)0, /*tp_hash*/
869 (ternaryfunc)0, /*tp_call*/
870 (reprfunc)0, /*tp_str*/
871 0, /*tp_getattro*/
872 0, /*tp_setattro*/
873 0, /*tp_as_buffer*/
874 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
875 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
876 Reader_Type_doc, /*tp_doc*/
877 (traverseproc)Reader_traverse, /*tp_traverse*/
878 (inquiry)Reader_clear, /*tp_clear*/
879 0, /*tp_richcompare*/
880 0, /*tp_weaklistoffset*/
881 PyObject_SelfIter, /*tp_iter*/
882 (getiterfunc)Reader_iternext, /*tp_iternext*/
883 Reader_methods, /*tp_methods*/
884 Reader_memberlist, /*tp_members*/
885 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000886
887};
888
889static PyObject *
890csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
891{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 PyObject * iterator, * dialect = NULL;
893 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000894
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 if (!self)
896 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000897
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 self->dialect = NULL;
899 self->fields = NULL;
900 self->input_iter = NULL;
901 self->field = NULL;
902 self->field_size = 0;
903 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000904
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 if (parse_reset(self) < 0) {
906 Py_DECREF(self);
907 return NULL;
908 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000909
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
911 Py_DECREF(self);
912 return NULL;
913 }
914 self->input_iter = PyObject_GetIter(iterator);
915 if (self->input_iter == NULL) {
916 PyErr_SetString(PyExc_TypeError,
917 "argument 1 must be an iterator");
918 Py_DECREF(self);
919 return NULL;
920 }
921 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
922 if (self->dialect == NULL) {
923 Py_DECREF(self);
924 return NULL;
925 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000926
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000927 PyObject_GC_Track(self);
928 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000929}
930
931/*
932 * WRITER
933 */
934/* ---------------------------------------------------------------- */
935static void
936join_reset(WriterObj *self)
937{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 self->rec_len = 0;
939 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000940}
941
942#define MEM_INCR 32768
943
944/* Calculate new record length or append field to record. Return new
945 * record length.
946 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000947static Py_ssize_t
Guido van Rossum46264582007-08-06 19:32:18 +0000948join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
949 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000950{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 DialectObj *dialect = self->dialect;
952 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000953 Py_ssize_t rec_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000955
956#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 do {\
958 if (copy_phase) \
959 self->rec[rec_len] = c;\
960 rec_len++;\
961 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
964 if (lineterm == NULL)
965 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000967 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 /* If this is not the first field we need a field separator */
970 if (self->num_fields > 0)
971 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 /* Handle preceding quote */
974 if (copy_phase && *quoted)
975 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 /* Copy/count field data */
978 /* If field is null just pass over */
979 for (i = 0; field; i++) {
980 Py_UNICODE c = field[i];
981 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 if (c == '\0')
984 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 if (c == dialect->delimiter ||
987 c == dialect->escapechar ||
988 c == dialect->quotechar ||
989 Py_UNICODE_strchr(lineterm, c)) {
990 if (dialect->quoting == QUOTE_NONE)
991 want_escape = 1;
992 else {
993 if (c == dialect->quotechar) {
994 if (dialect->doublequote)
995 ADDCH(dialect->quotechar);
996 else
997 want_escape = 1;
998 }
999 if (!want_escape)
1000 *quoted = 1;
1001 }
1002 if (want_escape) {
1003 if (!dialect->escapechar) {
1004 PyErr_Format(error_obj,
1005 "need to escape, but no escapechar set");
1006 return -1;
1007 }
1008 ADDCH(dialect->escapechar);
1009 }
1010 }
1011 /* Copy field character into record buffer.
1012 */
1013 ADDCH(c);
1014 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 /* If field is empty check if it needs to be quoted.
1017 */
1018 if (i == 0 && quote_empty) {
1019 if (dialect->quoting == QUOTE_NONE) {
1020 PyErr_Format(error_obj,
1021 "single empty field record must be quoted");
1022 return -1;
1023 }
1024 else
1025 *quoted = 1;
1026 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 if (*quoted) {
1029 if (copy_phase)
1030 ADDCH(dialect->quotechar);
1031 else
1032 rec_len += 2;
1033 }
1034 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001036}
1037
1038static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001039join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001040{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001041
Antoine Pitrou40455752010-08-15 18:51:10 +00001042 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 PyErr_NoMemory();
1044 return 0;
1045 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001046
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 if (rec_len > self->rec_size) {
1048 if (self->rec_size == 0) {
1049 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1050 if (self->rec != NULL)
1051 PyMem_Free(self->rec);
1052 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
1053 }
1054 else {
1055 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Benjamin Peterson17a332a2011-10-06 17:06:25 -04001058 self->rec = PyMem_Resize(old_rec, Py_UNICODE, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 if (self->rec == NULL)
1060 PyMem_Free(old_rec);
1061 }
1062 if (self->rec == NULL) {
1063 PyErr_NoMemory();
1064 return 0;
1065 }
1066 }
1067 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001068}
1069
1070static int
Guido van Rossum46264582007-08-06 19:32:18 +00001071join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001072{
Antoine Pitrou40455752010-08-15 18:51:10 +00001073 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001074
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1076 if (rec_len < 0)
1077 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 /* grow record buffer if necessary */
1080 if (!join_check_rec_size(self, rec_len))
1081 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001082
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001083 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1084 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087}
1088
1089static int
1090join_append_lineterminator(WriterObj *self)
1091{
Antoine Pitrou40455752010-08-15 18:51:10 +00001092 Py_ssize_t terminator_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
1096 if (terminator_len == -1)
1097 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 /* grow record buffer if necessary */
1100 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1101 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
1104 if (terminator == NULL)
1105 return 0;
1106 memmove(self->rec + self->rec_len, terminator,
1107 sizeof(Py_UNICODE)*terminator_len);
1108 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001109
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001111}
1112
1113PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001114"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001115"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001116"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001117"elements will be converted to string.");
1118
1119static PyObject *
1120csv_writerow(WriterObj *self, PyObject *seq)
1121{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001122 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001123 Py_ssize_t len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 if (!PySequence_Check(seq))
1126 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 len = PySequence_Length(seq);
1129 if (len < 0)
1130 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 /* Join all fields in internal buffer.
1133 */
1134 join_reset(self);
1135 for (i = 0; i < len; i++) {
1136 PyObject *field;
1137 int append_ok;
1138 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 field = PySequence_GetItem(seq, i);
1141 if (field == NULL)
1142 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 switch (dialect->quoting) {
1145 case QUOTE_NONNUMERIC:
1146 quoted = !PyNumber_Check(field);
1147 break;
1148 case QUOTE_ALL:
1149 quoted = 1;
1150 break;
1151 default:
1152 quoted = 0;
1153 break;
1154 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 if (PyUnicode_Check(field)) {
1157 append_ok = join_append(self,
1158 PyUnicode_AS_UNICODE(field),
Guido van Rossum46264582007-08-06 19:32:18 +00001159 &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 Py_DECREF(field);
1161 }
1162 else if (field == Py_None) {
1163 append_ok = join_append(self, NULL,
1164 &quoted, len == 1);
1165 Py_DECREF(field);
1166 }
1167 else {
1168 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 str = PyObject_Str(field);
1171 Py_DECREF(field);
1172 if (str == NULL)
1173 return NULL;
1174 append_ok = join_append(self,
1175 PyUnicode_AS_UNICODE(str),
1176 &quoted, len == 1);
1177 Py_DECREF(str);
1178 }
1179 if (!append_ok)
1180 return NULL;
1181 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 /* Add line terminator.
1184 */
1185 if (!join_append_lineterminator(self))
1186 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 return PyObject_CallFunction(self->writeline,
1189 "(u#)", self->rec,
1190 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001191}
1192
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001193PyDoc_STRVAR(csv_writerows_doc,
1194"writerows(sequence of sequences)\n"
1195"\n"
1196"Construct and write a series of sequences to a csv file. Non-string\n"
1197"elements will be converted to string.");
1198
Skip Montanarob4a04172003-03-20 23:29:12 +00001199static PyObject *
1200csv_writerows(WriterObj *self, PyObject *seqseq)
1201{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 row_iter = PyObject_GetIter(seqseq);
1205 if (row_iter == NULL) {
1206 PyErr_SetString(PyExc_TypeError,
1207 "writerows() argument must be iterable");
1208 return NULL;
1209 }
1210 while ((row_obj = PyIter_Next(row_iter))) {
1211 result = csv_writerow(self, row_obj);
1212 Py_DECREF(row_obj);
1213 if (!result) {
1214 Py_DECREF(row_iter);
1215 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001216 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 else
1218 Py_DECREF(result);
1219 }
1220 Py_DECREF(row_iter);
1221 if (PyErr_Occurred())
1222 return NULL;
1223 Py_INCREF(Py_None);
1224 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001225}
1226
1227static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1229 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1230 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001231};
1232
1233#define W_OFF(x) offsetof(WriterObj, x)
1234
1235static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1237 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001238};
1239
1240static void
1241Writer_dealloc(WriterObj *self)
1242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 PyObject_GC_UnTrack(self);
1244 Py_XDECREF(self->dialect);
1245 Py_XDECREF(self->writeline);
1246 if (self->rec != NULL)
1247 PyMem_Free(self->rec);
1248 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001249}
1250
1251static int
1252Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1253{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 Py_VISIT(self->dialect);
1255 Py_VISIT(self->writeline);
1256 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001257}
1258
1259static int
1260Writer_clear(WriterObj *self)
1261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 Py_CLEAR(self->dialect);
1263 Py_CLEAR(self->writeline);
1264 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001265}
1266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001268"CSV writer\n"
1269"\n"
1270"Writer objects are responsible for generating tabular data\n"
1271"in CSV format from sequence input.\n"
1272);
1273
1274static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 PyVarObject_HEAD_INIT(NULL, 0)
1276 "_csv.writer", /*tp_name*/
1277 sizeof(WriterObj), /*tp_basicsize*/
1278 0, /*tp_itemsize*/
1279 /* methods */
1280 (destructor)Writer_dealloc, /*tp_dealloc*/
1281 (printfunc)0, /*tp_print*/
1282 (getattrfunc)0, /*tp_getattr*/
1283 (setattrfunc)0, /*tp_setattr*/
1284 0, /*tp_reserved*/
1285 (reprfunc)0, /*tp_repr*/
1286 0, /*tp_as_number*/
1287 0, /*tp_as_sequence*/
1288 0, /*tp_as_mapping*/
1289 (hashfunc)0, /*tp_hash*/
1290 (ternaryfunc)0, /*tp_call*/
1291 (reprfunc)0, /*tp_str*/
1292 0, /*tp_getattro*/
1293 0, /*tp_setattro*/
1294 0, /*tp_as_buffer*/
1295 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1296 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1297 Writer_Type_doc,
1298 (traverseproc)Writer_traverse, /*tp_traverse*/
1299 (inquiry)Writer_clear, /*tp_clear*/
1300 0, /*tp_richcompare*/
1301 0, /*tp_weaklistoffset*/
1302 (getiterfunc)0, /*tp_iter*/
1303 (getiterfunc)0, /*tp_iternext*/
1304 Writer_methods, /*tp_methods*/
1305 Writer_memberlist, /*tp_members*/
1306 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001307};
1308
1309static PyObject *
1310csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 PyObject * output_file, * dialect = NULL;
1313 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001314
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 if (!self)
1316 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 self->dialect = NULL;
1319 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001320
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 self->rec = NULL;
1322 self->rec_size = 0;
1323 self->rec_len = 0;
1324 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1327 Py_DECREF(self);
1328 return NULL;
1329 }
1330 self->writeline = PyObject_GetAttrString(output_file, "write");
1331 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1332 PyErr_SetString(PyExc_TypeError,
1333 "argument 1 must have a \"write\" method");
1334 Py_DECREF(self);
1335 return NULL;
1336 }
1337 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1338 if (self->dialect == NULL) {
1339 Py_DECREF(self);
1340 return NULL;
1341 }
1342 PyObject_GC_Track(self);
1343 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001344}
1345
1346/*
1347 * DIALECT REGISTRY
1348 */
1349static PyObject *
1350csv_list_dialects(PyObject *module, PyObject *args)
1351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001353}
1354
1355static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001356csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001357{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 PyObject *name_obj, *dialect_obj = NULL;
1359 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1362 return NULL;
1363 if (!IS_BASESTRING(name_obj)) {
1364 PyErr_SetString(PyExc_TypeError,
1365 "dialect name must be a string or unicode");
1366 return NULL;
1367 }
1368 dialect = _call_dialect(dialect_obj, kwargs);
1369 if (dialect == NULL)
1370 return NULL;
1371 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1372 Py_DECREF(dialect);
1373 return NULL;
1374 }
1375 Py_DECREF(dialect);
1376 Py_INCREF(Py_None);
1377 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378}
1379
1380static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001381csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 if (PyDict_DelItem(dialects, name_obj) < 0)
1384 return PyErr_Format(error_obj, "unknown dialect");
1385 Py_INCREF(Py_None);
1386 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001387}
1388
1389static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001390csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001393}
1394
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001395static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001396csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001397{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 PyObject *new_limit = NULL;
1399 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1402 return NULL;
1403 if (new_limit != NULL) {
1404 if (!PyLong_CheckExact(new_limit)) {
1405 PyErr_Format(PyExc_TypeError,
1406 "limit must be an integer");
1407 return NULL;
1408 }
1409 field_limit = PyLong_AsLong(new_limit);
1410 if (field_limit == -1 && PyErr_Occurred()) {
1411 field_limit = old_limit;
1412 return NULL;
1413 }
1414 }
1415 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001416}
1417
Skip Montanarob4a04172003-03-20 23:29:12 +00001418/*
1419 * MODULE
1420 */
1421
1422PyDoc_STRVAR(csv_module_doc,
1423"CSV parsing and writing.\n"
1424"\n"
1425"This module provides classes that assist in the reading and writing\n"
1426"of Comma Separated Value (CSV) files, and implements the interface\n"
1427"described by PEP 305. Although many CSV files are simple to parse,\n"
1428"the format is not formally defined by a stable specification and\n"
1429"is subtle enough that parsing lines of a CSV file with something\n"
1430"like line.split(\",\") is bound to fail. The module supports three\n"
1431"basic APIs: reading, writing, and registration of dialects.\n"
1432"\n"
1433"\n"
1434"DIALECT REGISTRATION:\n"
1435"\n"
1436"Readers and writers support a dialect argument, which is a convenient\n"
1437"handle on a group of settings. When the dialect argument is a string,\n"
1438"it identifies one of the dialects previously registered with the module.\n"
1439"If it is a class or instance, the attributes of the argument are used as\n"
1440"the settings for the reader or writer:\n"
1441"\n"
1442" class excel:\n"
1443" delimiter = ','\n"
1444" quotechar = '\"'\n"
1445" escapechar = None\n"
1446" doublequote = True\n"
1447" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001448" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001449" quoting = QUOTE_MINIMAL\n"
1450"\n"
1451"SETTINGS:\n"
1452"\n"
1453" * quotechar - specifies a one-character string to use as the \n"
1454" quoting character. It defaults to '\"'.\n"
1455" * delimiter - specifies a one-character string to use as the \n"
1456" field separator. It defaults to ','.\n"
1457" * skipinitialspace - specifies how to interpret whitespace which\n"
1458" immediately follows a delimiter. It defaults to False, which\n"
1459" means that whitespace immediately following a delimiter is part\n"
1460" of the following field.\n"
1461" * lineterminator - specifies the character sequence which should \n"
1462" terminate rows.\n"
1463" * quoting - controls when quotes should be generated by the writer.\n"
1464" It can take on any of the following module constants:\n"
1465"\n"
1466" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1467" field contains either the quotechar or the delimiter\n"
1468" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1469" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001470" fields which do not parse as integers or floating point\n"
1471" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001472" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1473" * escapechar - specifies a one-character string used to escape \n"
1474" the delimiter when quoting is set to QUOTE_NONE.\n"
1475" * doublequote - controls the handling of quotes inside fields. When\n"
1476" True, two consecutive quotes are interpreted as one during read,\n"
1477" and when writing, each quote character embedded in the data is\n"
1478" written as two quotes\n");
1479
1480PyDoc_STRVAR(csv_reader_doc,
1481" csv_reader = reader(iterable [, dialect='excel']\n"
1482" [optional keyword args])\n"
1483" for row in csv_reader:\n"
1484" process(row)\n"
1485"\n"
1486"The \"iterable\" argument can be any object that returns a line\n"
1487"of input for each iteration, such as a file object or a list. The\n"
1488"optional \"dialect\" parameter is discussed below. The function\n"
1489"also accepts optional keyword arguments which override settings\n"
1490"provided by the dialect.\n"
1491"\n"
1492"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001493"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001494
1495PyDoc_STRVAR(csv_writer_doc,
1496" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1497" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001498" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001499" csv_writer.writerow(row)\n"
1500"\n"
1501" [or]\n"
1502"\n"
1503" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1504" [optional keyword args])\n"
1505" csv_writer.writerows(rows)\n"
1506"\n"
1507"The \"fileobj\" argument can be any object that supports the file API.\n");
1508
1509PyDoc_STRVAR(csv_list_dialects_doc,
1510"Return a list of all know dialect names.\n"
1511" names = csv.list_dialects()");
1512
1513PyDoc_STRVAR(csv_get_dialect_doc,
1514"Return the dialect instance associated with name.\n"
1515" dialect = csv.get_dialect(name)");
1516
1517PyDoc_STRVAR(csv_register_dialect_doc,
1518"Create a mapping from a string name to a dialect class.\n"
1519" dialect = csv.register_dialect(name, dialect)");
1520
1521PyDoc_STRVAR(csv_unregister_dialect_doc,
1522"Delete the name/dialect mapping associated with a string name.\n"
1523" csv.unregister_dialect(name)");
1524
Andrew McNamara31d88962005-01-12 03:45:10 +00001525PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001526"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001527" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001528"\n"
1529"Returns old limit. If limit is not given, no new limit is set and\n"
1530"the old limit is returned");
1531
Skip Montanarob4a04172003-03-20 23:29:12 +00001532static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001533 { "reader", (PyCFunction)csv_reader,
1534 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1535 { "writer", (PyCFunction)csv_writer,
1536 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1537 { "list_dialects", (PyCFunction)csv_list_dialects,
1538 METH_NOARGS, csv_list_dialects_doc},
1539 { "register_dialect", (PyCFunction)csv_register_dialect,
1540 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1541 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1542 METH_O, csv_unregister_dialect_doc},
1543 { "get_dialect", (PyCFunction)csv_get_dialect,
1544 METH_O, csv_get_dialect_doc},
1545 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1546 METH_VARARGS, csv_field_size_limit_doc},
1547 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001548};
1549
Martin v. Löwis1a214512008-06-11 05:26:20 +00001550
1551static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001552 PyModuleDef_HEAD_INIT,
1553 "_csv",
1554 csv_module_doc,
1555 -1,
1556 csv_methods,
1557 NULL,
1558 NULL,
1559 NULL,
1560 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001561};
1562
Skip Montanarob4a04172003-03-20 23:29:12 +00001563PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001564PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001566 PyObject *module;
1567 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001568
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001569 if (PyType_Ready(&Dialect_Type) < 0)
1570 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001571
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001572 if (PyType_Ready(&Reader_Type) < 0)
1573 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001574
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001575 if (PyType_Ready(&Writer_Type) < 0)
1576 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001578 /* Create the module and add the functions */
1579 module = PyModule_Create(&_csvmodule);
1580 if (module == NULL)
1581 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 /* Add version to the module. */
1584 if (PyModule_AddStringConstant(module, "__version__",
1585 MODULE_VERSION) == -1)
1586 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 /* Add _dialects dictionary */
1589 dialects = PyDict_New();
1590 if (dialects == NULL)
1591 return NULL;
1592 if (PyModule_AddObject(module, "_dialects", dialects))
1593 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 /* Add quote styles into dictionary */
1596 for (style = quote_styles; style->name; style++) {
1597 if (PyModule_AddIntConstant(module, style->name,
1598 style->style) == -1)
1599 return NULL;
1600 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 /* Add the Dialect type */
1603 Py_INCREF(&Dialect_Type);
1604 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1605 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 /* Add the CSV exception object to the module. */
1608 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1609 if (error_obj == NULL)
1610 return NULL;
1611 PyModule_AddObject(module, "Error", error_obj);
1612 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001613}