blob: 59c74e76369a2ff1e179245c32571c1a769c8fff [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
/* String-type test used for the "dialect" and "lineterminator" arguments;
   it is simply PyUnicode_Check(). */
#define IS_BASESTRING(o) PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000019static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000020static PyObject *dialects; /* Dialect registry */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000021static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000022
/* States of the reader's character-at-a-time parser
   (see parse_process_char()). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
28
/* Quoting styles a dialect may select; the numeric values are what the
   "quoting" attribute stores. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
32
33typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 QuoteStyle style;
35 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000036} StyleDesc;
37
38static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
40 { QUOTE_ALL, "QUOTE_ALL" },
41 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
42 { QUOTE_NONE, "QUOTE_NONE" },
43 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000044};
45
46typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000049 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020050 Py_UCS4 delimiter; /* field separator */
51 Py_UCS4 quotechar; /* quote character */
52 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000053 int skipinitialspace; /* ignore spaces following delimiter? */
54 PyObject *lineterminator; /* string to write between records */
55 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000058} DialectObj;
59
Neal Norwitz227b5332006-03-22 09:28:35 +000060static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000061
62typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 PyObject *fields; /* field list for current record */
70 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020071 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +000072 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 Py_ssize_t field_len; /* length of current field */
74 int numeric_field; /* treat field as numeric */
75 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000076} ReaderObj;
77
Neal Norwitz227b5332006-03-22 09:28:35 +000078static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000079
/* Exact type test: true only when `v` is an instance of Reader_Type itself. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000081
82typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000088
Antoine Pitrou77ea6402011-10-07 04:26:55 +020089 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +000090 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_ssize_t rec_len; /* length of record */
92 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000093} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
/*
 * DIALECT class
 */
100
101static PyObject *
102get_dialect_from_registry(PyObject * name_obj)
103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 dialect_obj = PyDict_GetItem(dialects, name_obj);
107 if (dialect_obj == NULL) {
108 if (!PyErr_Occurred())
109 PyErr_Format(error_obj, "unknown dialect");
110 }
111 else
112 Py_INCREF(dialect_obj);
113 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114}
115
Skip Montanarob4a04172003-03-20 23:29:12 +0000116static PyObject *
117get_string(PyObject *str)
118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_XINCREF(str);
120 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000121}
122
Skip Montanarob4a04172003-03-20 23:29:12 +0000123static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200124get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 if (c == '\0') {
127 Py_INCREF(Py_None);
128 return Py_None;
129 }
130 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200131 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000132}
133
Skip Montanarob4a04172003-03-20 23:29:12 +0000134static PyObject *
135Dialect_get_lineterminator(DialectObj *self)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000141Dialect_get_delimiter(DialectObj *self)
142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000144}
145
146static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000147Dialect_get_escapechar(DialectObj *self)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000150}
151
Andrew McNamara1196cf12005-01-07 04:42:45 +0000152static PyObject *
153Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000156}
157
158static PyObject *
159Dialect_get_quoting(DialectObj *self)
160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
164static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000165_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (src == NULL)
168 *target = dflt;
169 else
170 *target = PyObject_IsTrue(src);
171 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000172}
173
Andrew McNamara1196cf12005-01-07 04:42:45 +0000174static int
175_set_int(const char *name, int *target, PyObject *src, int dflt)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 if (src == NULL)
178 *target = dflt;
179 else {
180 long value;
181 if (!PyLong_CheckExact(src)) {
182 PyErr_Format(PyExc_TypeError,
183 "\"%s\" must be an integer", name);
184 return -1;
185 }
186 value = PyLong_AsLong(src);
187 if (value == -1 && PyErr_Occurred())
188 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000189#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000190 if (value > INT_MAX || value < INT_MIN) {
191 PyErr_Format(PyExc_ValueError,
192 "integer out of range for \"%s\"", name);
193 return -1;
194 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000195#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000196 *target = (int)value;
197 }
198 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000199}
200
201static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200202_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000204 if (src == NULL)
205 *target = dflt;
206 else {
207 *target = '\0';
208 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 Py_ssize_t len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000210 len = PyUnicode_GetSize(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200211 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 PyErr_Format(PyExc_TypeError,
213 "\"%s\" must be an 1-character string",
214 name);
215 return -1;
216 }
217 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200218 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 }
220 }
221 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000222}
223
224static int
225_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
226{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 if (src == NULL)
228 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
229 else {
230 if (src == Py_None)
231 *target = NULL;
232 else if (!IS_BASESTRING(src)) {
233 PyErr_Format(PyExc_TypeError,
234 "\"%s\" must be a string", name);
235 return -1;
236 }
237 else {
238 Py_XDECREF(*target);
239 Py_INCREF(src);
240 *target = src;
241 }
242 }
243 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000244}
245
246static int
247dialect_check_quoting(int quoting)
248{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000249 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000250
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 for (qs = quote_styles; qs->name; qs++) {
252 if (qs->style == quoting)
253 return 0;
254 }
255 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
256 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000257}
Skip Montanarob4a04172003-03-20 23:29:12 +0000258
259#define D_OFF(x) offsetof(DialectObj, x)
260
261static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000262 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
263 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
264 { "strict", T_INT, D_OFF(strict), READONLY },
265 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000266};
267
268static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000269 { "delimiter", (getter)Dialect_get_delimiter},
270 { "escapechar", (getter)Dialect_get_escapechar},
271 { "lineterminator", (getter)Dialect_get_lineterminator},
272 { "quotechar", (getter)Dialect_get_quotechar},
273 { "quoting", (getter)Dialect_get_quoting},
274 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000275};
276
277static void
278Dialect_dealloc(DialectObj *self)
279{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000280 Py_XDECREF(self->lineterminator);
281 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000282}
283
/* Keyword names accepted by dialect_new(), in the order in which its
   PyArg_ParseTupleAndKeywords() call lists the output variables. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL    /* sentinel */
};
296
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000297static PyObject *
298dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 DialectObj *self;
301 PyObject *ret = NULL;
302 PyObject *dialect = NULL;
303 PyObject *delimiter = NULL;
304 PyObject *doublequote = NULL;
305 PyObject *escapechar = NULL;
306 PyObject *lineterminator = NULL;
307 PyObject *quotechar = NULL;
308 PyObject *quoting = NULL;
309 PyObject *skipinitialspace = NULL;
310 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000311
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
313 "|OOOOOOOOO", dialect_kws,
314 &dialect,
315 &delimiter,
316 &doublequote,
317 &escapechar,
318 &lineterminator,
319 &quotechar,
320 &quoting,
321 &skipinitialspace,
322 &strict))
323 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 if (dialect != NULL) {
326 if (IS_BASESTRING(dialect)) {
327 dialect = get_dialect_from_registry(dialect);
328 if (dialect == NULL)
329 return NULL;
330 }
331 else
332 Py_INCREF(dialect);
333 /* Can we reuse this instance? */
334 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
335 delimiter == 0 &&
336 doublequote == 0 &&
337 escapechar == 0 &&
338 lineterminator == 0 &&
339 quotechar == 0 &&
340 quoting == 0 &&
341 skipinitialspace == 0 &&
342 strict == 0)
343 return dialect;
344 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 self = (DialectObj *)type->tp_alloc(type, 0);
347 if (self == NULL) {
348 Py_XDECREF(dialect);
349 return NULL;
350 }
351 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 Py_XINCREF(delimiter);
354 Py_XINCREF(doublequote);
355 Py_XINCREF(escapechar);
356 Py_XINCREF(lineterminator);
357 Py_XINCREF(quotechar);
358 Py_XINCREF(quoting);
359 Py_XINCREF(skipinitialspace);
360 Py_XINCREF(strict);
361 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000362#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 if (v == NULL) \
364 v = PyObject_GetAttrString(dialect, n)
365 DIALECT_GETATTR(delimiter, "delimiter");
366 DIALECT_GETATTR(doublequote, "doublequote");
367 DIALECT_GETATTR(escapechar, "escapechar");
368 DIALECT_GETATTR(lineterminator, "lineterminator");
369 DIALECT_GETATTR(quotechar, "quotechar");
370 DIALECT_GETATTR(quoting, "quoting");
371 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
372 DIALECT_GETATTR(strict, "strict");
373 PyErr_Clear();
374 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000377#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 if (meth(name, target, src, dflt)) \
379 goto err
380 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
381 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
382 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
383 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
384 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
385 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
386 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
387 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000389 /* validate options */
390 if (dialect_check_quoting(self->quoting))
391 goto err;
392 if (self->delimiter == 0) {
393 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
394 goto err;
395 }
396 if (quotechar == Py_None && quoting == NULL)
397 self->quoting = QUOTE_NONE;
398 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
399 PyErr_SetString(PyExc_TypeError,
400 "quotechar must be set if quoting enabled");
401 goto err;
402 }
403 if (self->lineterminator == 0) {
404 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
405 goto err;
406 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 ret = (PyObject *)self;
409 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000410err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 Py_XDECREF(self);
412 Py_XDECREF(dialect);
413 Py_XDECREF(delimiter);
414 Py_XDECREF(doublequote);
415 Py_XDECREF(escapechar);
416 Py_XDECREF(lineterminator);
417 Py_XDECREF(quotechar);
418 Py_XDECREF(quoting);
419 Py_XDECREF(skipinitialspace);
420 Py_XDECREF(strict);
421 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000422}
423
424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000426"CSV dialect\n"
427"\n"
428"The Dialect type records CSV parsing and generation options.\n");
429
430static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431 PyVarObject_HEAD_INIT(NULL, 0)
432 "_csv.Dialect", /* tp_name */
433 sizeof(DialectObj), /* tp_basicsize */
434 0, /* tp_itemsize */
435 /* methods */
436 (destructor)Dialect_dealloc, /* tp_dealloc */
437 (printfunc)0, /* tp_print */
438 (getattrfunc)0, /* tp_getattr */
439 (setattrfunc)0, /* tp_setattr */
440 0, /* tp_reserved */
441 (reprfunc)0, /* tp_repr */
442 0, /* tp_as_number */
443 0, /* tp_as_sequence */
444 0, /* tp_as_mapping */
445 (hashfunc)0, /* tp_hash */
446 (ternaryfunc)0, /* tp_call */
447 (reprfunc)0, /* tp_str */
448 0, /* tp_getattro */
449 0, /* tp_setattro */
450 0, /* tp_as_buffer */
451 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
452 Dialect_Type_doc, /* tp_doc */
453 0, /* tp_traverse */
454 0, /* tp_clear */
455 0, /* tp_richcompare */
456 0, /* tp_weaklistoffset */
457 0, /* tp_iter */
458 0, /* tp_iternext */
459 0, /* tp_methods */
460 Dialect_memberlist, /* tp_members */
461 Dialect_getsetlist, /* tp_getset */
462 0, /* tp_base */
463 0, /* tp_dict */
464 0, /* tp_descr_get */
465 0, /* tp_descr_set */
466 0, /* tp_dictoffset */
467 0, /* tp_init */
468 0, /* tp_alloc */
469 dialect_new, /* tp_new */
470 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000471};
472
Andrew McNamara91b97462005-01-11 01:07:23 +0000473/*
474 * Return an instance of the dialect type, given a Python instance or kwarg
475 * description of the dialect
476 */
477static PyObject *
478_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
479{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 PyObject *ctor_args;
481 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000482
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000483 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
484 if (ctor_args == NULL)
485 return NULL;
486 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
487 Py_DECREF(ctor_args);
488 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000489}
490
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000494static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000495parse_save_field(ReaderObj *self)
496{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000498
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200499 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
500 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000501 if (field == NULL)
502 return -1;
503 self->field_len = 0;
504 if (self->numeric_field) {
505 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000506
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000507 self->numeric_field = 0;
508 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200510 if (tmp == NULL)
511 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 field = tmp;
513 }
514 PyList_Append(self->fields, field);
515 Py_DECREF(field);
516 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000517}
518
519static int
520parse_grow_buff(ReaderObj *self)
521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 if (self->field_size == 0) {
523 self->field_size = 4096;
524 if (self->field != NULL)
525 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200526 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 }
528 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200529 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000530 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 PyErr_NoMemory();
532 return 0;
533 }
534 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200535 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 }
537 if (self->field == NULL) {
538 PyErr_NoMemory();
539 return 0;
540 }
541 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000542}
543
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000544static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200545parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000546{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 if (self->field_len >= field_limit) {
548 PyErr_Format(error_obj, "field larger than field limit (%ld)",
549 field_limit);
550 return -1;
551 }
552 if (self->field_len == self->field_size && !parse_grow_buff(self))
553 return -1;
554 self->field[self->field_len++] = c;
555 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000556}
557
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000558static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200559parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000560{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000562
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 switch (self->state) {
564 case START_RECORD:
565 /* start of record */
566 if (c == '\0')
567 /* empty line - return [] */
568 break;
569 else if (c == '\n' || c == '\r') {
570 self->state = EAT_CRNL;
571 break;
572 }
573 /* normal character - handle as START_FIELD */
574 self->state = START_FIELD;
575 /* fallthru */
576 case START_FIELD:
577 /* expecting field */
578 if (c == '\n' || c == '\r' || c == '\0') {
579 /* save empty field - return [fields] */
580 if (parse_save_field(self) < 0)
581 return -1;
582 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
583 }
584 else if (c == dialect->quotechar &&
585 dialect->quoting != QUOTE_NONE) {
586 /* start quoted field */
587 self->state = IN_QUOTED_FIELD;
588 }
589 else if (c == dialect->escapechar) {
590 /* possible escaped character */
591 self->state = ESCAPED_CHAR;
592 }
593 else if (c == ' ' && dialect->skipinitialspace)
594 /* ignore space at start of field */
595 ;
596 else if (c == dialect->delimiter) {
597 /* save empty field */
598 if (parse_save_field(self) < 0)
599 return -1;
600 }
601 else {
602 /* begin new unquoted field */
603 if (dialect->quoting == QUOTE_NONNUMERIC)
604 self->numeric_field = 1;
605 if (parse_add_char(self, c) < 0)
606 return -1;
607 self->state = IN_FIELD;
608 }
609 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 case ESCAPED_CHAR:
612 if (c == '\0')
613 c = '\n';
614 if (parse_add_char(self, c) < 0)
615 return -1;
616 self->state = IN_FIELD;
617 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000618
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000619 case IN_FIELD:
620 /* in unquoted field */
621 if (c == '\n' || c == '\r' || c == '\0') {
622 /* end of line - return [fields] */
623 if (parse_save_field(self) < 0)
624 return -1;
625 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
626 }
627 else if (c == dialect->escapechar) {
628 /* possible escaped character */
629 self->state = ESCAPED_CHAR;
630 }
631 else if (c == dialect->delimiter) {
632 /* save field - wait for new field */
633 if (parse_save_field(self) < 0)
634 return -1;
635 self->state = START_FIELD;
636 }
637 else {
638 /* normal character - save in field */
639 if (parse_add_char(self, c) < 0)
640 return -1;
641 }
642 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 case IN_QUOTED_FIELD:
645 /* in quoted field */
646 if (c == '\0')
647 ;
648 else if (c == dialect->escapechar) {
649 /* Possible escape character */
650 self->state = ESCAPE_IN_QUOTED_FIELD;
651 }
652 else if (c == dialect->quotechar &&
653 dialect->quoting != QUOTE_NONE) {
654 if (dialect->doublequote) {
655 /* doublequote; " represented by "" */
656 self->state = QUOTE_IN_QUOTED_FIELD;
657 }
658 else {
659 /* end of quote part of field */
660 self->state = IN_FIELD;
661 }
662 }
663 else {
664 /* normal character - save in field */
665 if (parse_add_char(self, c) < 0)
666 return -1;
667 }
668 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000670 case ESCAPE_IN_QUOTED_FIELD:
671 if (c == '\0')
672 c = '\n';
673 if (parse_add_char(self, c) < 0)
674 return -1;
675 self->state = IN_QUOTED_FIELD;
676 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 case QUOTE_IN_QUOTED_FIELD:
679 /* doublequote - seen a quote in an quoted field */
680 if (dialect->quoting != QUOTE_NONE &&
681 c == dialect->quotechar) {
682 /* save "" as " */
683 if (parse_add_char(self, c) < 0)
684 return -1;
685 self->state = IN_QUOTED_FIELD;
686 }
687 else if (c == dialect->delimiter) {
688 /* save field - wait for new field */
689 if (parse_save_field(self) < 0)
690 return -1;
691 self->state = START_FIELD;
692 }
693 else if (c == '\n' || c == '\r' || c == '\0') {
694 /* end of line - return [fields] */
695 if (parse_save_field(self) < 0)
696 return -1;
697 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
698 }
699 else if (!dialect->strict) {
700 if (parse_add_char(self, c) < 0)
701 return -1;
702 self->state = IN_FIELD;
703 }
704 else {
705 /* illegal */
706 PyErr_Format(error_obj, "'%c' expected after '%c'",
707 dialect->delimiter,
708 dialect->quotechar);
709 return -1;
710 }
711 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 case EAT_CRNL:
714 if (c == '\n' || c == '\r')
715 ;
716 else if (c == '\0')
717 self->state = START_RECORD;
718 else {
719 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
720 return -1;
721 }
722 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 }
725 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000726}
727
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000728static int
729parse_reset(ReaderObj *self)
730{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 Py_XDECREF(self->fields);
732 self->fields = PyList_New(0);
733 if (self->fields == NULL)
734 return -1;
735 self->field_len = 0;
736 self->state = START_RECORD;
737 self->numeric_field = 0;
738 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000739}
Skip Montanarob4a04172003-03-20 23:29:12 +0000740
741static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000742Reader_iternext(ReaderObj *self)
743{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200745 Py_UCS4 c;
746 Py_ssize_t pos, linelen;
747 unsigned int kind;
748 void *data;
749 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 if (parse_reset(self) < 0)
752 return NULL;
753 do {
754 lineobj = PyIter_Next(self->input_iter);
755 if (lineobj == NULL) {
756 /* End of input OR exception */
757 if (!PyErr_Occurred() && self->field_len != 0)
758 PyErr_Format(error_obj,
759 "newline inside string");
760 return NULL;
761 }
762 if (!PyUnicode_Check(lineobj)) {
763 PyErr_Format(error_obj,
764 "iterator should return strings, "
765 "not %.200s "
766 "(did you open the file in text mode?)",
767 lineobj->ob_type->tp_name
768 );
769 Py_DECREF(lineobj);
770 return NULL;
771 }
772 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200773 kind = PyUnicode_KIND(lineobj);
774 data = PyUnicode_DATA(lineobj);
775 pos = 0;
776 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200778 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000780 Py_DECREF(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 PyErr_Format(error_obj,
782 "line contains NULL byte");
783 goto err;
784 }
785 if (parse_process_char(self, c) < 0) {
786 Py_DECREF(lineobj);
787 goto err;
788 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200789 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 }
791 Py_DECREF(lineobj);
792 if (parse_process_char(self, 0) < 0)
793 goto err;
794 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 fields = self->fields;
797 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000798err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000800}
801
802static void
803Reader_dealloc(ReaderObj *self)
804{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 PyObject_GC_UnTrack(self);
806 Py_XDECREF(self->dialect);
807 Py_XDECREF(self->input_iter);
808 Py_XDECREF(self->fields);
809 if (self->field != NULL)
810 PyMem_Free(self->field);
811 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000812}
813
814static int
815Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
816{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 Py_VISIT(self->dialect);
818 Py_VISIT(self->input_iter);
819 Py_VISIT(self->fields);
820 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000821}
822
823static int
824Reader_clear(ReaderObj *self)
825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 Py_CLEAR(self->dialect);
827 Py_CLEAR(self->input_iter);
828 Py_CLEAR(self->fields);
829 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000830}
831
832PyDoc_STRVAR(Reader_Type_doc,
833"CSV reader\n"
834"\n"
835"Reader objects are responsible for reading and parsing tabular data\n"
836"in CSV format.\n"
837);
838
839static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000841};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000842#define R_OFF(x) offsetof(ReaderObj, x)
843
844static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
846 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
847 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000848};
849
Skip Montanarob4a04172003-03-20 23:29:12 +0000850
851static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 PyVarObject_HEAD_INIT(NULL, 0)
853 "_csv.reader", /*tp_name*/
854 sizeof(ReaderObj), /*tp_basicsize*/
855 0, /*tp_itemsize*/
856 /* methods */
857 (destructor)Reader_dealloc, /*tp_dealloc*/
858 (printfunc)0, /*tp_print*/
859 (getattrfunc)0, /*tp_getattr*/
860 (setattrfunc)0, /*tp_setattr*/
861 0, /*tp_reserved*/
862 (reprfunc)0, /*tp_repr*/
863 0, /*tp_as_number*/
864 0, /*tp_as_sequence*/
865 0, /*tp_as_mapping*/
866 (hashfunc)0, /*tp_hash*/
867 (ternaryfunc)0, /*tp_call*/
868 (reprfunc)0, /*tp_str*/
869 0, /*tp_getattro*/
870 0, /*tp_setattro*/
871 0, /*tp_as_buffer*/
872 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
873 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
874 Reader_Type_doc, /*tp_doc*/
875 (traverseproc)Reader_traverse, /*tp_traverse*/
876 (inquiry)Reader_clear, /*tp_clear*/
877 0, /*tp_richcompare*/
878 0, /*tp_weaklistoffset*/
879 PyObject_SelfIter, /*tp_iter*/
880 (getiterfunc)Reader_iternext, /*tp_iternext*/
881 Reader_methods, /*tp_methods*/
882 Reader_memberlist, /*tp_members*/
883 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000884
885};
886
887static PyObject *
888csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
889{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 PyObject * iterator, * dialect = NULL;
891 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 if (!self)
894 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000895
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 self->dialect = NULL;
897 self->fields = NULL;
898 self->input_iter = NULL;
899 self->field = NULL;
900 self->field_size = 0;
901 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000902
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 if (parse_reset(self) < 0) {
904 Py_DECREF(self);
905 return NULL;
906 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000907
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
909 Py_DECREF(self);
910 return NULL;
911 }
912 self->input_iter = PyObject_GetIter(iterator);
913 if (self->input_iter == NULL) {
914 PyErr_SetString(PyExc_TypeError,
915 "argument 1 must be an iterator");
916 Py_DECREF(self);
917 return NULL;
918 }
919 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
920 if (self->dialect == NULL) {
921 Py_DECREF(self);
922 return NULL;
923 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000924
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000925 PyObject_GC_Track(self);
926 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000927}
928
929/*
930 * WRITER
931 */
932/* ---------------------------------------------------------------- */
933static void
934join_reset(WriterObj *self)
935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 self->rec_len = 0;
937 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000938}
939
940#define MEM_INCR 32768
941
942/* Calculate new record length or append field to record. Return new
943 * record length.
944 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000945static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200946join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
947 Py_ssize_t field_len, int quote_empty, int *quoted,
948 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000949{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 DialectObj *dialect = self->dialect;
951 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000952 Py_ssize_t rec_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000954
955#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 do {\
957 if (copy_phase) \
958 self->rec[rec_len] = c;\
959 rec_len++;\
960 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
963 if (lineterm == NULL)
964 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 /* If this is not the first field we need a field separator */
969 if (self->num_fields > 0)
970 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 /* Handle preceding quote */
973 if (copy_phase && *quoted)
974 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 /* Copy/count field data */
977 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200978 for (i = 0; field_data && (i < field_len); i++) {
979 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000981
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 if (c == dialect->delimiter ||
983 c == dialect->escapechar ||
984 c == dialect->quotechar ||
985 Py_UNICODE_strchr(lineterm, c)) {
986 if (dialect->quoting == QUOTE_NONE)
987 want_escape = 1;
988 else {
989 if (c == dialect->quotechar) {
990 if (dialect->doublequote)
991 ADDCH(dialect->quotechar);
992 else
993 want_escape = 1;
994 }
995 if (!want_escape)
996 *quoted = 1;
997 }
998 if (want_escape) {
999 if (!dialect->escapechar) {
1000 PyErr_Format(error_obj,
1001 "need to escape, but no escapechar set");
1002 return -1;
1003 }
1004 ADDCH(dialect->escapechar);
1005 }
1006 }
1007 /* Copy field character into record buffer.
1008 */
1009 ADDCH(c);
1010 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 /* If field is empty check if it needs to be quoted.
1013 */
1014 if (i == 0 && quote_empty) {
1015 if (dialect->quoting == QUOTE_NONE) {
1016 PyErr_Format(error_obj,
1017 "single empty field record must be quoted");
1018 return -1;
1019 }
1020 else
1021 *quoted = 1;
1022 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 if (*quoted) {
1025 if (copy_phase)
1026 ADDCH(dialect->quotechar);
1027 else
1028 rec_len += 2;
1029 }
1030 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001031#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001032}
1033
1034static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001035join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001036{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001037
Antoine Pitrou40455752010-08-15 18:51:10 +00001038 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 PyErr_NoMemory();
1040 return 0;
1041 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001042
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 if (rec_len > self->rec_size) {
1044 if (self->rec_size == 0) {
1045 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1046 if (self->rec != NULL)
1047 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001048 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001049 }
1050 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001051 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001054 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 if (self->rec == NULL)
1056 PyMem_Free(old_rec);
1057 }
1058 if (self->rec == NULL) {
1059 PyErr_NoMemory();
1060 return 0;
1061 }
1062 }
1063 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001064}
1065
1066static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001067join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001068{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001069 unsigned int field_kind = -1;
1070 void *field_data = NULL;
1071 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001072 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001073
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001074 if (field != NULL) {
1075 field_kind = PyUnicode_KIND(field);
1076 field_data = PyUnicode_DATA(field);
1077 field_len = PyUnicode_GET_LENGTH(field);
1078 }
1079 rec_len = join_append_data(self, field_kind, field_data, field_len,
1080 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001081 if (rec_len < 0)
1082 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 /* grow record buffer if necessary */
1085 if (!join_check_rec_size(self, rec_len))
1086 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001088 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1089 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001093}
1094
1095static int
1096join_append_lineterminator(WriterObj *self)
1097{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001098 Py_ssize_t terminator_len, i;
1099 unsigned int term_kind;
1100 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001101
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001102 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 if (terminator_len == -1)
1104 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 /* grow record buffer if necessary */
1107 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1108 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001109
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001110 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1111 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1112 for (i = 0; i < terminator_len; i++)
1113 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001117}
1118
1119PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001120"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001121"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001122"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001123"elements will be converted to string.");
1124
1125static PyObject *
1126csv_writerow(WriterObj *self, PyObject *seq)
1127{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001129 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001130 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 if (!PySequence_Check(seq))
1133 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001134
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 len = PySequence_Length(seq);
1136 if (len < 0)
1137 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001138
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 /* Join all fields in internal buffer.
1140 */
1141 join_reset(self);
1142 for (i = 0; i < len; i++) {
1143 PyObject *field;
1144 int append_ok;
1145 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 field = PySequence_GetItem(seq, i);
1148 if (field == NULL)
1149 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 switch (dialect->quoting) {
1152 case QUOTE_NONNUMERIC:
1153 quoted = !PyNumber_Check(field);
1154 break;
1155 case QUOTE_ALL:
1156 quoted = 1;
1157 break;
1158 default:
1159 quoted = 0;
1160 break;
1161 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001164 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 Py_DECREF(field);
1166 }
1167 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001168 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 Py_DECREF(field);
1170 }
1171 else {
1172 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 str = PyObject_Str(field);
1175 Py_DECREF(field);
1176 if (str == NULL)
1177 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001178 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 Py_DECREF(str);
1180 }
1181 if (!append_ok)
1182 return NULL;
1183 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 /* Add line terminator.
1186 */
1187 if (!join_append_lineterminator(self))
1188 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001189
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001190 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1191 (void *) self->rec, self->rec_len);
1192 if (line == NULL)
1193 return NULL;
1194 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1195 Py_DECREF(line);
1196 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001197}
1198
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001199PyDoc_STRVAR(csv_writerows_doc,
1200"writerows(sequence of sequences)\n"
1201"\n"
1202"Construct and write a series of sequences to a csv file. Non-string\n"
1203"elements will be converted to string.");
1204
Skip Montanarob4a04172003-03-20 23:29:12 +00001205static PyObject *
1206csv_writerows(WriterObj *self, PyObject *seqseq)
1207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 row_iter = PyObject_GetIter(seqseq);
1211 if (row_iter == NULL) {
1212 PyErr_SetString(PyExc_TypeError,
1213 "writerows() argument must be iterable");
1214 return NULL;
1215 }
1216 while ((row_obj = PyIter_Next(row_iter))) {
1217 result = csv_writerow(self, row_obj);
1218 Py_DECREF(row_obj);
1219 if (!result) {
1220 Py_DECREF(row_iter);
1221 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001222 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 else
1224 Py_DECREF(result);
1225 }
1226 Py_DECREF(row_iter);
1227 if (PyErr_Occurred())
1228 return NULL;
1229 Py_INCREF(Py_None);
1230 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001231}
1232
1233static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1235 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1236 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001237};
1238
1239#define W_OFF(x) offsetof(WriterObj, x)
1240
1241static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1243 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001244};
1245
1246static void
1247Writer_dealloc(WriterObj *self)
1248{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 PyObject_GC_UnTrack(self);
1250 Py_XDECREF(self->dialect);
1251 Py_XDECREF(self->writeline);
1252 if (self->rec != NULL)
1253 PyMem_Free(self->rec);
1254 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001255}
1256
1257static int
1258Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 Py_VISIT(self->dialect);
1261 Py_VISIT(self->writeline);
1262 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001263}
1264
1265static int
1266Writer_clear(WriterObj *self)
1267{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001268 Py_CLEAR(self->dialect);
1269 Py_CLEAR(self->writeline);
1270 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001271}
1272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001274"CSV writer\n"
1275"\n"
1276"Writer objects are responsible for generating tabular data\n"
1277"in CSV format from sequence input.\n"
1278);
1279
1280static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 PyVarObject_HEAD_INIT(NULL, 0)
1282 "_csv.writer", /*tp_name*/
1283 sizeof(WriterObj), /*tp_basicsize*/
1284 0, /*tp_itemsize*/
1285 /* methods */
1286 (destructor)Writer_dealloc, /*tp_dealloc*/
1287 (printfunc)0, /*tp_print*/
1288 (getattrfunc)0, /*tp_getattr*/
1289 (setattrfunc)0, /*tp_setattr*/
1290 0, /*tp_reserved*/
1291 (reprfunc)0, /*tp_repr*/
1292 0, /*tp_as_number*/
1293 0, /*tp_as_sequence*/
1294 0, /*tp_as_mapping*/
1295 (hashfunc)0, /*tp_hash*/
1296 (ternaryfunc)0, /*tp_call*/
1297 (reprfunc)0, /*tp_str*/
1298 0, /*tp_getattro*/
1299 0, /*tp_setattro*/
1300 0, /*tp_as_buffer*/
1301 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1302 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1303 Writer_Type_doc,
1304 (traverseproc)Writer_traverse, /*tp_traverse*/
1305 (inquiry)Writer_clear, /*tp_clear*/
1306 0, /*tp_richcompare*/
1307 0, /*tp_weaklistoffset*/
1308 (getiterfunc)0, /*tp_iter*/
1309 (getiterfunc)0, /*tp_iternext*/
1310 Writer_methods, /*tp_methods*/
1311 Writer_memberlist, /*tp_members*/
1312 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001313};
1314
1315static PyObject *
1316csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1317{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 PyObject * output_file, * dialect = NULL;
1319 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001320 _Py_identifier(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 if (!self)
1323 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 self->dialect = NULL;
1326 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001327
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 self->rec = NULL;
1329 self->rec_size = 0;
1330 self->rec_len = 0;
1331 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001332
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1334 Py_DECREF(self);
1335 return NULL;
1336 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001337 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1339 PyErr_SetString(PyExc_TypeError,
1340 "argument 1 must have a \"write\" method");
1341 Py_DECREF(self);
1342 return NULL;
1343 }
1344 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1345 if (self->dialect == NULL) {
1346 Py_DECREF(self);
1347 return NULL;
1348 }
1349 PyObject_GC_Track(self);
1350 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001351}
1352
1353/*
1354 * DIALECT REGISTRY
1355 */
1356static PyObject *
1357csv_list_dialects(PyObject *module, PyObject *args)
1358{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001359 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001360}
1361
1362static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001363csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001364{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 PyObject *name_obj, *dialect_obj = NULL;
1366 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1369 return NULL;
1370 if (!IS_BASESTRING(name_obj)) {
1371 PyErr_SetString(PyExc_TypeError,
1372 "dialect name must be a string or unicode");
1373 return NULL;
1374 }
1375 dialect = _call_dialect(dialect_obj, kwargs);
1376 if (dialect == NULL)
1377 return NULL;
1378 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1379 Py_DECREF(dialect);
1380 return NULL;
1381 }
1382 Py_DECREF(dialect);
1383 Py_INCREF(Py_None);
1384 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001385}
1386
1387static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001388csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001389{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 if (PyDict_DelItem(dialects, name_obj) < 0)
1391 return PyErr_Format(error_obj, "unknown dialect");
1392 Py_INCREF(Py_None);
1393 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001394}
1395
1396static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001397csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001398{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001400}
1401
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001402static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001403csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001404{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 PyObject *new_limit = NULL;
1406 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1409 return NULL;
1410 if (new_limit != NULL) {
1411 if (!PyLong_CheckExact(new_limit)) {
1412 PyErr_Format(PyExc_TypeError,
1413 "limit must be an integer");
1414 return NULL;
1415 }
1416 field_limit = PyLong_AsLong(new_limit);
1417 if (field_limit == -1 && PyErr_Occurred()) {
1418 field_limit = old_limit;
1419 return NULL;
1420 }
1421 }
1422 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001423}
1424
Skip Montanarob4a04172003-03-20 23:29:12 +00001425/*
1426 * MODULE
1427 */
1428
1429PyDoc_STRVAR(csv_module_doc,
1430"CSV parsing and writing.\n"
1431"\n"
1432"This module provides classes that assist in the reading and writing\n"
1433"of Comma Separated Value (CSV) files, and implements the interface\n"
1434"described by PEP 305. Although many CSV files are simple to parse,\n"
1435"the format is not formally defined by a stable specification and\n"
1436"is subtle enough that parsing lines of a CSV file with something\n"
1437"like line.split(\",\") is bound to fail. The module supports three\n"
1438"basic APIs: reading, writing, and registration of dialects.\n"
1439"\n"
1440"\n"
1441"DIALECT REGISTRATION:\n"
1442"\n"
1443"Readers and writers support a dialect argument, which is a convenient\n"
1444"handle on a group of settings. When the dialect argument is a string,\n"
1445"it identifies one of the dialects previously registered with the module.\n"
1446"If it is a class or instance, the attributes of the argument are used as\n"
1447"the settings for the reader or writer:\n"
1448"\n"
1449" class excel:\n"
1450" delimiter = ','\n"
1451" quotechar = '\"'\n"
1452" escapechar = None\n"
1453" doublequote = True\n"
1454" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001455" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001456" quoting = QUOTE_MINIMAL\n"
1457"\n"
1458"SETTINGS:\n"
1459"\n"
1460" * quotechar - specifies a one-character string to use as the \n"
1461" quoting character. It defaults to '\"'.\n"
1462" * delimiter - specifies a one-character string to use as the \n"
1463" field separator. It defaults to ','.\n"
1464" * skipinitialspace - specifies how to interpret whitespace which\n"
1465" immediately follows a delimiter. It defaults to False, which\n"
1466" means that whitespace immediately following a delimiter is part\n"
1467" of the following field.\n"
1468" * lineterminator - specifies the character sequence which should \n"
1469" terminate rows.\n"
1470" * quoting - controls when quotes should be generated by the writer.\n"
1471" It can take on any of the following module constants:\n"
1472"\n"
1473" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1474" field contains either the quotechar or the delimiter\n"
1475" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1476" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001477" fields which do not parse as integers or floating point\n"
1478" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001479" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1480" * escapechar - specifies a one-character string used to escape \n"
1481" the delimiter when quoting is set to QUOTE_NONE.\n"
1482" * doublequote - controls the handling of quotes inside fields. When\n"
1483" True, two consecutive quotes are interpreted as one during read,\n"
1484" and when writing, each quote character embedded in the data is\n"
1485" written as two quotes\n");
1486
1487PyDoc_STRVAR(csv_reader_doc,
1488" csv_reader = reader(iterable [, dialect='excel']\n"
1489" [optional keyword args])\n"
1490" for row in csv_reader:\n"
1491" process(row)\n"
1492"\n"
1493"The \"iterable\" argument can be any object that returns a line\n"
1494"of input for each iteration, such as a file object or a list. The\n"
1495"optional \"dialect\" parameter is discussed below. The function\n"
1496"also accepts optional keyword arguments which override settings\n"
1497"provided by the dialect.\n"
1498"\n"
1499"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001500"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001501
1502PyDoc_STRVAR(csv_writer_doc,
1503" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1504" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001505" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001506" csv_writer.writerow(row)\n"
1507"\n"
1508" [or]\n"
1509"\n"
1510" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1511" [optional keyword args])\n"
1512" csv_writer.writerows(rows)\n"
1513"\n"
1514"The \"fileobj\" argument can be any object that supports the file API.\n");
1515
1516PyDoc_STRVAR(csv_list_dialects_doc,
1517"Return a list of all know dialect names.\n"
1518" names = csv.list_dialects()");
1519
1520PyDoc_STRVAR(csv_get_dialect_doc,
1521"Return the dialect instance associated with name.\n"
1522" dialect = csv.get_dialect(name)");
1523
1524PyDoc_STRVAR(csv_register_dialect_doc,
1525"Create a mapping from a string name to a dialect class.\n"
1526" dialect = csv.register_dialect(name, dialect)");
1527
1528PyDoc_STRVAR(csv_unregister_dialect_doc,
1529"Delete the name/dialect mapping associated with a string name.\n"
1530" csv.unregister_dialect(name)");
1531
Andrew McNamara31d88962005-01-12 03:45:10 +00001532PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001533"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001534" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001535"\n"
1536"Returns old limit. If limit is not given, no new limit is set and\n"
1537"the old limit is returned");
1538
Skip Montanarob4a04172003-03-20 23:29:12 +00001539static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 { "reader", (PyCFunction)csv_reader,
1541 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1542 { "writer", (PyCFunction)csv_writer,
1543 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1544 { "list_dialects", (PyCFunction)csv_list_dialects,
1545 METH_NOARGS, csv_list_dialects_doc},
1546 { "register_dialect", (PyCFunction)csv_register_dialect,
1547 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1548 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1549 METH_O, csv_unregister_dialect_doc},
1550 { "get_dialect", (PyCFunction)csv_get_dialect,
1551 METH_O, csv_get_dialect_doc},
1552 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1553 METH_VARARGS, csv_field_size_limit_doc},
1554 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001555};
1556
Martin v. Löwis1a214512008-06-11 05:26:20 +00001557
1558static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001559 PyModuleDef_HEAD_INIT,
1560 "_csv",
1561 csv_module_doc,
1562 -1,
1563 csv_methods,
1564 NULL,
1565 NULL,
1566 NULL,
1567 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001568};
1569
Skip Montanarob4a04172003-03-20 23:29:12 +00001570PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001571PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001572{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001573 PyObject *module;
1574 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001575
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 if (PyType_Ready(&Dialect_Type) < 0)
1577 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001578
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001579 if (PyType_Ready(&Reader_Type) < 0)
1580 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001582 if (PyType_Ready(&Writer_Type) < 0)
1583 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001584
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 /* Create the module and add the functions */
1586 module = PyModule_Create(&_csvmodule);
1587 if (module == NULL)
1588 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001590 /* Add version to the module. */
1591 if (PyModule_AddStringConstant(module, "__version__",
1592 MODULE_VERSION) == -1)
1593 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001595 /* Add _dialects dictionary */
1596 dialects = PyDict_New();
1597 if (dialects == NULL)
1598 return NULL;
1599 if (PyModule_AddObject(module, "_dialects", dialects))
1600 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 /* Add quote styles into dictionary */
1603 for (style = quote_styles; style->name; style++) {
1604 if (PyModule_AddIntConstant(module, style->name,
1605 style->style) == -1)
1606 return NULL;
1607 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001608
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001609 /* Add the Dialect type */
1610 Py_INCREF(&Dialect_Type);
1611 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1612 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 /* Add the CSV exception object to the module. */
1615 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1616 if (error_obj == NULL)
1617 return NULL;
1618 PyModule_AddObject(module, "Error", error_obj);
1619 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001620}