blob: cc87bad05833ea62334d3a09372c938620b65070 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
/* True when `o` is a Python str object; this is the only check performed. */
#define IS_BASESTRING(o) PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000018
Antoine Pitroue7672d32012-05-16 11:33:08 +020019typedef struct {
20 PyObject *error_obj; /* CSV exception */
21 PyObject *dialects; /* Dialect registry */
22 long field_limit; /* max parsed field size */
23} _csvstate;
24
25#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
26
27static int
28_csv_clear(PyObject *m)
29{
30 Py_CLEAR(_csvstate(m)->error_obj);
31 Py_CLEAR(_csvstate(m)->dialects);
32 return 0;
33}
34
35static int
36_csv_traverse(PyObject *m, visitproc visit, void *arg)
37{
38 Py_VISIT(_csvstate(m)->error_obj);
39 Py_VISIT(_csvstate(m)->dialects);
40 return 0;
41}
42
43static void
44_csv_free(void *m)
45{
46 _csv_clear((PyObject *)m);
47}
48
49static struct PyModuleDef _csvmodule;
50
51#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000052
/* States of the reader's per-character parser (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
58
/* Supported quoting styles. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of all quoting styles; terminated by an entry whose name is NULL. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
75
76typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000079 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020080 Py_UCS4 delimiter; /* field separator */
81 Py_UCS4 quotechar; /* quote character */
82 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 int skipinitialspace; /* ignore spaces following delimiter? */
84 PyObject *lineterminator; /* string to write between records */
85 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000088} DialectObj;
89
Neal Norwitz227b5332006-03-22 09:28:35 +000090static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000091
92typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000098
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000099 PyObject *fields; /* field list for current record */
100 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200101 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000102 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000103 Py_ssize_t field_len; /* length of current field */
104 int numeric_field; /* treat field as numeric */
105 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} ReaderObj;
107
Neal Norwitz227b5332006-03-22 09:28:35 +0000108static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
Christian Heimes90aa7642007-12-19 02:45:37 +0000110#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000111
112typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000117 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200119 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000120 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 Py_ssize_t rec_len; /* length of record */
122 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000123} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Neal Norwitz227b5332006-03-22 09:28:35 +0000125static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
127/*
128 * DIALECT class
129 */
130
131static PyObject *
132get_dialect_from_registry(PyObject * name_obj)
133{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000135
Antoine Pitroue7672d32012-05-16 11:33:08 +0200136 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 if (dialect_obj == NULL) {
138 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200139 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 }
141 else
142 Py_INCREF(dialect_obj);
143 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000144}
145
Skip Montanarob4a04172003-03-20 23:29:12 +0000146static PyObject *
147get_string(PyObject *str)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_XINCREF(str);
150 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000151}
152
Skip Montanarob4a04172003-03-20 23:29:12 +0000153static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200154get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 if (c == '\0') {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
160 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200161 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165Dialect_get_lineterminator(DialectObj *self)
166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000168}
169
Skip Montanarob4a04172003-03-20 23:29:12 +0000170static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000171Dialect_get_delimiter(DialectObj *self)
172{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174}
175
176static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000177Dialect_get_escapechar(DialectObj *self)
178{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Andrew McNamara1196cf12005-01-07 04:42:45 +0000182static PyObject *
183Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
188static PyObject *
189Dialect_get_quoting(DialectObj *self)
190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
194static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000195_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 if (src == NULL)
198 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200199 else {
200 int b = PyObject_IsTrue(src);
201 if (b < 0)
202 return -1;
203 *target = b;
204 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000205 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000206}
207
Andrew McNamara1196cf12005-01-07 04:42:45 +0000208static int
209_set_int(const char *name, int *target, PyObject *src, int dflt)
210{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 if (src == NULL)
212 *target = dflt;
213 else {
214 long value;
215 if (!PyLong_CheckExact(src)) {
216 PyErr_Format(PyExc_TypeError,
217 "\"%s\" must be an integer", name);
218 return -1;
219 }
220 value = PyLong_AsLong(src);
221 if (value == -1 && PyErr_Occurred())
222 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000223#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 if (value > INT_MAX || value < INT_MIN) {
225 PyErr_Format(PyExc_ValueError,
226 "integer out of range for \"%s\"", name);
227 return -1;
228 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000229#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 *target = (int)value;
231 }
232 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233}
234
235static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200236_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000237{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000238 if (src == NULL)
239 *target = dflt;
240 else {
241 *target = '\0';
242 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 Py_ssize_t len;
Victor Stinner9e30aa52011-11-21 02:49:52 +0100244 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200245 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 PyErr_Format(PyExc_TypeError,
247 "\"%s\" must be an 1-character string",
248 name);
249 return -1;
250 }
251 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200252 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 }
254 }
255 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000256}
257
258static int
259_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000261 if (src == NULL)
262 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
263 else {
264 if (src == Py_None)
265 *target = NULL;
266 else if (!IS_BASESTRING(src)) {
267 PyErr_Format(PyExc_TypeError,
268 "\"%s\" must be a string", name);
269 return -1;
270 }
271 else {
272 Py_XDECREF(*target);
273 Py_INCREF(src);
274 *target = src;
275 }
276 }
277 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000278}
279
280static int
281dialect_check_quoting(int quoting)
282{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000283 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 for (qs = quote_styles; qs->name; qs++) {
286 if (qs->style == quoting)
287 return 0;
288 }
289 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291}
Skip Montanarob4a04172003-03-20 23:29:12 +0000292
293#define D_OFF(x) offsetof(DialectObj, x)
294
295static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
297 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
298 { "strict", T_INT, D_OFF(strict), READONLY },
299 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000300};
301
302static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "delimiter", (getter)Dialect_get_delimiter},
304 { "escapechar", (getter)Dialect_get_escapechar},
305 { "lineterminator", (getter)Dialect_get_lineterminator},
306 { "quotechar", (getter)Dialect_get_quotechar},
307 { "quoting", (getter)Dialect_get_quoting},
308 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000309};
310
311static void
312Dialect_dealloc(DialectObj *self)
313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 Py_XDECREF(self->lineterminator);
315 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000316}
317
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000318static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 "dialect",
320 "delimiter",
321 "doublequote",
322 "escapechar",
323 "lineterminator",
324 "quotechar",
325 "quoting",
326 "skipinitialspace",
327 "strict",
328 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000329};
330
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000331static PyObject *
332dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 DialectObj *self;
335 PyObject *ret = NULL;
336 PyObject *dialect = NULL;
337 PyObject *delimiter = NULL;
338 PyObject *doublequote = NULL;
339 PyObject *escapechar = NULL;
340 PyObject *lineterminator = NULL;
341 PyObject *quotechar = NULL;
342 PyObject *quoting = NULL;
343 PyObject *skipinitialspace = NULL;
344 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347 "|OOOOOOOOO", dialect_kws,
348 &dialect,
349 &delimiter,
350 &doublequote,
351 &escapechar,
352 &lineterminator,
353 &quotechar,
354 &quoting,
355 &skipinitialspace,
356 &strict))
357 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 if (dialect != NULL) {
360 if (IS_BASESTRING(dialect)) {
361 dialect = get_dialect_from_registry(dialect);
362 if (dialect == NULL)
363 return NULL;
364 }
365 else
366 Py_INCREF(dialect);
367 /* Can we reuse this instance? */
368 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
369 delimiter == 0 &&
370 doublequote == 0 &&
371 escapechar == 0 &&
372 lineterminator == 0 &&
373 quotechar == 0 &&
374 quoting == 0 &&
375 skipinitialspace == 0 &&
376 strict == 0)
377 return dialect;
378 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 self = (DialectObj *)type->tp_alloc(type, 0);
381 if (self == NULL) {
382 Py_XDECREF(dialect);
383 return NULL;
384 }
385 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 Py_XINCREF(delimiter);
388 Py_XINCREF(doublequote);
389 Py_XINCREF(escapechar);
390 Py_XINCREF(lineterminator);
391 Py_XINCREF(quotechar);
392 Py_XINCREF(quoting);
393 Py_XINCREF(skipinitialspace);
394 Py_XINCREF(strict);
395 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000396#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 if (v == NULL) \
398 v = PyObject_GetAttrString(dialect, n)
399 DIALECT_GETATTR(delimiter, "delimiter");
400 DIALECT_GETATTR(doublequote, "doublequote");
401 DIALECT_GETATTR(escapechar, "escapechar");
402 DIALECT_GETATTR(lineterminator, "lineterminator");
403 DIALECT_GETATTR(quotechar, "quotechar");
404 DIALECT_GETATTR(quoting, "quoting");
405 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406 DIALECT_GETATTR(strict, "strict");
407 PyErr_Clear();
408 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000411#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 if (meth(name, target, src, dflt)) \
413 goto err
414 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
415 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
416 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
420 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
421 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 /* validate options */
424 if (dialect_check_quoting(self->quoting))
425 goto err;
426 if (self->delimiter == 0) {
427 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
428 goto err;
429 }
430 if (quotechar == Py_None && quoting == NULL)
431 self->quoting = QUOTE_NONE;
432 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "quotechar must be set if quoting enabled");
435 goto err;
436 }
437 if (self->lineterminator == 0) {
438 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439 goto err;
440 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 ret = (PyObject *)self;
443 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 Py_XDECREF(self);
446 Py_XDECREF(dialect);
447 Py_XDECREF(delimiter);
448 Py_XDECREF(doublequote);
449 Py_XDECREF(escapechar);
450 Py_XDECREF(lineterminator);
451 Py_XDECREF(quotechar);
452 Py_XDECREF(quoting);
453 Py_XDECREF(skipinitialspace);
454 Py_XDECREF(strict);
455 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000456}
457
458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000460"CSV dialect\n"
461"\n"
462"The Dialect type records CSV parsing and generation options.\n");
463
464static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 PyVarObject_HEAD_INIT(NULL, 0)
466 "_csv.Dialect", /* tp_name */
467 sizeof(DialectObj), /* tp_basicsize */
468 0, /* tp_itemsize */
469 /* methods */
470 (destructor)Dialect_dealloc, /* tp_dealloc */
471 (printfunc)0, /* tp_print */
472 (getattrfunc)0, /* tp_getattr */
473 (setattrfunc)0, /* tp_setattr */
474 0, /* tp_reserved */
475 (reprfunc)0, /* tp_repr */
476 0, /* tp_as_number */
477 0, /* tp_as_sequence */
478 0, /* tp_as_mapping */
479 (hashfunc)0, /* tp_hash */
480 (ternaryfunc)0, /* tp_call */
481 (reprfunc)0, /* tp_str */
482 0, /* tp_getattro */
483 0, /* tp_setattro */
484 0, /* tp_as_buffer */
485 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
486 Dialect_Type_doc, /* tp_doc */
487 0, /* tp_traverse */
488 0, /* tp_clear */
489 0, /* tp_richcompare */
490 0, /* tp_weaklistoffset */
491 0, /* tp_iter */
492 0, /* tp_iternext */
493 0, /* tp_methods */
494 Dialect_memberlist, /* tp_members */
495 Dialect_getsetlist, /* tp_getset */
496 0, /* tp_base */
497 0, /* tp_dict */
498 0, /* tp_descr_get */
499 0, /* tp_descr_set */
500 0, /* tp_dictoffset */
501 0, /* tp_init */
502 0, /* tp_alloc */
503 dialect_new, /* tp_new */
504 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000505};
506
Andrew McNamara91b97462005-01-11 01:07:23 +0000507/*
508 * Return an instance of the dialect type, given a Python instance or kwarg
509 * description of the dialect
510 */
511static PyObject *
512_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000514 PyObject *ctor_args;
515 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000516
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000517 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
518 if (ctor_args == NULL)
519 return NULL;
520 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
521 Py_DECREF(ctor_args);
522 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000523}
524
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000525/*
526 * READER
527 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000528static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000529parse_save_field(ReaderObj *self)
530{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000532
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200533 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
534 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000535 if (field == NULL)
536 return -1;
537 self->field_len = 0;
538 if (self->numeric_field) {
539 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000540
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 self->numeric_field = 0;
542 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200544 if (tmp == NULL)
545 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 field = tmp;
547 }
548 PyList_Append(self->fields, field);
549 Py_DECREF(field);
550 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000551}
552
553static int
554parse_grow_buff(ReaderObj *self)
555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 if (self->field_size == 0) {
557 self->field_size = 4096;
558 if (self->field != NULL)
559 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200560 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 }
562 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200563 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000564 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 PyErr_NoMemory();
566 return 0;
567 }
568 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200569 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 }
571 if (self->field == NULL) {
572 PyErr_NoMemory();
573 return 0;
574 }
575 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000576}
577
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000578static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200579parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000580{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200581 if (self->field_len >= _csvstate_global->field_limit) {
582 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
583 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000584 return -1;
585 }
586 if (self->field_len == self->field_size && !parse_grow_buff(self))
587 return -1;
588 self->field[self->field_len++] = c;
589 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590}
591
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000592static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200593parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000594{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 switch (self->state) {
598 case START_RECORD:
599 /* start of record */
600 if (c == '\0')
601 /* empty line - return [] */
602 break;
603 else if (c == '\n' || c == '\r') {
604 self->state = EAT_CRNL;
605 break;
606 }
607 /* normal character - handle as START_FIELD */
608 self->state = START_FIELD;
609 /* fallthru */
610 case START_FIELD:
611 /* expecting field */
612 if (c == '\n' || c == '\r' || c == '\0') {
613 /* save empty field - return [fields] */
614 if (parse_save_field(self) < 0)
615 return -1;
616 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
617 }
618 else if (c == dialect->quotechar &&
619 dialect->quoting != QUOTE_NONE) {
620 /* start quoted field */
621 self->state = IN_QUOTED_FIELD;
622 }
623 else if (c == dialect->escapechar) {
624 /* possible escaped character */
625 self->state = ESCAPED_CHAR;
626 }
627 else if (c == ' ' && dialect->skipinitialspace)
628 /* ignore space at start of field */
629 ;
630 else if (c == dialect->delimiter) {
631 /* save empty field */
632 if (parse_save_field(self) < 0)
633 return -1;
634 }
635 else {
636 /* begin new unquoted field */
637 if (dialect->quoting == QUOTE_NONNUMERIC)
638 self->numeric_field = 1;
639 if (parse_add_char(self, c) < 0)
640 return -1;
641 self->state = IN_FIELD;
642 }
643 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 case ESCAPED_CHAR:
646 if (c == '\0')
647 c = '\n';
648 if (parse_add_char(self, c) < 0)
649 return -1;
650 self->state = IN_FIELD;
651 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 case IN_FIELD:
654 /* in unquoted field */
655 if (c == '\n' || c == '\r' || c == '\0') {
656 /* end of line - return [fields] */
657 if (parse_save_field(self) < 0)
658 return -1;
659 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
660 }
661 else if (c == dialect->escapechar) {
662 /* possible escaped character */
663 self->state = ESCAPED_CHAR;
664 }
665 else if (c == dialect->delimiter) {
666 /* save field - wait for new field */
667 if (parse_save_field(self) < 0)
668 return -1;
669 self->state = START_FIELD;
670 }
671 else {
672 /* normal character - save in field */
673 if (parse_add_char(self, c) < 0)
674 return -1;
675 }
676 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 case IN_QUOTED_FIELD:
679 /* in quoted field */
680 if (c == '\0')
681 ;
682 else if (c == dialect->escapechar) {
683 /* Possible escape character */
684 self->state = ESCAPE_IN_QUOTED_FIELD;
685 }
686 else if (c == dialect->quotechar &&
687 dialect->quoting != QUOTE_NONE) {
688 if (dialect->doublequote) {
689 /* doublequote; " represented by "" */
690 self->state = QUOTE_IN_QUOTED_FIELD;
691 }
692 else {
693 /* end of quote part of field */
694 self->state = IN_FIELD;
695 }
696 }
697 else {
698 /* normal character - save in field */
699 if (parse_add_char(self, c) < 0)
700 return -1;
701 }
702 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000703
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000704 case ESCAPE_IN_QUOTED_FIELD:
705 if (c == '\0')
706 c = '\n';
707 if (parse_add_char(self, c) < 0)
708 return -1;
709 self->state = IN_QUOTED_FIELD;
710 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 case QUOTE_IN_QUOTED_FIELD:
713 /* doublequote - seen a quote in an quoted field */
714 if (dialect->quoting != QUOTE_NONE &&
715 c == dialect->quotechar) {
716 /* save "" as " */
717 if (parse_add_char(self, c) < 0)
718 return -1;
719 self->state = IN_QUOTED_FIELD;
720 }
721 else if (c == dialect->delimiter) {
722 /* save field - wait for new field */
723 if (parse_save_field(self) < 0)
724 return -1;
725 self->state = START_FIELD;
726 }
727 else if (c == '\n' || c == '\r' || c == '\0') {
728 /* end of line - return [fields] */
729 if (parse_save_field(self) < 0)
730 return -1;
731 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
732 }
733 else if (!dialect->strict) {
734 if (parse_add_char(self, c) < 0)
735 return -1;
736 self->state = IN_FIELD;
737 }
738 else {
739 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200740 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000741 dialect->delimiter,
742 dialect->quotechar);
743 return -1;
744 }
745 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 case EAT_CRNL:
748 if (c == '\n' || c == '\r')
749 ;
750 else if (c == '\0')
751 self->state = START_RECORD;
752 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200753 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000754 return -1;
755 }
756 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 }
759 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000760}
761
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000762static int
763parse_reset(ReaderObj *self)
764{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 Py_XDECREF(self->fields);
766 self->fields = PyList_New(0);
767 if (self->fields == NULL)
768 return -1;
769 self->field_len = 0;
770 self->state = START_RECORD;
771 self->numeric_field = 0;
772 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000773}
Skip Montanarob4a04172003-03-20 23:29:12 +0000774
775static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000776Reader_iternext(ReaderObj *self)
777{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200779 Py_UCS4 c;
780 Py_ssize_t pos, linelen;
781 unsigned int kind;
782 void *data;
783 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 if (parse_reset(self) < 0)
786 return NULL;
787 do {
788 lineobj = PyIter_Next(self->input_iter);
789 if (lineobj == NULL) {
790 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700791 if (!PyErr_Occurred() && (self->field_len != 0 ||
792 self->state == IN_QUOTED_FIELD)) {
793 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700794 PyErr_SetString(_csvstate_global->error_obj,
795 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700796 else if (parse_save_field(self) >= 0)
797 break;
798 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 return NULL;
800 }
801 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200802 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 "iterator should return strings, "
804 "not %.200s "
805 "(did you open the file in text mode?)",
806 lineobj->ob_type->tp_name
807 );
808 Py_DECREF(lineobj);
809 return NULL;
810 }
811 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200812 kind = PyUnicode_KIND(lineobj);
813 data = PyUnicode_DATA(lineobj);
814 pos = 0;
815 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000816 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200817 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000819 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200820 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 "line contains NULL byte");
822 goto err;
823 }
824 if (parse_process_char(self, c) < 0) {
825 Py_DECREF(lineobj);
826 goto err;
827 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200828 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000829 }
830 Py_DECREF(lineobj);
831 if (parse_process_char(self, 0) < 0)
832 goto err;
833 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 fields = self->fields;
836 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000837err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000839}
840
841static void
842Reader_dealloc(ReaderObj *self)
843{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 PyObject_GC_UnTrack(self);
845 Py_XDECREF(self->dialect);
846 Py_XDECREF(self->input_iter);
847 Py_XDECREF(self->fields);
848 if (self->field != NULL)
849 PyMem_Free(self->field);
850 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000851}
852
853static int
854Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 Py_VISIT(self->dialect);
857 Py_VISIT(self->input_iter);
858 Py_VISIT(self->fields);
859 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000860}
861
862static int
863Reader_clear(ReaderObj *self)
864{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 Py_CLEAR(self->dialect);
866 Py_CLEAR(self->input_iter);
867 Py_CLEAR(self->fields);
868 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000869}
870
871PyDoc_STRVAR(Reader_Type_doc,
872"CSV reader\n"
873"\n"
874"Reader objects are responsible for reading and parsing tabular data\n"
875"in CSV format.\n"
876);
877
878static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000880};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000881#define R_OFF(x) offsetof(ReaderObj, x)
882
883static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
885 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
886 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000887};
888
Skip Montanarob4a04172003-03-20 23:29:12 +0000889
890static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 PyVarObject_HEAD_INIT(NULL, 0)
892 "_csv.reader", /*tp_name*/
893 sizeof(ReaderObj), /*tp_basicsize*/
894 0, /*tp_itemsize*/
895 /* methods */
896 (destructor)Reader_dealloc, /*tp_dealloc*/
897 (printfunc)0, /*tp_print*/
898 (getattrfunc)0, /*tp_getattr*/
899 (setattrfunc)0, /*tp_setattr*/
900 0, /*tp_reserved*/
901 (reprfunc)0, /*tp_repr*/
902 0, /*tp_as_number*/
903 0, /*tp_as_sequence*/
904 0, /*tp_as_mapping*/
905 (hashfunc)0, /*tp_hash*/
906 (ternaryfunc)0, /*tp_call*/
907 (reprfunc)0, /*tp_str*/
908 0, /*tp_getattro*/
909 0, /*tp_setattro*/
910 0, /*tp_as_buffer*/
911 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
912 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
913 Reader_Type_doc, /*tp_doc*/
914 (traverseproc)Reader_traverse, /*tp_traverse*/
915 (inquiry)Reader_clear, /*tp_clear*/
916 0, /*tp_richcompare*/
917 0, /*tp_weaklistoffset*/
918 PyObject_SelfIter, /*tp_iter*/
919 (getiterfunc)Reader_iternext, /*tp_iternext*/
920 Reader_methods, /*tp_methods*/
921 Reader_memberlist, /*tp_members*/
922 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000923
924};
925
926static PyObject *
927csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
928{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 PyObject * iterator, * dialect = NULL;
930 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000931
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 if (!self)
933 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000934
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 self->dialect = NULL;
936 self->fields = NULL;
937 self->input_iter = NULL;
938 self->field = NULL;
939 self->field_size = 0;
940 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 if (parse_reset(self) < 0) {
943 Py_DECREF(self);
944 return NULL;
945 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
948 Py_DECREF(self);
949 return NULL;
950 }
951 self->input_iter = PyObject_GetIter(iterator);
952 if (self->input_iter == NULL) {
953 PyErr_SetString(PyExc_TypeError,
954 "argument 1 must be an iterator");
955 Py_DECREF(self);
956 return NULL;
957 }
958 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
959 if (self->dialect == NULL) {
960 Py_DECREF(self);
961 return NULL;
962 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 PyObject_GC_Track(self);
965 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000966}
967
968/*
969 * WRITER
970 */
971/* ---------------------------------------------------------------- */
972static void
973join_reset(WriterObj *self)
974{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 self->rec_len = 0;
976 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000977}
978
979#define MEM_INCR 32768
980
981/* Calculate new record length or append field to record. Return new
982 * record length.
983 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000984static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200985join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
986 Py_ssize_t field_len, int quote_empty, int *quoted,
987 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 DialectObj *dialect = self->dialect;
990 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000991 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000992
993#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 do {\
995 if (copy_phase) \
996 self->rec[rec_len] = c;\
997 rec_len++;\
998 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000999
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001000 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001001
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 /* If this is not the first field we need a field separator */
1003 if (self->num_fields > 0)
1004 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001006 /* Handle preceding quote */
1007 if (copy_phase && *quoted)
1008 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001009
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 /* Copy/count field data */
1011 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001012 for (i = 0; field_data && (i < field_len); i++) {
1013 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 if (c == dialect->delimiter ||
1017 c == dialect->escapechar ||
1018 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001019 PyUnicode_FindChar(
1020 dialect->lineterminator, c, 0,
1021 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 if (dialect->quoting == QUOTE_NONE)
1023 want_escape = 1;
1024 else {
1025 if (c == dialect->quotechar) {
1026 if (dialect->doublequote)
1027 ADDCH(dialect->quotechar);
1028 else
1029 want_escape = 1;
1030 }
1031 if (!want_escape)
1032 *quoted = 1;
1033 }
1034 if (want_escape) {
1035 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001036 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 "need to escape, but no escapechar set");
1038 return -1;
1039 }
1040 ADDCH(dialect->escapechar);
1041 }
1042 }
1043 /* Copy field character into record buffer.
1044 */
1045 ADDCH(c);
1046 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 /* If field is empty check if it needs to be quoted.
1049 */
1050 if (i == 0 && quote_empty) {
1051 if (dialect->quoting == QUOTE_NONE) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001052 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 "single empty field record must be quoted");
1054 return -1;
1055 }
1056 else
1057 *quoted = 1;
1058 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 if (*quoted) {
1061 if (copy_phase)
1062 ADDCH(dialect->quotechar);
1063 else
1064 rec_len += 2;
1065 }
1066 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001067#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001068}
1069
1070static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001071join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001072{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001073
Antoine Pitrou40455752010-08-15 18:51:10 +00001074 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 PyErr_NoMemory();
1076 return 0;
1077 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 if (rec_len > self->rec_size) {
1080 if (self->rec_size == 0) {
1081 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1082 if (self->rec != NULL)
1083 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001084 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 }
1086 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001087 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001090 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 if (self->rec == NULL)
1092 PyMem_Free(old_rec);
1093 }
1094 if (self->rec == NULL) {
1095 PyErr_NoMemory();
1096 return 0;
1097 }
1098 }
1099 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001100}
1101
1102static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001104{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001105 unsigned int field_kind = -1;
1106 void *field_data = NULL;
1107 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001108 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001109
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001110 if (field != NULL) {
1111 field_kind = PyUnicode_KIND(field);
1112 field_data = PyUnicode_DATA(field);
1113 field_len = PyUnicode_GET_LENGTH(field);
1114 }
1115 rec_len = join_append_data(self, field_kind, field_data, field_len,
1116 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (rec_len < 0)
1118 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 /* grow record buffer if necessary */
1121 if (!join_check_rec_size(self, rec_len))
1122 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001123
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001124 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1125 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129}
1130
1131static int
1132join_append_lineterminator(WriterObj *self)
1133{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001134 Py_ssize_t terminator_len, i;
1135 unsigned int term_kind;
1136 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001138 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 if (terminator_len == -1)
1140 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 /* grow record buffer if necessary */
1143 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1144 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001145
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001146 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1147 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1148 for (i = 0; i < terminator_len; i++)
1149 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001153}
1154
1155PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001156"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001157"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001158"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001159"elements will be converted to string.");
1160
1161static PyObject *
1162csv_writerow(WriterObj *self, PyObject *seq)
1163{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001165 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001166 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 if (!PySequence_Check(seq))
Antoine Pitroue7672d32012-05-16 11:33:08 +02001169 return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 len = PySequence_Length(seq);
1172 if (len < 0)
1173 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 /* Join all fields in internal buffer.
1176 */
1177 join_reset(self);
1178 for (i = 0; i < len; i++) {
1179 PyObject *field;
1180 int append_ok;
1181 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 field = PySequence_GetItem(seq, i);
1184 if (field == NULL)
1185 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 switch (dialect->quoting) {
1188 case QUOTE_NONNUMERIC:
1189 quoted = !PyNumber_Check(field);
1190 break;
1191 case QUOTE_ALL:
1192 quoted = 1;
1193 break;
1194 default:
1195 quoted = 0;
1196 break;
1197 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001200 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 Py_DECREF(field);
1202 }
1203 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001204 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 Py_DECREF(field);
1206 }
1207 else {
1208 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001209
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 str = PyObject_Str(field);
1211 Py_DECREF(field);
1212 if (str == NULL)
1213 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001214 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 Py_DECREF(str);
1216 }
1217 if (!append_ok)
1218 return NULL;
1219 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 /* Add line terminator.
1222 */
1223 if (!join_append_lineterminator(self))
1224 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001225
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001226 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1227 (void *) self->rec, self->rec_len);
1228 if (line == NULL)
1229 return NULL;
1230 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1231 Py_DECREF(line);
1232 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001233}
1234
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001235PyDoc_STRVAR(csv_writerows_doc,
1236"writerows(sequence of sequences)\n"
1237"\n"
1238"Construct and write a series of sequences to a csv file. Non-string\n"
1239"elements will be converted to string.");
1240
Skip Montanarob4a04172003-03-20 23:29:12 +00001241static PyObject *
1242csv_writerows(WriterObj *self, PyObject *seqseq)
1243{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 row_iter = PyObject_GetIter(seqseq);
1247 if (row_iter == NULL) {
1248 PyErr_SetString(PyExc_TypeError,
1249 "writerows() argument must be iterable");
1250 return NULL;
1251 }
1252 while ((row_obj = PyIter_Next(row_iter))) {
1253 result = csv_writerow(self, row_obj);
1254 Py_DECREF(row_obj);
1255 if (!result) {
1256 Py_DECREF(row_iter);
1257 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001258 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 else
1260 Py_DECREF(result);
1261 }
1262 Py_DECREF(row_iter);
1263 if (PyErr_Occurred())
1264 return NULL;
1265 Py_INCREF(Py_None);
1266 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001267}
1268
1269static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1271 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1272 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001273};
1274
1275#define W_OFF(x) offsetof(WriterObj, x)
1276
1277static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1279 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001280};
1281
1282static void
1283Writer_dealloc(WriterObj *self)
1284{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001285 PyObject_GC_UnTrack(self);
1286 Py_XDECREF(self->dialect);
1287 Py_XDECREF(self->writeline);
1288 if (self->rec != NULL)
1289 PyMem_Free(self->rec);
1290 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001291}
1292
1293static int
1294Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1295{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 Py_VISIT(self->dialect);
1297 Py_VISIT(self->writeline);
1298 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001299}
1300
1301static int
1302Writer_clear(WriterObj *self)
1303{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 Py_CLEAR(self->dialect);
1305 Py_CLEAR(self->writeline);
1306 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001307}
1308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001310"CSV writer\n"
1311"\n"
1312"Writer objects are responsible for generating tabular data\n"
1313"in CSV format from sequence input.\n"
1314);
1315
1316static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 PyVarObject_HEAD_INIT(NULL, 0)
1318 "_csv.writer", /*tp_name*/
1319 sizeof(WriterObj), /*tp_basicsize*/
1320 0, /*tp_itemsize*/
1321 /* methods */
1322 (destructor)Writer_dealloc, /*tp_dealloc*/
1323 (printfunc)0, /*tp_print*/
1324 (getattrfunc)0, /*tp_getattr*/
1325 (setattrfunc)0, /*tp_setattr*/
1326 0, /*tp_reserved*/
1327 (reprfunc)0, /*tp_repr*/
1328 0, /*tp_as_number*/
1329 0, /*tp_as_sequence*/
1330 0, /*tp_as_mapping*/
1331 (hashfunc)0, /*tp_hash*/
1332 (ternaryfunc)0, /*tp_call*/
1333 (reprfunc)0, /*tp_str*/
1334 0, /*tp_getattro*/
1335 0, /*tp_setattro*/
1336 0, /*tp_as_buffer*/
1337 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1338 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1339 Writer_Type_doc,
1340 (traverseproc)Writer_traverse, /*tp_traverse*/
1341 (inquiry)Writer_clear, /*tp_clear*/
1342 0, /*tp_richcompare*/
1343 0, /*tp_weaklistoffset*/
1344 (getiterfunc)0, /*tp_iter*/
1345 (getiterfunc)0, /*tp_iternext*/
1346 Writer_methods, /*tp_methods*/
1347 Writer_memberlist, /*tp_members*/
1348 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001349};
1350
1351static PyObject *
1352csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1353{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001354 PyObject * output_file, * dialect = NULL;
1355 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001356 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001358 if (!self)
1359 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001360
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001361 self->dialect = NULL;
1362 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001363
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 self->rec = NULL;
1365 self->rec_size = 0;
1366 self->rec_len = 0;
1367 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001369 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1370 Py_DECREF(self);
1371 return NULL;
1372 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001373 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1375 PyErr_SetString(PyExc_TypeError,
1376 "argument 1 must have a \"write\" method");
1377 Py_DECREF(self);
1378 return NULL;
1379 }
1380 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1381 if (self->dialect == NULL) {
1382 Py_DECREF(self);
1383 return NULL;
1384 }
1385 PyObject_GC_Track(self);
1386 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001387}
1388
1389/*
1390 * DIALECT REGISTRY
1391 */
1392static PyObject *
1393csv_list_dialects(PyObject *module, PyObject *args)
1394{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001395 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001396}
1397
1398static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001399csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001400{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 PyObject *name_obj, *dialect_obj = NULL;
1402 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1405 return NULL;
1406 if (!IS_BASESTRING(name_obj)) {
1407 PyErr_SetString(PyExc_TypeError,
1408 "dialect name must be a string or unicode");
1409 return NULL;
1410 }
1411 dialect = _call_dialect(dialect_obj, kwargs);
1412 if (dialect == NULL)
1413 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001414 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001415 Py_DECREF(dialect);
1416 return NULL;
1417 }
1418 Py_DECREF(dialect);
1419 Py_INCREF(Py_None);
1420 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001421}
1422
1423static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001424csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001425{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001426 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1427 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 Py_INCREF(Py_None);
1429 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001430}
1431
1432static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001433csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001434{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001436}
1437
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001438static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001439csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001442 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1445 return NULL;
1446 if (new_limit != NULL) {
1447 if (!PyLong_CheckExact(new_limit)) {
1448 PyErr_Format(PyExc_TypeError,
1449 "limit must be an integer");
1450 return NULL;
1451 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001452 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1453 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1454 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 return NULL;
1456 }
1457 }
1458 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001459}
1460
Skip Montanarob4a04172003-03-20 23:29:12 +00001461/*
1462 * MODULE
1463 */
1464
1465PyDoc_STRVAR(csv_module_doc,
1466"CSV parsing and writing.\n"
1467"\n"
1468"This module provides classes that assist in the reading and writing\n"
1469"of Comma Separated Value (CSV) files, and implements the interface\n"
1470"described by PEP 305. Although many CSV files are simple to parse,\n"
1471"the format is not formally defined by a stable specification and\n"
1472"is subtle enough that parsing lines of a CSV file with something\n"
1473"like line.split(\",\") is bound to fail. The module supports three\n"
1474"basic APIs: reading, writing, and registration of dialects.\n"
1475"\n"
1476"\n"
1477"DIALECT REGISTRATION:\n"
1478"\n"
1479"Readers and writers support a dialect argument, which is a convenient\n"
1480"handle on a group of settings. When the dialect argument is a string,\n"
1481"it identifies one of the dialects previously registered with the module.\n"
1482"If it is a class or instance, the attributes of the argument are used as\n"
1483"the settings for the reader or writer:\n"
1484"\n"
1485" class excel:\n"
1486" delimiter = ','\n"
1487" quotechar = '\"'\n"
1488" escapechar = None\n"
1489" doublequote = True\n"
1490" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001491" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001492" quoting = QUOTE_MINIMAL\n"
1493"\n"
1494"SETTINGS:\n"
1495"\n"
1496" * quotechar - specifies a one-character string to use as the \n"
1497" quoting character. It defaults to '\"'.\n"
1498" * delimiter - specifies a one-character string to use as the \n"
1499" field separator. It defaults to ','.\n"
1500" * skipinitialspace - specifies how to interpret whitespace which\n"
1501" immediately follows a delimiter. It defaults to False, which\n"
1502" means that whitespace immediately following a delimiter is part\n"
1503" of the following field.\n"
1504" * lineterminator - specifies the character sequence which should \n"
1505" terminate rows.\n"
1506" * quoting - controls when quotes should be generated by the writer.\n"
1507" It can take on any of the following module constants:\n"
1508"\n"
1509" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1510" field contains either the quotechar or the delimiter\n"
1511" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1512" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001513" fields which do not parse as integers or floating point\n"
1514" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001515" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1516" * escapechar - specifies a one-character string used to escape \n"
1517" the delimiter when quoting is set to QUOTE_NONE.\n"
1518" * doublequote - controls the handling of quotes inside fields. When\n"
1519" True, two consecutive quotes are interpreted as one during read,\n"
1520" and when writing, each quote character embedded in the data is\n"
1521" written as two quotes\n");
1522
1523PyDoc_STRVAR(csv_reader_doc,
1524" csv_reader = reader(iterable [, dialect='excel']\n"
1525" [optional keyword args])\n"
1526" for row in csv_reader:\n"
1527" process(row)\n"
1528"\n"
1529"The \"iterable\" argument can be any object that returns a line\n"
1530"of input for each iteration, such as a file object or a list. The\n"
1531"optional \"dialect\" parameter is discussed below. The function\n"
1532"also accepts optional keyword arguments which override settings\n"
1533"provided by the dialect.\n"
1534"\n"
1535"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001536"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001537
1538PyDoc_STRVAR(csv_writer_doc,
1539" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1540" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001541" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001542" csv_writer.writerow(row)\n"
1543"\n"
1544" [or]\n"
1545"\n"
1546" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1547" [optional keyword args])\n"
1548" csv_writer.writerows(rows)\n"
1549"\n"
1550"The \"fileobj\" argument can be any object that supports the file API.\n");
1551
1552PyDoc_STRVAR(csv_list_dialects_doc,
1553"Return a list of all know dialect names.\n"
1554" names = csv.list_dialects()");
1555
1556PyDoc_STRVAR(csv_get_dialect_doc,
1557"Return the dialect instance associated with name.\n"
1558" dialect = csv.get_dialect(name)");
1559
1560PyDoc_STRVAR(csv_register_dialect_doc,
1561"Create a mapping from a string name to a dialect class.\n"
1562" dialect = csv.register_dialect(name, dialect)");
1563
1564PyDoc_STRVAR(csv_unregister_dialect_doc,
1565"Delete the name/dialect mapping associated with a string name.\n"
1566" csv.unregister_dialect(name)");
1567
Andrew McNamara31d88962005-01-12 03:45:10 +00001568PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001569"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001570" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001571"\n"
1572"Returns old limit. If limit is not given, no new limit is set and\n"
1573"the old limit is returned");
1574
Skip Montanarob4a04172003-03-20 23:29:12 +00001575static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001576 { "reader", (PyCFunction)csv_reader,
1577 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1578 { "writer", (PyCFunction)csv_writer,
1579 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1580 { "list_dialects", (PyCFunction)csv_list_dialects,
1581 METH_NOARGS, csv_list_dialects_doc},
1582 { "register_dialect", (PyCFunction)csv_register_dialect,
1583 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1584 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1585 METH_O, csv_unregister_dialect_doc},
1586 { "get_dialect", (PyCFunction)csv_get_dialect,
1587 METH_O, csv_get_dialect_doc},
1588 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1589 METH_VARARGS, csv_field_size_limit_doc},
1590 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001591};
1592
Martin v. Löwis1a214512008-06-11 05:26:20 +00001593static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 PyModuleDef_HEAD_INIT,
1595 "_csv",
1596 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001597 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001598 csv_methods,
1599 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001600 _csv_traverse,
1601 _csv_clear,
1602 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001603};
1604
Skip Montanarob4a04172003-03-20 23:29:12 +00001605PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001606PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001607{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 PyObject *module;
1609 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 if (PyType_Ready(&Dialect_Type) < 0)
1612 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001613
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 if (PyType_Ready(&Reader_Type) < 0)
1615 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001616
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 if (PyType_Ready(&Writer_Type) < 0)
1618 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 /* Create the module and add the functions */
1621 module = PyModule_Create(&_csvmodule);
1622 if (module == NULL)
1623 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 /* Add version to the module. */
1626 if (PyModule_AddStringConstant(module, "__version__",
1627 MODULE_VERSION) == -1)
1628 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001629
Antoine Pitroue7672d32012-05-16 11:33:08 +02001630 /* Set the field limit */
1631 _csvstate(module)->field_limit = 128 * 1024;
1632 /* Do I still need to add this var to the Module Dict? */
1633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001635 _csvstate(module)->dialects = PyDict_New();
1636 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001638 Py_INCREF(_csvstate(module)->dialects);
1639 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 /* Add quote styles into dictionary */
1643 for (style = quote_styles; style->name; style++) {
1644 if (PyModule_AddIntConstant(module, style->name,
1645 style->style) == -1)
1646 return NULL;
1647 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 /* Add the Dialect type */
1650 Py_INCREF(&Dialect_Type);
1651 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1652 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001655 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1656 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001658 Py_INCREF(_csvstate(module)->error_obj);
1659 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001661}