blob: 89ce1224e785a9a3a43c184031e0ac2d35d4edbb [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
/* Module version string (its consumer is outside this section). */
#define MODULE_VERSION "1.0"

#include "Python.h"
#include "structmember.h"

/* True when `o` is a Python str object; thin alias for PyUnicode_Check(). */
#define IS_BASESTRING(o) \
    PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000018
/* Per-module state; retrieved from a module object via PyModule_GetState(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry */
    long field_limit;      /* max parsed field size */
} _csvstate;

/* Fetch the _csvstate block attached to module object `o`. */
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
26
27static int
28_csv_clear(PyObject *m)
29{
30 Py_CLEAR(_csvstate(m)->error_obj);
31 Py_CLEAR(_csvstate(m)->dialects);
32 return 0;
33}
34
35static int
36_csv_traverse(PyObject *m, visitproc visit, void *arg)
37{
38 Py_VISIT(_csvstate(m)->error_obj);
39 Py_VISIT(_csvstate(m)->dialects);
40 return 0;
41}
42
43static void
44_csv_free(void *m)
45{
46 _csv_clear((PyObject *)m);
47}
48
/* Forward declaration; the definition is outside this section. */
static struct PyModuleDef _csvmodule;

/* State of the module instance that PyState_FindModule() returns for
   _csvmodule.  Every use performs the lookup again. */
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000052
/* States of the reader's character-level state machine
   (see parse_process_char):
     START_RECORD            at the start of a record
     START_FIELD             expecting a field
     ESCAPED_CHAR            previous char was the escapechar (unquoted)
     IN_FIELD                inside an unquoted field
     IN_QUOTED_FIELD         inside a quoted field
     ESCAPE_IN_QUOTED_FIELD  previous char was the escapechar (quoted)
     QUOTE_IN_QUOTED_FIELD   a quotechar was seen inside a quoted field
     EAT_CRNL                skipping '\r' / '\n' after the end of a record */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
58
/* Quoting styles; the numeric values (0..3) are what a dialect's integer
   "quoting" attribute is validated against in dialect_check_quoting(). */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
62
/* Pairs a QuoteStyle value with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;
67
/* Table of the known quoting styles.  The final entry has a NULL name and
   acts as the terminator for loops that walk the table. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
75
/* Instance layout of _csv.Dialect.  Character options use 0 to mean
   "not set" (the getters expose that as None). */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character */
    Py_UCS4 escapechar;         /* escape character */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator;   /* string to write between records */
    int quoting;                /* style of quoting to write */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* Forward declaration; defined after dialect_new(). */
static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000091
/* Instance layout of _csv.reader. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer */
    Py_ssize_t field_size;  /* size of allocated buffer */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number */
} ReaderObj;

/* Forward declaration of the reader type object. */
static PyTypeObject Reader_Type;

/* Exact type test (subclasses do not match). */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000111
/* Instance layout of the writer object. */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;           /* buffer for parser.join */
    Py_ssize_t rec_size;    /* size of allocated record */
    Py_ssize_t rec_len;     /* length of record */
    int num_fields;         /* number of fields in record */
} WriterObj;

/* Forward declaration; the definition is outside this section. */
static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
/*
 * DIALECT class
 */
130
131static PyObject *
132get_dialect_from_registry(PyObject * name_obj)
133{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000134 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000135
Antoine Pitroue7672d32012-05-16 11:33:08 +0200136 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 if (dialect_obj == NULL) {
138 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200139 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 }
141 else
142 Py_INCREF(dialect_obj);
143 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000144}
145
Skip Montanarob4a04172003-03-20 23:29:12 +0000146static PyObject *
147get_string(PyObject *str)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 Py_XINCREF(str);
150 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000151}
152
Skip Montanarob4a04172003-03-20 23:29:12 +0000153static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200154get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000155{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 if (c == '\0') {
157 Py_INCREF(Py_None);
158 return Py_None;
159 }
160 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200161 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165Dialect_get_lineterminator(DialectObj *self)
166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000168}
169
Skip Montanarob4a04172003-03-20 23:29:12 +0000170static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000171Dialect_get_delimiter(DialectObj *self)
172{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000173 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174}
175
176static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000177Dialect_get_escapechar(DialectObj *self)
178{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000179 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Andrew McNamara1196cf12005-01-07 04:42:45 +0000182static PyObject *
183Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000185 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
188static PyObject *
189Dialect_get_quoting(DialectObj *self)
190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000191 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
194static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000195_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000197 if (src == NULL)
198 *target = dflt;
199 else
200 *target = PyObject_IsTrue(src);
201 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000202}
203
Andrew McNamara1196cf12005-01-07 04:42:45 +0000204static int
205_set_int(const char *name, int *target, PyObject *src, int dflt)
206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 if (src == NULL)
208 *target = dflt;
209 else {
210 long value;
211 if (!PyLong_CheckExact(src)) {
212 PyErr_Format(PyExc_TypeError,
213 "\"%s\" must be an integer", name);
214 return -1;
215 }
216 value = PyLong_AsLong(src);
217 if (value == -1 && PyErr_Occurred())
218 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000219#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 if (value > INT_MAX || value < INT_MIN) {
221 PyErr_Format(PyExc_ValueError,
222 "integer out of range for \"%s\"", name);
223 return -1;
224 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000225#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 *target = (int)value;
227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200232_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 if (src == NULL)
235 *target = dflt;
236 else {
237 *target = '\0';
238 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t len;
Victor Stinner9e30aa52011-11-21 02:49:52 +0100240 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200241 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 PyErr_Format(PyExc_TypeError,
243 "\"%s\" must be an 1-character string",
244 name);
245 return -1;
246 }
247 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200248 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 }
250 }
251 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000252}
253
254static int
255_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
256{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 if (src == NULL)
258 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
259 else {
260 if (src == Py_None)
261 *target = NULL;
262 else if (!IS_BASESTRING(src)) {
263 PyErr_Format(PyExc_TypeError,
264 "\"%s\" must be a string", name);
265 return -1;
266 }
267 else {
268 Py_XDECREF(*target);
269 Py_INCREF(src);
270 *target = src;
271 }
272 }
273 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000274}
275
276static int
277dialect_check_quoting(int quoting)
278{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000279 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000280
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 for (qs = quote_styles; qs->name; qs++) {
282 if (qs->style == quoting)
283 return 0;
284 }
285 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
286 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000287}
Skip Montanarob4a04172003-03-20 23:29:12 +0000288
/* Byte offset of member `x` inside DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/* Integer members of DialectObj exposed directly as read-only attributes. */
static struct PyMemberDef Dialect_memberlist[] = {
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
297
/* Computed attributes of Dialect.  Only getters are supplied, so no setter
   is registered for any of them. */
static PyGetSetDef Dialect_getsetlist[] = {
    { "delimiter",          (getter)Dialect_get_delimiter},
    { "escapechar",         (getter)Dialect_get_escapechar},
    { "lineterminator",     (getter)Dialect_get_lineterminator},
    { "quotechar",          (getter)Dialect_get_quotechar},
    { "quoting",            (getter)Dialect_get_quoting},
    {NULL},
};
306
307static void
308Dialect_dealloc(DialectObj *self)
309{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 Py_XDECREF(self->lineterminator);
311 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000312}
313
/* Keyword names accepted by dialect_new(); the order must match the
   argument pointers passed to PyArg_ParseTupleAndKeywords() there. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
326
/* tp_new for Dialect.
 *
 * Accepts an optional `dialect` (a registered dialect name, or any object
 * whose attributes supply option values) plus optional keyword overrides for
 * each option.  Resolution order for every option is: explicit argument,
 * then the attribute of `dialect` (if given and present), then the built-in
 * default.
 *
 * Returns a new reference to a Dialect instance, or NULL with an exception
 * set.  When `dialect` already is a Dialect instance and no override was
 * supplied, that same instance is returned instead of building a new one.
 */
static PyObject *
dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
{
    DialectObj *self;
    PyObject *ret = NULL;
    PyObject *dialect = NULL;
    PyObject *delimiter = NULL;
    PyObject *doublequote = NULL;
    PyObject *escapechar = NULL;
    PyObject *lineterminator = NULL;
    PyObject *quotechar = NULL;
    PyObject *quoting = NULL;
    PyObject *skipinitialspace = NULL;
    PyObject *strict = NULL;

    /* All arguments are optional; the parsed pointers are borrowed. */
    if (!PyArg_ParseTupleAndKeywords(args, kwargs,
                                     "|OOOOOOOOO", dialect_kws,
                                     &dialect,
                                     &delimiter,
                                     &doublequote,
                                     &escapechar,
                                     &lineterminator,
                                     &quotechar,
                                     &quoting,
                                     &skipinitialspace,
                                     &strict))
        return NULL;

    if (dialect != NULL) {
        /* From here on `dialect` is an owned reference in both branches. */
        if (IS_BASESTRING(dialect)) {
            dialect = get_dialect_from_registry(dialect);
            if (dialect == NULL)
                return NULL;
        }
        else
            Py_INCREF(dialect);
        /* Can we reuse this instance? */
        if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
            delimiter == 0 &&
            doublequote == 0 &&
            escapechar == 0 &&
            lineterminator == 0 &&
            quotechar == 0 &&
            quoting == 0 &&
            skipinitialspace == 0 &&
            strict == 0)
            return dialect;
    }

    self = (DialectObj *)type->tp_alloc(type, 0);
    if (self == NULL) {
        Py_XDECREF(dialect);
        return NULL;
    }
    self->lineterminator = NULL;

    /* Turn the borrowed argument pointers into owned references so that the
       cleanup code at `err` can release every option uniformly, whether it
       came from the arguments or from PyObject_GetAttrString() below. */
    Py_XINCREF(delimiter);
    Py_XINCREF(doublequote);
    Py_XINCREF(escapechar);
    Py_XINCREF(lineterminator);
    Py_XINCREF(quotechar);
    Py_XINCREF(quoting);
    Py_XINCREF(skipinitialspace);
    Py_XINCREF(strict);
    if (dialect != NULL) {
        /* Fill in options that were not given explicitly from the dialect
           object's attributes. */
#define DIALECT_GETATTR(v, n) \
        if (v == NULL) \
            v = PyObject_GetAttrString(dialect, n)
        DIALECT_GETATTR(delimiter, "delimiter");
        DIALECT_GETATTR(doublequote, "doublequote");
        DIALECT_GETATTR(escapechar, "escapechar");
        DIALECT_GETATTR(lineterminator, "lineterminator");
        DIALECT_GETATTR(quotechar, "quotechar");
        DIALECT_GETATTR(quoting, "quoting");
        DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
        DIALECT_GETATTR(strict, "strict");
        /* A failed lookup leaves the option NULL (so its default applies);
           whatever exception the lookups raised is discarded here. */
        PyErr_Clear();
    }

    /* check types and convert to C values */
#define DIASET(meth, name, target, src, dflt) \
    if (meth(name, target, src, dflt)) \
        goto err
    DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
    DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
    DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
    DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
    DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
    DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
    DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
    DIASET(_set_bool, "strict", &self->strict, strict, 0);

    /* validate options */
    if (dialect_check_quoting(self->quoting))
        goto err;
    if (self->delimiter == 0) {
        PyErr_SetString(PyExc_TypeError, "delimiter must be set");
        goto err;
    }
    /* quotechar=None with no quoting value given means "no quoting". */
    if (quotechar == Py_None && quoting == NULL)
        self->quoting = QUOTE_NONE;
    if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
        PyErr_SetString(PyExc_TypeError,
                        "quotechar must be set if quoting enabled");
        goto err;
    }
    if (self->lineterminator == 0) {
        PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
        goto err;
    }

    /* Success: take an extra reference for the caller; the unconditional
       Py_XDECREF(self) below then drops the allocation's own reference.
       On the error path `ret` stays NULL and that DECREF frees `self`. */
    ret = (PyObject *)self;
    Py_INCREF(self);
err:
    Py_XDECREF(self);
    Py_XDECREF(dialect);
    Py_XDECREF(delimiter);
    Py_XDECREF(doublequote);
    Py_XDECREF(escapechar);
    Py_XDECREF(lineterminator);
    Py_XDECREF(quotechar);
    Py_XDECREF(quoting);
    Py_XDECREF(skipinitialspace);
    Py_XDECREF(strict);
    return ret;
}
453
454
PyDoc_STRVAR(Dialect_Type_doc,
"CSV dialect\n"
"\n"
"The Dialect type records CSV parsing and generation options.\n");

/* Type object for _csv.Dialect.  Instances are created by dialect_new();
   the type can be subclassed (Py_TPFLAGS_BASETYPE) and is not GC-tracked
   (no tp_traverse / tp_clear). */
static PyTypeObject Dialect_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.Dialect",                         /* tp_name */
    sizeof(DialectObj),                     /* tp_basicsize */
    0,                                      /* tp_itemsize */
    /*  methods  */
    (destructor)Dialect_dealloc,            /* tp_dealloc */
    (printfunc)0,                           /* tp_print */
    (getattrfunc)0,                         /* tp_getattr */
    (setattrfunc)0,                         /* tp_setattr */
    0,                                      /* tp_reserved */
    (reprfunc)0,                            /* tp_repr */
    0,                                      /* tp_as_number */
    0,                                      /* tp_as_sequence */
    0,                                      /* tp_as_mapping */
    (hashfunc)0,                            /* tp_hash */
    (ternaryfunc)0,                         /* tp_call */
    (reprfunc)0,                            /* tp_str */
    0,                                      /* tp_getattro */
    0,                                      /* tp_setattro */
    0,                                      /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    Dialect_Type_doc,                       /* tp_doc */
    0,                                      /* tp_traverse */
    0,                                      /* tp_clear */
    0,                                      /* tp_richcompare */
    0,                                      /* tp_weaklistoffset */
    0,                                      /* tp_iter */
    0,                                      /* tp_iternext */
    0,                                      /* tp_methods */
    Dialect_memberlist,                     /* tp_members */
    Dialect_getsetlist,                     /* tp_getset */
    0,                                      /* tp_base */
    0,                                      /* tp_dict */
    0,                                      /* tp_descr_get */
    0,                                      /* tp_descr_set */
    0,                                      /* tp_dictoffset */
    0,                                      /* tp_init */
    0,                                      /* tp_alloc */
    dialect_new,                            /* tp_new */
    0,                                      /* tp_free */
};
502
Andrew McNamara91b97462005-01-11 01:07:23 +0000503/*
504 * Return an instance of the dialect type, given a Python instance or kwarg
505 * description of the dialect
506 */
507static PyObject *
508_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
509{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510 PyObject *ctor_args;
511 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000512
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000513 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
514 if (ctor_args == NULL)
515 return NULL;
516 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
517 Py_DECREF(ctor_args);
518 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000519}
520
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000524static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000525parse_save_field(ReaderObj *self)
526{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000527 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000528
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200529 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
530 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000531 if (field == NULL)
532 return -1;
533 self->field_len = 0;
534 if (self->numeric_field) {
535 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000536
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 self->numeric_field = 0;
538 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200540 if (tmp == NULL)
541 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 field = tmp;
543 }
544 PyList_Append(self->fields, field);
545 Py_DECREF(field);
546 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000547}
548
549static int
550parse_grow_buff(ReaderObj *self)
551{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000552 if (self->field_size == 0) {
553 self->field_size = 4096;
554 if (self->field != NULL)
555 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200556 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 }
558 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200559 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000560 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000561 PyErr_NoMemory();
562 return 0;
563 }
564 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200565 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 }
567 if (self->field == NULL) {
568 PyErr_NoMemory();
569 return 0;
570 }
571 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000572}
573
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000574static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200575parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000576{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200577 if (self->field_len >= _csvstate_global->field_limit) {
578 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
579 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 return -1;
581 }
582 if (self->field_len == self->field_size && !parse_grow_buff(self))
583 return -1;
584 self->field[self->field_len++] = c;
585 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586}
587
/* Feed one code point to the reader's state machine.
 *
 * `c` is a character from the input line, or '\0', which the caller passes
 * once after the last character of every line as an end-of-line marker.
 * Depending on self->state the character is appended to the current field,
 * ends the field (parse_save_field), or only changes the state.
 *
 * Returns 0 on success, -1 with an exception set on error.
 */
static int
parse_process_char(ReaderObj *self, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* An escapechar directly before end of line yields a newline
           character in the field. */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state so the field
               continues on the next input line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* Same end-of-line substitution as in ESCAPED_CHAR. */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in an quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and carry on as an
               unquoted field */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            /* NOTE(review): delimiter/quotechar are Py_UCS4 but are
               formatted with %c - confirm behaviour for non-ASCII values. */
            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* after the end of a record: only line-break characters or the
           end-of-line marker are acceptable */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
757
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000758static int
759parse_reset(ReaderObj *self)
760{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000761 Py_XDECREF(self->fields);
762 self->fields = PyList_New(0);
763 if (self->fields == NULL)
764 return -1;
765 self->field_len = 0;
766 self->state = START_RECORD;
767 self->numeric_field = 0;
768 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000769}
Skip Montanarob4a04172003-03-20 23:29:12 +0000770
771static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000772Reader_iternext(ReaderObj *self)
773{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000774 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200775 Py_UCS4 c;
776 Py_ssize_t pos, linelen;
777 unsigned int kind;
778 void *data;
779 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000780
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 if (parse_reset(self) < 0)
782 return NULL;
783 do {
784 lineobj = PyIter_Next(self->input_iter);
785 if (lineobj == NULL) {
786 /* End of input OR exception */
787 if (!PyErr_Occurred() && self->field_len != 0)
Antoine Pitroue7672d32012-05-16 11:33:08 +0200788 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 "newline inside string");
790 return NULL;
791 }
792 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200793 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000794 "iterator should return strings, "
795 "not %.200s "
796 "(did you open the file in text mode?)",
797 lineobj->ob_type->tp_name
798 );
799 Py_DECREF(lineobj);
800 return NULL;
801 }
802 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200803 kind = PyUnicode_KIND(lineobj);
804 data = PyUnicode_DATA(lineobj);
805 pos = 0;
806 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200808 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000810 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200811 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000812 "line contains NULL byte");
813 goto err;
814 }
815 if (parse_process_char(self, c) < 0) {
816 Py_DECREF(lineobj);
817 goto err;
818 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200819 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000820 }
821 Py_DECREF(lineobj);
822 if (parse_process_char(self, 0) < 0)
823 goto err;
824 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000825
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 fields = self->fields;
827 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000828err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000829 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000830}
831
832static void
833Reader_dealloc(ReaderObj *self)
834{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 PyObject_GC_UnTrack(self);
836 Py_XDECREF(self->dialect);
837 Py_XDECREF(self->input_iter);
838 Py_XDECREF(self->fields);
839 if (self->field != NULL)
840 PyMem_Free(self->field);
841 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000842}
843
844static int
845Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
846{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 Py_VISIT(self->dialect);
848 Py_VISIT(self->input_iter);
849 Py_VISIT(self->fields);
850 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000851}
852
853static int
854Reader_clear(ReaderObj *self)
855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 Py_CLEAR(self->dialect);
857 Py_CLEAR(self->input_iter);
858 Py_CLEAR(self->fields);
859 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000860}
861
862PyDoc_STRVAR(Reader_Type_doc,
863"CSV reader\n"
864"\n"
865"Reader objects are responsible for reading and parsing tabular data\n"
866"in CSV format.\n"
867);
868
869static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000871};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000872#define R_OFF(x) offsetof(ReaderObj, x)
873
874static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
876 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
877 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000878};
879
Skip Montanarob4a04172003-03-20 23:29:12 +0000880
881static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 PyVarObject_HEAD_INIT(NULL, 0)
883 "_csv.reader", /*tp_name*/
884 sizeof(ReaderObj), /*tp_basicsize*/
885 0, /*tp_itemsize*/
886 /* methods */
887 (destructor)Reader_dealloc, /*tp_dealloc*/
888 (printfunc)0, /*tp_print*/
889 (getattrfunc)0, /*tp_getattr*/
890 (setattrfunc)0, /*tp_setattr*/
891 0, /*tp_reserved*/
892 (reprfunc)0, /*tp_repr*/
893 0, /*tp_as_number*/
894 0, /*tp_as_sequence*/
895 0, /*tp_as_mapping*/
896 (hashfunc)0, /*tp_hash*/
897 (ternaryfunc)0, /*tp_call*/
898 (reprfunc)0, /*tp_str*/
899 0, /*tp_getattro*/
900 0, /*tp_setattro*/
901 0, /*tp_as_buffer*/
902 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
903 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
904 Reader_Type_doc, /*tp_doc*/
905 (traverseproc)Reader_traverse, /*tp_traverse*/
906 (inquiry)Reader_clear, /*tp_clear*/
907 0, /*tp_richcompare*/
908 0, /*tp_weaklistoffset*/
909 PyObject_SelfIter, /*tp_iter*/
910 (getiterfunc)Reader_iternext, /*tp_iternext*/
911 Reader_methods, /*tp_methods*/
912 Reader_memberlist, /*tp_members*/
913 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000914
915};
916
917static PyObject *
918csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
919{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 PyObject * iterator, * dialect = NULL;
921 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 if (!self)
924 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000925
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 self->dialect = NULL;
927 self->fields = NULL;
928 self->input_iter = NULL;
929 self->field = NULL;
930 self->field_size = 0;
931 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000932
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 if (parse_reset(self) < 0) {
934 Py_DECREF(self);
935 return NULL;
936 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
939 Py_DECREF(self);
940 return NULL;
941 }
942 self->input_iter = PyObject_GetIter(iterator);
943 if (self->input_iter == NULL) {
944 PyErr_SetString(PyExc_TypeError,
945 "argument 1 must be an iterator");
946 Py_DECREF(self);
947 return NULL;
948 }
949 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
950 if (self->dialect == NULL) {
951 Py_DECREF(self);
952 return NULL;
953 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 PyObject_GC_Track(self);
956 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000957}
958
959/*
960 * WRITER
961 */
962/* ---------------------------------------------------------------- */
963static void
964join_reset(WriterObj *self)
965{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 self->rec_len = 0;
967 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000968}
969
970#define MEM_INCR 32768
971
972/* Calculate new record length or append field to record. Return new
973 * record length.
974 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000975static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200976join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
977 Py_ssize_t field_len, int quote_empty, int *quoted,
978 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000979{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 DialectObj *dialect = self->dialect;
981 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000982 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000983
984#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 do {\
986 if (copy_phase) \
987 self->rec[rec_len] = c;\
988 rec_len++;\
989 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 /* If this is not the first field we need a field separator */
994 if (self->num_fields > 0)
995 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000996
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 /* Handle preceding quote */
998 if (copy_phase && *quoted)
999 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001000
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 /* Copy/count field data */
1002 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001003 for (i = 0; field_data && (i < field_len); i++) {
1004 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001006
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 if (c == dialect->delimiter ||
1008 c == dialect->escapechar ||
1009 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001010 PyUnicode_FindChar(
1011 dialect->lineterminator, c, 0,
1012 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 if (dialect->quoting == QUOTE_NONE)
1014 want_escape = 1;
1015 else {
1016 if (c == dialect->quotechar) {
1017 if (dialect->doublequote)
1018 ADDCH(dialect->quotechar);
1019 else
1020 want_escape = 1;
1021 }
1022 if (!want_escape)
1023 *quoted = 1;
1024 }
1025 if (want_escape) {
1026 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001027 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 "need to escape, but no escapechar set");
1029 return -1;
1030 }
1031 ADDCH(dialect->escapechar);
1032 }
1033 }
1034 /* Copy field character into record buffer.
1035 */
1036 ADDCH(c);
1037 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001038
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 /* If field is empty check if it needs to be quoted.
1040 */
1041 if (i == 0 && quote_empty) {
1042 if (dialect->quoting == QUOTE_NONE) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001043 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 "single empty field record must be quoted");
1045 return -1;
1046 }
1047 else
1048 *quoted = 1;
1049 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 if (*quoted) {
1052 if (copy_phase)
1053 ADDCH(dialect->quotechar);
1054 else
1055 rec_len += 2;
1056 }
1057 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001058#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001059}
1060
1061static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001062join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001063{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001064
Antoine Pitrou40455752010-08-15 18:51:10 +00001065 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 PyErr_NoMemory();
1067 return 0;
1068 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 if (rec_len > self->rec_size) {
1071 if (self->rec_size == 0) {
1072 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1073 if (self->rec != NULL)
1074 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001075 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 }
1077 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001078 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001081 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001082 if (self->rec == NULL)
1083 PyMem_Free(old_rec);
1084 }
1085 if (self->rec == NULL) {
1086 PyErr_NoMemory();
1087 return 0;
1088 }
1089 }
1090 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001091}
1092
1093static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001094join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001095{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001096 unsigned int field_kind = -1;
1097 void *field_data = NULL;
1098 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001099 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001100
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001101 if (field != NULL) {
1102 field_kind = PyUnicode_KIND(field);
1103 field_data = PyUnicode_DATA(field);
1104 field_len = PyUnicode_GET_LENGTH(field);
1105 }
1106 rec_len = join_append_data(self, field_kind, field_data, field_len,
1107 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 if (rec_len < 0)
1109 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 /* grow record buffer if necessary */
1112 if (!join_check_rec_size(self, rec_len))
1113 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001114
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001115 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1116 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001120}
1121
1122static int
1123join_append_lineterminator(WriterObj *self)
1124{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001125 Py_ssize_t terminator_len, i;
1126 unsigned int term_kind;
1127 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001129 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 if (terminator_len == -1)
1131 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 /* grow record buffer if necessary */
1134 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1135 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001137 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1138 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1139 for (i = 0; i < terminator_len; i++)
1140 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144}
1145
1146PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001147"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001148"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001149"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001150"elements will be converted to string.");
1151
1152static PyObject *
1153csv_writerow(WriterObj *self, PyObject *seq)
1154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001156 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001157 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 if (!PySequence_Check(seq))
Antoine Pitroue7672d32012-05-16 11:33:08 +02001160 return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 len = PySequence_Length(seq);
1163 if (len < 0)
1164 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 /* Join all fields in internal buffer.
1167 */
1168 join_reset(self);
1169 for (i = 0; i < len; i++) {
1170 PyObject *field;
1171 int append_ok;
1172 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001173
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001174 field = PySequence_GetItem(seq, i);
1175 if (field == NULL)
1176 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 switch (dialect->quoting) {
1179 case QUOTE_NONNUMERIC:
1180 quoted = !PyNumber_Check(field);
1181 break;
1182 case QUOTE_ALL:
1183 quoted = 1;
1184 break;
1185 default:
1186 quoted = 0;
1187 break;
1188 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001191 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 Py_DECREF(field);
1193 }
1194 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001195 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 Py_DECREF(field);
1197 }
1198 else {
1199 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 str = PyObject_Str(field);
1202 Py_DECREF(field);
1203 if (str == NULL)
1204 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001205 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 Py_DECREF(str);
1207 }
1208 if (!append_ok)
1209 return NULL;
1210 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 /* Add line terminator.
1213 */
1214 if (!join_append_lineterminator(self))
1215 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001216
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001217 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1218 (void *) self->rec, self->rec_len);
1219 if (line == NULL)
1220 return NULL;
1221 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1222 Py_DECREF(line);
1223 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001224}
1225
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001226PyDoc_STRVAR(csv_writerows_doc,
1227"writerows(sequence of sequences)\n"
1228"\n"
1229"Construct and write a series of sequences to a csv file. Non-string\n"
1230"elements will be converted to string.");
1231
Skip Montanarob4a04172003-03-20 23:29:12 +00001232static PyObject *
1233csv_writerows(WriterObj *self, PyObject *seqseq)
1234{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001236
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 row_iter = PyObject_GetIter(seqseq);
1238 if (row_iter == NULL) {
1239 PyErr_SetString(PyExc_TypeError,
1240 "writerows() argument must be iterable");
1241 return NULL;
1242 }
1243 while ((row_obj = PyIter_Next(row_iter))) {
1244 result = csv_writerow(self, row_obj);
1245 Py_DECREF(row_obj);
1246 if (!result) {
1247 Py_DECREF(row_iter);
1248 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001249 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 else
1251 Py_DECREF(result);
1252 }
1253 Py_DECREF(row_iter);
1254 if (PyErr_Occurred())
1255 return NULL;
1256 Py_INCREF(Py_None);
1257 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001258}
1259
1260static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1262 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1263 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001264};
1265
1266#define W_OFF(x) offsetof(WriterObj, x)
1267
1268static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1270 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001271};
1272
1273static void
1274Writer_dealloc(WriterObj *self)
1275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyObject_GC_UnTrack(self);
1277 Py_XDECREF(self->dialect);
1278 Py_XDECREF(self->writeline);
1279 if (self->rec != NULL)
1280 PyMem_Free(self->rec);
1281 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001282}
1283
1284static int
1285Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1286{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 Py_VISIT(self->dialect);
1288 Py_VISIT(self->writeline);
1289 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001290}
1291
1292static int
1293Writer_clear(WriterObj *self)
1294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 Py_CLEAR(self->dialect);
1296 Py_CLEAR(self->writeline);
1297 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001298}
1299
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001301"CSV writer\n"
1302"\n"
1303"Writer objects are responsible for generating tabular data\n"
1304"in CSV format from sequence input.\n"
1305);
1306
1307static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 PyVarObject_HEAD_INIT(NULL, 0)
1309 "_csv.writer", /*tp_name*/
1310 sizeof(WriterObj), /*tp_basicsize*/
1311 0, /*tp_itemsize*/
1312 /* methods */
1313 (destructor)Writer_dealloc, /*tp_dealloc*/
1314 (printfunc)0, /*tp_print*/
1315 (getattrfunc)0, /*tp_getattr*/
1316 (setattrfunc)0, /*tp_setattr*/
1317 0, /*tp_reserved*/
1318 (reprfunc)0, /*tp_repr*/
1319 0, /*tp_as_number*/
1320 0, /*tp_as_sequence*/
1321 0, /*tp_as_mapping*/
1322 (hashfunc)0, /*tp_hash*/
1323 (ternaryfunc)0, /*tp_call*/
1324 (reprfunc)0, /*tp_str*/
1325 0, /*tp_getattro*/
1326 0, /*tp_setattro*/
1327 0, /*tp_as_buffer*/
1328 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1329 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1330 Writer_Type_doc,
1331 (traverseproc)Writer_traverse, /*tp_traverse*/
1332 (inquiry)Writer_clear, /*tp_clear*/
1333 0, /*tp_richcompare*/
1334 0, /*tp_weaklistoffset*/
1335 (getiterfunc)0, /*tp_iter*/
1336 (getiterfunc)0, /*tp_iternext*/
1337 Writer_methods, /*tp_methods*/
1338 Writer_memberlist, /*tp_members*/
1339 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001340};
1341
1342static PyObject *
1343csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1344{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 PyObject * output_file, * dialect = NULL;
1346 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001347 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 if (!self)
1350 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 self->dialect = NULL;
1353 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001354
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 self->rec = NULL;
1356 self->rec_size = 0;
1357 self->rec_len = 0;
1358 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001360 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1361 Py_DECREF(self);
1362 return NULL;
1363 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001364 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1366 PyErr_SetString(PyExc_TypeError,
1367 "argument 1 must have a \"write\" method");
1368 Py_DECREF(self);
1369 return NULL;
1370 }
1371 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1372 if (self->dialect == NULL) {
1373 Py_DECREF(self);
1374 return NULL;
1375 }
1376 PyObject_GC_Track(self);
1377 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378}
1379
1380/*
1381 * DIALECT REGISTRY
1382 */
1383static PyObject *
1384csv_list_dialects(PyObject *module, PyObject *args)
1385{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001386 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001387}
1388
1389static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001390csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 PyObject *name_obj, *dialect_obj = NULL;
1393 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1396 return NULL;
1397 if (!IS_BASESTRING(name_obj)) {
1398 PyErr_SetString(PyExc_TypeError,
1399 "dialect name must be a string or unicode");
1400 return NULL;
1401 }
1402 dialect = _call_dialect(dialect_obj, kwargs);
1403 if (dialect == NULL)
1404 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001405 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 Py_DECREF(dialect);
1407 return NULL;
1408 }
1409 Py_DECREF(dialect);
1410 Py_INCREF(Py_None);
1411 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001412}
1413
1414static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001415csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001416{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001417 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1418 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 Py_INCREF(Py_None);
1420 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001421}
1422
1423static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001424csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001427}
1428
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001429static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001430csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001431{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001433 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1436 return NULL;
1437 if (new_limit != NULL) {
1438 if (!PyLong_CheckExact(new_limit)) {
1439 PyErr_Format(PyExc_TypeError,
1440 "limit must be an integer");
1441 return NULL;
1442 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001443 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1444 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1445 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 return NULL;
1447 }
1448 }
1449 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001450}
1451
Skip Montanarob4a04172003-03-20 23:29:12 +00001452/*
1453 * MODULE
1454 */
1455
1456PyDoc_STRVAR(csv_module_doc,
1457"CSV parsing and writing.\n"
1458"\n"
1459"This module provides classes that assist in the reading and writing\n"
1460"of Comma Separated Value (CSV) files, and implements the interface\n"
1461"described by PEP 305. Although many CSV files are simple to parse,\n"
1462"the format is not formally defined by a stable specification and\n"
1463"is subtle enough that parsing lines of a CSV file with something\n"
1464"like line.split(\",\") is bound to fail. The module supports three\n"
1465"basic APIs: reading, writing, and registration of dialects.\n"
1466"\n"
1467"\n"
1468"DIALECT REGISTRATION:\n"
1469"\n"
1470"Readers and writers support a dialect argument, which is a convenient\n"
1471"handle on a group of settings. When the dialect argument is a string,\n"
1472"it identifies one of the dialects previously registered with the module.\n"
1473"If it is a class or instance, the attributes of the argument are used as\n"
1474"the settings for the reader or writer:\n"
1475"\n"
1476" class excel:\n"
1477" delimiter = ','\n"
1478" quotechar = '\"'\n"
1479" escapechar = None\n"
1480" doublequote = True\n"
1481" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001482" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001483" quoting = QUOTE_MINIMAL\n"
1484"\n"
1485"SETTINGS:\n"
1486"\n"
1487" * quotechar - specifies a one-character string to use as the \n"
1488" quoting character. It defaults to '\"'.\n"
1489" * delimiter - specifies a one-character string to use as the \n"
1490" field separator. It defaults to ','.\n"
1491" * skipinitialspace - specifies how to interpret whitespace which\n"
1492" immediately follows a delimiter. It defaults to False, which\n"
1493" means that whitespace immediately following a delimiter is part\n"
1494" of the following field.\n"
1495" * lineterminator - specifies the character sequence which should \n"
1496" terminate rows.\n"
1497" * quoting - controls when quotes should be generated by the writer.\n"
1498" It can take on any of the following module constants:\n"
1499"\n"
1500" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1501" field contains either the quotechar or the delimiter\n"
1502" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1503" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001504" fields which do not parse as integers or floating point\n"
1505" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001506" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1507" * escapechar - specifies a one-character string used to escape \n"
1508" the delimiter when quoting is set to QUOTE_NONE.\n"
1509" * doublequote - controls the handling of quotes inside fields. When\n"
1510" True, two consecutive quotes are interpreted as one during read,\n"
1511" and when writing, each quote character embedded in the data is\n"
1512" written as two quotes\n");
1513
1514PyDoc_STRVAR(csv_reader_doc,
1515" csv_reader = reader(iterable [, dialect='excel']\n"
1516" [optional keyword args])\n"
1517" for row in csv_reader:\n"
1518" process(row)\n"
1519"\n"
1520"The \"iterable\" argument can be any object that returns a line\n"
1521"of input for each iteration, such as a file object or a list. The\n"
1522"optional \"dialect\" parameter is discussed below. The function\n"
1523"also accepts optional keyword arguments which override settings\n"
1524"provided by the dialect.\n"
1525"\n"
1526"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001527"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001528
1529PyDoc_STRVAR(csv_writer_doc,
1530" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1531" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001532" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001533" csv_writer.writerow(row)\n"
1534"\n"
1535" [or]\n"
1536"\n"
1537" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1538" [optional keyword args])\n"
1539" csv_writer.writerows(rows)\n"
1540"\n"
1541"The \"fileobj\" argument can be any object that supports the file API.\n");
1542
1543PyDoc_STRVAR(csv_list_dialects_doc,
"Return a list of all known dialect names.\n"
1545" names = csv.list_dialects()");
1546
1547PyDoc_STRVAR(csv_get_dialect_doc,
1548"Return the dialect instance associated with name.\n"
1549" dialect = csv.get_dialect(name)");
1550
1551PyDoc_STRVAR(csv_register_dialect_doc,
1552"Create a mapping from a string name to a dialect class.\n"
1553" dialect = csv.register_dialect(name, dialect)");
1554
1555PyDoc_STRVAR(csv_unregister_dialect_doc,
1556"Delete the name/dialect mapping associated with a string name.\n"
1557" csv.unregister_dialect(name)");
1558
Andrew McNamara31d88962005-01-12 03:45:10 +00001559PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001560"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001561" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001562"\n"
1563"Returns old limit. If limit is not given, no new limit is set and\n"
1564"the old limit is returned");
1565
Skip Montanarob4a04172003-03-20 23:29:12 +00001566static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001567 { "reader", (PyCFunction)csv_reader,
1568 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1569 { "writer", (PyCFunction)csv_writer,
1570 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1571 { "list_dialects", (PyCFunction)csv_list_dialects,
1572 METH_NOARGS, csv_list_dialects_doc},
1573 { "register_dialect", (PyCFunction)csv_register_dialect,
1574 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1575 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1576 METH_O, csv_unregister_dialect_doc},
1577 { "get_dialect", (PyCFunction)csv_get_dialect,
1578 METH_O, csv_get_dialect_doc},
1579 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1580 METH_VARARGS, csv_field_size_limit_doc},
1581 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001582};
1583
Martin v. Löwis1a214512008-06-11 05:26:20 +00001584static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 PyModuleDef_HEAD_INIT,
1586 "_csv",
1587 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001588 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001589 csv_methods,
1590 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001591 _csv_traverse,
1592 _csv_clear,
1593 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001594};
1595
Skip Montanarob4a04172003-03-20 23:29:12 +00001596PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001597PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001598{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 PyObject *module;
1600 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001601
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 if (PyType_Ready(&Dialect_Type) < 0)
1603 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 if (PyType_Ready(&Reader_Type) < 0)
1606 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 if (PyType_Ready(&Writer_Type) < 0)
1609 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001610
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 /* Create the module and add the functions */
1612 module = PyModule_Create(&_csvmodule);
1613 if (module == NULL)
1614 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001615
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 /* Add version to the module. */
1617 if (PyModule_AddStringConstant(module, "__version__",
1618 MODULE_VERSION) == -1)
1619 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001620
Antoine Pitroue7672d32012-05-16 11:33:08 +02001621 /* Set the field limit */
1622 _csvstate(module)->field_limit = 128 * 1024;
1623 /* Do I still need to add this var to the Module Dict? */
1624
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001626 _csvstate(module)->dialects = PyDict_New();
1627 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001629 Py_INCREF(_csvstate(module)->dialects);
1630 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001632
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 /* Add quote styles into dictionary */
1634 for (style = quote_styles; style->name; style++) {
1635 if (PyModule_AddIntConstant(module, style->name,
1636 style->style) == -1)
1637 return NULL;
1638 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 /* Add the Dialect type */
1641 Py_INCREF(&Dialect_Type);
1642 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1643 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001646 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1647 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001649 Py_INCREF(_csvstate(module)->error_obj);
1650 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001652}