blob: c8767d1ea56c17c135a805aa049d75378e2f9f0f [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,            /* start of record */
    START_FIELD,             /* expecting a field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* in unquoted field */
    IN_QUOTED_FIELD,         /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* quote character seen inside quotes */
    EAT_CRNL,                /* consuming line-ending characters */
    AFTER_ESCAPED_CRNL       /* an escaped '\n' or '\r' was just stored */
} ParserState;
56
/* Quoting styles accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
63 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
66static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be an 1-character string",
252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_XDECREF(*target);
280 Py_INCREF(src);
281 *target = src;
282 }
283 }
284 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000285}
286
287static int
288dialect_check_quoting(int quoting)
289{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000290 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 for (qs = quote_styles; qs->name; qs++) {
293 if (qs->style == quoting)
294 return 0;
295 }
296 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
297 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000298}
Skip Montanarob4a04172003-03-20 23:29:12 +0000299
300#define D_OFF(x) offsetof(DialectObj, x)
301
302static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
304 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
305 { "strict", T_INT, D_OFF(strict), READONLY },
306 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 { "delimiter", (getter)Dialect_get_delimiter},
311 { "escapechar", (getter)Dialect_get_escapechar},
312 { "lineterminator", (getter)Dialect_get_lineterminator},
313 { "quotechar", (getter)Dialect_get_quotechar},
314 { "quoting", (getter)Dialect_get_quoting},
315 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000316};
317
318static void
319Dialect_dealloc(DialectObj *self)
320{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 Py_XDECREF(self->lineterminator);
322 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000323}
324
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000325static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 "dialect",
327 "delimiter",
328 "doublequote",
329 "escapechar",
330 "lineterminator",
331 "quotechar",
332 "quoting",
333 "skipinitialspace",
334 "strict",
335 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000336};
337
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000338static PyObject *
339dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 DialectObj *self;
342 PyObject *ret = NULL;
343 PyObject *dialect = NULL;
344 PyObject *delimiter = NULL;
345 PyObject *doublequote = NULL;
346 PyObject *escapechar = NULL;
347 PyObject *lineterminator = NULL;
348 PyObject *quotechar = NULL;
349 PyObject *quoting = NULL;
350 PyObject *skipinitialspace = NULL;
351 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
354 "|OOOOOOOOO", dialect_kws,
355 &dialect,
356 &delimiter,
357 &doublequote,
358 &escapechar,
359 &lineterminator,
360 &quotechar,
361 &quoting,
362 &skipinitialspace,
363 &strict))
364 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100367 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 dialect = get_dialect_from_registry(dialect);
369 if (dialect == NULL)
370 return NULL;
371 }
372 else
373 Py_INCREF(dialect);
374 /* Can we reuse this instance? */
375 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
376 delimiter == 0 &&
377 doublequote == 0 &&
378 escapechar == 0 &&
379 lineterminator == 0 &&
380 quotechar == 0 &&
381 quoting == 0 &&
382 skipinitialspace == 0 &&
383 strict == 0)
384 return dialect;
385 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 self = (DialectObj *)type->tp_alloc(type, 0);
388 if (self == NULL) {
389 Py_XDECREF(dialect);
390 return NULL;
391 }
392 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 Py_XINCREF(delimiter);
395 Py_XINCREF(doublequote);
396 Py_XINCREF(escapechar);
397 Py_XINCREF(lineterminator);
398 Py_XINCREF(quotechar);
399 Py_XINCREF(quoting);
400 Py_XINCREF(skipinitialspace);
401 Py_XINCREF(strict);
402 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 if (v == NULL) \
405 v = PyObject_GetAttrString(dialect, n)
406 DIALECT_GETATTR(delimiter, "delimiter");
407 DIALECT_GETATTR(doublequote, "doublequote");
408 DIALECT_GETATTR(escapechar, "escapechar");
409 DIALECT_GETATTR(lineterminator, "lineterminator");
410 DIALECT_GETATTR(quotechar, "quotechar");
411 DIALECT_GETATTR(quoting, "quoting");
412 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
413 DIALECT_GETATTR(strict, "strict");
414 PyErr_Clear();
415 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000418#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 if (meth(name, target, src, dflt)) \
420 goto err
421 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
422 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
423 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
424 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
425 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
426 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
427 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
428 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 /* validate options */
431 if (dialect_check_quoting(self->quoting))
432 goto err;
433 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200434 PyErr_SetString(PyExc_TypeError,
435 "\"delimiter\" must be an 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 goto err;
437 }
438 if (quotechar == Py_None && quoting == NULL)
439 self->quoting = QUOTE_NONE;
440 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
441 PyErr_SetString(PyExc_TypeError,
442 "quotechar must be set if quoting enabled");
443 goto err;
444 }
445 if (self->lineterminator == 0) {
446 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
447 goto err;
448 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 ret = (PyObject *)self;
451 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 Py_XDECREF(self);
454 Py_XDECREF(dialect);
455 Py_XDECREF(delimiter);
456 Py_XDECREF(doublequote);
457 Py_XDECREF(escapechar);
458 Py_XDECREF(lineterminator);
459 Py_XDECREF(quotechar);
460 Py_XDECREF(quoting);
461 Py_XDECREF(skipinitialspace);
462 Py_XDECREF(strict);
463 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000464}
465
466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000468"CSV dialect\n"
469"\n"
470"The Dialect type records CSV parsing and generation options.\n");
471
472static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 PyVarObject_HEAD_INIT(NULL, 0)
474 "_csv.Dialect", /* tp_name */
475 sizeof(DialectObj), /* tp_basicsize */
476 0, /* tp_itemsize */
477 /* methods */
478 (destructor)Dialect_dealloc, /* tp_dealloc */
479 (printfunc)0, /* tp_print */
480 (getattrfunc)0, /* tp_getattr */
481 (setattrfunc)0, /* tp_setattr */
482 0, /* tp_reserved */
483 (reprfunc)0, /* tp_repr */
484 0, /* tp_as_number */
485 0, /* tp_as_sequence */
486 0, /* tp_as_mapping */
487 (hashfunc)0, /* tp_hash */
488 (ternaryfunc)0, /* tp_call */
489 (reprfunc)0, /* tp_str */
490 0, /* tp_getattro */
491 0, /* tp_setattro */
492 0, /* tp_as_buffer */
493 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
494 Dialect_Type_doc, /* tp_doc */
495 0, /* tp_traverse */
496 0, /* tp_clear */
497 0, /* tp_richcompare */
498 0, /* tp_weaklistoffset */
499 0, /* tp_iter */
500 0, /* tp_iternext */
501 0, /* tp_methods */
502 Dialect_memberlist, /* tp_members */
503 Dialect_getsetlist, /* tp_getset */
504 0, /* tp_base */
505 0, /* tp_dict */
506 0, /* tp_descr_get */
507 0, /* tp_descr_set */
508 0, /* tp_dictoffset */
509 0, /* tp_init */
510 0, /* tp_alloc */
511 dialect_new, /* tp_new */
512 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000513};
514
Andrew McNamara91b97462005-01-11 01:07:23 +0000515/*
516 * Return an instance of the dialect type, given a Python instance or kwarg
517 * description of the dialect
518 */
519static PyObject *
520_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 PyObject *ctor_args;
523 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
526 if (ctor_args == NULL)
527 return NULL;
528 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
529 Py_DECREF(ctor_args);
530 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000531}
532
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000536static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000537parse_save_field(ReaderObj *self)
538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000540
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200541 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
542 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 if (field == NULL)
544 return -1;
545 self->field_len = 0;
546 if (self->numeric_field) {
547 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 self->numeric_field = 0;
550 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200552 if (tmp == NULL)
553 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 field = tmp;
555 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100556 if (PyList_Append(self->fields, field) < 0) {
557 Py_DECREF(field);
558 return -1;
559 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 Py_DECREF(field);
561 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000562}
563
564static int
565parse_grow_buff(ReaderObj *self)
566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 if (self->field_size == 0) {
568 self->field_size = 4096;
569 if (self->field != NULL)
570 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 }
573 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200574 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000575 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 PyErr_NoMemory();
577 return 0;
578 }
579 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200580 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 }
582 if (self->field == NULL) {
583 PyErr_NoMemory();
584 return 0;
585 }
586 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000587}
588
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000589static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200590parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000591{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200592 if (self->field_len >= _csvstate_global->field_limit) {
593 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
594 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 return -1;
596 }
597 if (self->field_len == self->field_size && !parse_grow_buff(self))
598 return -1;
599 self->field[self->field_len++] = c;
600 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000601}
602
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000603static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200604parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000605{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 switch (self->state) {
609 case START_RECORD:
610 /* start of record */
611 if (c == '\0')
612 /* empty line - return [] */
613 break;
614 else if (c == '\n' || c == '\r') {
615 self->state = EAT_CRNL;
616 break;
617 }
618 /* normal character - handle as START_FIELD */
619 self->state = START_FIELD;
620 /* fallthru */
621 case START_FIELD:
622 /* expecting field */
623 if (c == '\n' || c == '\r' || c == '\0') {
624 /* save empty field - return [fields] */
625 if (parse_save_field(self) < 0)
626 return -1;
627 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
628 }
629 else if (c == dialect->quotechar &&
630 dialect->quoting != QUOTE_NONE) {
631 /* start quoted field */
632 self->state = IN_QUOTED_FIELD;
633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == ' ' && dialect->skipinitialspace)
639 /* ignore space at start of field */
640 ;
641 else if (c == dialect->delimiter) {
642 /* save empty field */
643 if (parse_save_field(self) < 0)
644 return -1;
645 }
646 else {
647 /* begin new unquoted field */
648 if (dialect->quoting == QUOTE_NONNUMERIC)
649 self->numeric_field = 1;
650 if (parse_add_char(self, c) < 0)
651 return -1;
652 self->state = IN_FIELD;
653 }
654 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400657 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400658 if (parse_add_char(self, c) < 0)
659 return -1;
660 self->state = AFTER_ESCAPED_CRNL;
661 break;
662 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 if (c == '\0')
664 c = '\n';
665 if (parse_add_char(self, c) < 0)
666 return -1;
667 self->state = IN_FIELD;
668 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000669
R David Murrayc7c42ef2013-03-19 22:41:47 -0400670 case AFTER_ESCAPED_CRNL:
671 if (c == '\0')
672 break;
673 /*fallthru*/
674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 case IN_FIELD:
676 /* in unquoted field */
677 if (c == '\n' || c == '\r' || c == '\0') {
678 /* end of line - return [fields] */
679 if (parse_save_field(self) < 0)
680 return -1;
681 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
682 }
683 else if (c == dialect->escapechar) {
684 /* possible escaped character */
685 self->state = ESCAPED_CHAR;
686 }
687 else if (c == dialect->delimiter) {
688 /* save field - wait for new field */
689 if (parse_save_field(self) < 0)
690 return -1;
691 self->state = START_FIELD;
692 }
693 else {
694 /* normal character - save in field */
695 if (parse_add_char(self, c) < 0)
696 return -1;
697 }
698 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 case IN_QUOTED_FIELD:
701 /* in quoted field */
702 if (c == '\0')
703 ;
704 else if (c == dialect->escapechar) {
705 /* Possible escape character */
706 self->state = ESCAPE_IN_QUOTED_FIELD;
707 }
708 else if (c == dialect->quotechar &&
709 dialect->quoting != QUOTE_NONE) {
710 if (dialect->doublequote) {
711 /* doublequote; " represented by "" */
712 self->state = QUOTE_IN_QUOTED_FIELD;
713 }
714 else {
715 /* end of quote part of field */
716 self->state = IN_FIELD;
717 }
718 }
719 else {
720 /* normal character - save in field */
721 if (parse_add_char(self, c) < 0)
722 return -1;
723 }
724 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 case ESCAPE_IN_QUOTED_FIELD:
727 if (c == '\0')
728 c = '\n';
729 if (parse_add_char(self, c) < 0)
730 return -1;
731 self->state = IN_QUOTED_FIELD;
732 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 case QUOTE_IN_QUOTED_FIELD:
735 /* doublequote - seen a quote in an quoted field */
736 if (dialect->quoting != QUOTE_NONE &&
737 c == dialect->quotechar) {
738 /* save "" as " */
739 if (parse_add_char(self, c) < 0)
740 return -1;
741 self->state = IN_QUOTED_FIELD;
742 }
743 else if (c == dialect->delimiter) {
744 /* save field - wait for new field */
745 if (parse_save_field(self) < 0)
746 return -1;
747 self->state = START_FIELD;
748 }
749 else if (c == '\n' || c == '\r' || c == '\0') {
750 /* end of line - return [fields] */
751 if (parse_save_field(self) < 0)
752 return -1;
753 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
754 }
755 else if (!dialect->strict) {
756 if (parse_add_char(self, c) < 0)
757 return -1;
758 self->state = IN_FIELD;
759 }
760 else {
761 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200762 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 dialect->delimiter,
764 dialect->quotechar);
765 return -1;
766 }
767 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 case EAT_CRNL:
770 if (c == '\n' || c == '\r')
771 ;
772 else if (c == '\0')
773 self->state = START_RECORD;
774 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200775 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 return -1;
777 }
778 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 }
781 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000782}
783
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000784static int
785parse_reset(ReaderObj *self)
786{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 Py_XDECREF(self->fields);
788 self->fields = PyList_New(0);
789 if (self->fields == NULL)
790 return -1;
791 self->field_len = 0;
792 self->state = START_RECORD;
793 self->numeric_field = 0;
794 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000795}
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
797static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000798Reader_iternext(ReaderObj *self)
799{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200801 Py_UCS4 c;
802 Py_ssize_t pos, linelen;
803 unsigned int kind;
804 void *data;
805 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 if (parse_reset(self) < 0)
808 return NULL;
809 do {
810 lineobj = PyIter_Next(self->input_iter);
811 if (lineobj == NULL) {
812 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700813 if (!PyErr_Occurred() && (self->field_len != 0 ||
814 self->state == IN_QUOTED_FIELD)) {
815 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700816 PyErr_SetString(_csvstate_global->error_obj,
817 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700818 else if (parse_save_field(self) >= 0)
819 break;
820 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 return NULL;
822 }
823 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200824 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 "iterator should return strings, "
826 "not %.200s "
827 "(did you open the file in text mode?)",
828 lineobj->ob_type->tp_name
829 );
830 Py_DECREF(lineobj);
831 return NULL;
832 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100833 if (PyUnicode_READY(lineobj) == -1) {
834 Py_DECREF(lineobj);
835 return NULL;
836 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200838 kind = PyUnicode_KIND(lineobj);
839 data = PyUnicode_DATA(lineobj);
840 pos = 0;
841 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200843 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000845 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200846 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 "line contains NULL byte");
848 goto err;
849 }
850 if (parse_process_char(self, c) < 0) {
851 Py_DECREF(lineobj);
852 goto err;
853 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200854 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 }
856 Py_DECREF(lineobj);
857 if (parse_process_char(self, 0) < 0)
858 goto err;
859 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 fields = self->fields;
862 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000863err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000865}
866
867static void
868Reader_dealloc(ReaderObj *self)
869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 PyObject_GC_UnTrack(self);
871 Py_XDECREF(self->dialect);
872 Py_XDECREF(self->input_iter);
873 Py_XDECREF(self->fields);
874 if (self->field != NULL)
875 PyMem_Free(self->field);
876 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000877}
878
879static int
880Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 Py_VISIT(self->dialect);
883 Py_VISIT(self->input_iter);
884 Py_VISIT(self->fields);
885 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000886}
887
888static int
889Reader_clear(ReaderObj *self)
890{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 Py_CLEAR(self->dialect);
892 Py_CLEAR(self->input_iter);
893 Py_CLEAR(self->fields);
894 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000895}
896
897PyDoc_STRVAR(Reader_Type_doc,
898"CSV reader\n"
899"\n"
900"Reader objects are responsible for reading and parsing tabular data\n"
901"in CSV format.\n"
902);
903
904static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000906};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000907#define R_OFF(x) offsetof(ReaderObj, x)
908
909static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
911 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
912 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000913};
914
Skip Montanarob4a04172003-03-20 23:29:12 +0000915
916static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 PyVarObject_HEAD_INIT(NULL, 0)
918 "_csv.reader", /*tp_name*/
919 sizeof(ReaderObj), /*tp_basicsize*/
920 0, /*tp_itemsize*/
921 /* methods */
922 (destructor)Reader_dealloc, /*tp_dealloc*/
923 (printfunc)0, /*tp_print*/
924 (getattrfunc)0, /*tp_getattr*/
925 (setattrfunc)0, /*tp_setattr*/
926 0, /*tp_reserved*/
927 (reprfunc)0, /*tp_repr*/
928 0, /*tp_as_number*/
929 0, /*tp_as_sequence*/
930 0, /*tp_as_mapping*/
931 (hashfunc)0, /*tp_hash*/
932 (ternaryfunc)0, /*tp_call*/
933 (reprfunc)0, /*tp_str*/
934 0, /*tp_getattro*/
935 0, /*tp_setattro*/
936 0, /*tp_as_buffer*/
937 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
938 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
939 Reader_Type_doc, /*tp_doc*/
940 (traverseproc)Reader_traverse, /*tp_traverse*/
941 (inquiry)Reader_clear, /*tp_clear*/
942 0, /*tp_richcompare*/
943 0, /*tp_weaklistoffset*/
944 PyObject_SelfIter, /*tp_iter*/
945 (getiterfunc)Reader_iternext, /*tp_iternext*/
946 Reader_methods, /*tp_methods*/
947 Reader_memberlist, /*tp_members*/
948 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000949
950};
951
952static PyObject *
953csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 PyObject * iterator, * dialect = NULL;
956 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 if (!self)
959 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 self->dialect = NULL;
962 self->fields = NULL;
963 self->input_iter = NULL;
964 self->field = NULL;
965 self->field_size = 0;
966 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 if (parse_reset(self) < 0) {
969 Py_DECREF(self);
970 return NULL;
971 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
974 Py_DECREF(self);
975 return NULL;
976 }
977 self->input_iter = PyObject_GetIter(iterator);
978 if (self->input_iter == NULL) {
979 PyErr_SetString(PyExc_TypeError,
980 "argument 1 must be an iterator");
981 Py_DECREF(self);
982 return NULL;
983 }
984 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
985 if (self->dialect == NULL) {
986 Py_DECREF(self);
987 return NULL;
988 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 PyObject_GC_Track(self);
991 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000992}
993
994/*
995 * WRITER
996 */
997/* ---------------------------------------------------------------- */
998static void
999join_reset(WriterObj *self)
1000{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 self->rec_len = 0;
1002 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001003}
1004
1005#define MEM_INCR 32768
1006
1007/* Calculate new record length or append field to record. Return new
1008 * record length.
1009 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001010static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001011join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
1012 Py_ssize_t field_len, int quote_empty, int *quoted,
1013 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001014{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 DialectObj *dialect = self->dialect;
1016 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001017 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001018
1019#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 do {\
1021 if (copy_phase) \
1022 self->rec[rec_len] = c;\
1023 rec_len++;\
1024 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 /* If this is not the first field we need a field separator */
1029 if (self->num_fields > 0)
1030 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 /* Handle preceding quote */
1033 if (copy_phase && *quoted)
1034 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 /* Copy/count field data */
1037 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001038 for (i = 0; field_data && (i < field_len); i++) {
1039 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (c == dialect->delimiter ||
1043 c == dialect->escapechar ||
1044 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001045 PyUnicode_FindChar(
1046 dialect->lineterminator, c, 0,
1047 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 if (dialect->quoting == QUOTE_NONE)
1049 want_escape = 1;
1050 else {
1051 if (c == dialect->quotechar) {
1052 if (dialect->doublequote)
1053 ADDCH(dialect->quotechar);
1054 else
1055 want_escape = 1;
1056 }
1057 if (!want_escape)
1058 *quoted = 1;
1059 }
1060 if (want_escape) {
1061 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001062 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 "need to escape, but no escapechar set");
1064 return -1;
1065 }
1066 ADDCH(dialect->escapechar);
1067 }
1068 }
1069 /* Copy field character into record buffer.
1070 */
1071 ADDCH(c);
1072 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 /* If field is empty check if it needs to be quoted.
1075 */
1076 if (i == 0 && quote_empty) {
1077 if (dialect->quoting == QUOTE_NONE) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001078 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 "single empty field record must be quoted");
1080 return -1;
1081 }
1082 else
1083 *quoted = 1;
1084 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 if (*quoted) {
1087 if (copy_phase)
1088 ADDCH(dialect->quotechar);
1089 else
1090 rec_len += 2;
1091 }
1092 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001093#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001094}
1095
1096static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001097join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001098{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001099
Antoine Pitrou40455752010-08-15 18:51:10 +00001100 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 PyErr_NoMemory();
1102 return 0;
1103 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 if (rec_len > self->rec_size) {
1106 if (self->rec_size == 0) {
1107 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1108 if (self->rec != NULL)
1109 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001110 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 }
1112 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001113 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001116 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (self->rec == NULL)
1118 PyMem_Free(old_rec);
1119 }
1120 if (self->rec == NULL) {
1121 PyErr_NoMemory();
1122 return 0;
1123 }
1124 }
1125 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001126}
1127
1128static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001129join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001130{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001131 unsigned int field_kind = -1;
1132 void *field_data = NULL;
1133 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001134 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001136 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001137 if (PyUnicode_READY(field) == -1)
1138 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001139 field_kind = PyUnicode_KIND(field);
1140 field_data = PyUnicode_DATA(field);
1141 field_len = PyUnicode_GET_LENGTH(field);
1142 }
1143 rec_len = join_append_data(self, field_kind, field_data, field_len,
1144 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 if (rec_len < 0)
1146 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 /* grow record buffer if necessary */
1149 if (!join_check_rec_size(self, rec_len))
1150 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001151
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001152 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1153 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157}
1158
1159static int
1160join_append_lineterminator(WriterObj *self)
1161{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001162 Py_ssize_t terminator_len, i;
1163 unsigned int term_kind;
1164 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001166 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 if (terminator_len == -1)
1168 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 /* grow record buffer if necessary */
1171 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1172 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001173
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001174 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1175 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1176 for (i = 0; i < terminator_len; i++)
1177 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001181}
1182
1183PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001184"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001185"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001186"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001187"elements will be converted to string.");
1188
1189static PyObject *
1190csv_writerow(WriterObj *self, PyObject *seq)
1191{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001193 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001194 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 if (!PySequence_Check(seq))
Antoine Pitroue7672d32012-05-16 11:33:08 +02001197 return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 len = PySequence_Length(seq);
1200 if (len < 0)
1201 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 /* Join all fields in internal buffer.
1204 */
1205 join_reset(self);
1206 for (i = 0; i < len; i++) {
1207 PyObject *field;
1208 int append_ok;
1209 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001210
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 field = PySequence_GetItem(seq, i);
1212 if (field == NULL)
1213 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 switch (dialect->quoting) {
1216 case QUOTE_NONNUMERIC:
1217 quoted = !PyNumber_Check(field);
1218 break;
1219 case QUOTE_ALL:
1220 quoted = 1;
1221 break;
1222 default:
1223 quoted = 0;
1224 break;
1225 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001228 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 Py_DECREF(field);
1230 }
1231 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001232 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 Py_DECREF(field);
1234 }
1235 else {
1236 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001237
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 str = PyObject_Str(field);
1239 Py_DECREF(field);
1240 if (str == NULL)
1241 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001242 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 Py_DECREF(str);
1244 }
1245 if (!append_ok)
1246 return NULL;
1247 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001248
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 /* Add line terminator.
1250 */
1251 if (!join_append_lineterminator(self))
1252 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001253
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001254 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1255 (void *) self->rec, self->rec_len);
1256 if (line == NULL)
1257 return NULL;
1258 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1259 Py_DECREF(line);
1260 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001261}
1262
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001263PyDoc_STRVAR(csv_writerows_doc,
1264"writerows(sequence of sequences)\n"
1265"\n"
1266"Construct and write a series of sequences to a csv file. Non-string\n"
1267"elements will be converted to string.");
1268
Skip Montanarob4a04172003-03-20 23:29:12 +00001269static PyObject *
1270csv_writerows(WriterObj *self, PyObject *seqseq)
1271{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001273
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001274 row_iter = PyObject_GetIter(seqseq);
1275 if (row_iter == NULL) {
1276 PyErr_SetString(PyExc_TypeError,
1277 "writerows() argument must be iterable");
1278 return NULL;
1279 }
1280 while ((row_obj = PyIter_Next(row_iter))) {
1281 result = csv_writerow(self, row_obj);
1282 Py_DECREF(row_obj);
1283 if (!result) {
1284 Py_DECREF(row_iter);
1285 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001286 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 else
1288 Py_DECREF(result);
1289 }
1290 Py_DECREF(row_iter);
1291 if (PyErr_Occurred())
1292 return NULL;
1293 Py_INCREF(Py_None);
1294 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001295}
1296
1297static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1299 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1300 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001301};
1302
1303#define W_OFF(x) offsetof(WriterObj, x)
1304
1305static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1307 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001308};
1309
1310static void
1311Writer_dealloc(WriterObj *self)
1312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 PyObject_GC_UnTrack(self);
1314 Py_XDECREF(self->dialect);
1315 Py_XDECREF(self->writeline);
1316 if (self->rec != NULL)
1317 PyMem_Free(self->rec);
1318 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001319}
1320
1321static int
1322Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1323{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 Py_VISIT(self->dialect);
1325 Py_VISIT(self->writeline);
1326 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001327}
1328
1329static int
1330Writer_clear(WriterObj *self)
1331{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 Py_CLEAR(self->dialect);
1333 Py_CLEAR(self->writeline);
1334 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001335}
1336
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001338"CSV writer\n"
1339"\n"
1340"Writer objects are responsible for generating tabular data\n"
1341"in CSV format from sequence input.\n"
1342);
1343
1344static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 PyVarObject_HEAD_INIT(NULL, 0)
1346 "_csv.writer", /*tp_name*/
1347 sizeof(WriterObj), /*tp_basicsize*/
1348 0, /*tp_itemsize*/
1349 /* methods */
1350 (destructor)Writer_dealloc, /*tp_dealloc*/
1351 (printfunc)0, /*tp_print*/
1352 (getattrfunc)0, /*tp_getattr*/
1353 (setattrfunc)0, /*tp_setattr*/
1354 0, /*tp_reserved*/
1355 (reprfunc)0, /*tp_repr*/
1356 0, /*tp_as_number*/
1357 0, /*tp_as_sequence*/
1358 0, /*tp_as_mapping*/
1359 (hashfunc)0, /*tp_hash*/
1360 (ternaryfunc)0, /*tp_call*/
1361 (reprfunc)0, /*tp_str*/
1362 0, /*tp_getattro*/
1363 0, /*tp_setattro*/
1364 0, /*tp_as_buffer*/
1365 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1366 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1367 Writer_Type_doc,
1368 (traverseproc)Writer_traverse, /*tp_traverse*/
1369 (inquiry)Writer_clear, /*tp_clear*/
1370 0, /*tp_richcompare*/
1371 0, /*tp_weaklistoffset*/
1372 (getiterfunc)0, /*tp_iter*/
1373 (getiterfunc)0, /*tp_iternext*/
1374 Writer_methods, /*tp_methods*/
1375 Writer_memberlist, /*tp_members*/
1376 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001377};
1378
1379static PyObject *
1380csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1381{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 PyObject * output_file, * dialect = NULL;
1383 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001384 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 if (!self)
1387 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 self->dialect = NULL;
1390 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 self->rec = NULL;
1393 self->rec_size = 0;
1394 self->rec_len = 0;
1395 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001396
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1398 Py_DECREF(self);
1399 return NULL;
1400 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001401 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1403 PyErr_SetString(PyExc_TypeError,
1404 "argument 1 must have a \"write\" method");
1405 Py_DECREF(self);
1406 return NULL;
1407 }
1408 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1409 if (self->dialect == NULL) {
1410 Py_DECREF(self);
1411 return NULL;
1412 }
1413 PyObject_GC_Track(self);
1414 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001415}
1416
1417/*
1418 * DIALECT REGISTRY
1419 */
1420static PyObject *
1421csv_list_dialects(PyObject *module, PyObject *args)
1422{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001423 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001424}
1425
1426static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001427csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001428{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 PyObject *name_obj, *dialect_obj = NULL;
1430 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001431
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1433 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001434 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001436 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 return NULL;
1438 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001439 if (PyUnicode_READY(name_obj) == -1)
1440 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 dialect = _call_dialect(dialect_obj, kwargs);
1442 if (dialect == NULL)
1443 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001444 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 Py_DECREF(dialect);
1446 return NULL;
1447 }
1448 Py_DECREF(dialect);
1449 Py_INCREF(Py_None);
1450 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001451}
1452
1453static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001454csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001455{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001456 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1457 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 Py_INCREF(Py_None);
1459 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001460}
1461
1462static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001463csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001464{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001466}
1467
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001468static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001469csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001470{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001472 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1475 return NULL;
1476 if (new_limit != NULL) {
1477 if (!PyLong_CheckExact(new_limit)) {
1478 PyErr_Format(PyExc_TypeError,
1479 "limit must be an integer");
1480 return NULL;
1481 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001482 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1483 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1484 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 return NULL;
1486 }
1487 }
1488 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001489}
1490
Skip Montanarob4a04172003-03-20 23:29:12 +00001491/*
1492 * MODULE
1493 */
1494
1495PyDoc_STRVAR(csv_module_doc,
1496"CSV parsing and writing.\n"
1497"\n"
1498"This module provides classes that assist in the reading and writing\n"
1499"of Comma Separated Value (CSV) files, and implements the interface\n"
1500"described by PEP 305. Although many CSV files are simple to parse,\n"
1501"the format is not formally defined by a stable specification and\n"
1502"is subtle enough that parsing lines of a CSV file with something\n"
1503"like line.split(\",\") is bound to fail. The module supports three\n"
1504"basic APIs: reading, writing, and registration of dialects.\n"
1505"\n"
1506"\n"
1507"DIALECT REGISTRATION:\n"
1508"\n"
1509"Readers and writers support a dialect argument, which is a convenient\n"
1510"handle on a group of settings. When the dialect argument is a string,\n"
1511"it identifies one of the dialects previously registered with the module.\n"
1512"If it is a class or instance, the attributes of the argument are used as\n"
1513"the settings for the reader or writer:\n"
1514"\n"
1515" class excel:\n"
1516" delimiter = ','\n"
1517" quotechar = '\"'\n"
1518" escapechar = None\n"
1519" doublequote = True\n"
1520" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001521" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001522" quoting = QUOTE_MINIMAL\n"
1523"\n"
1524"SETTINGS:\n"
1525"\n"
1526" * quotechar - specifies a one-character string to use as the \n"
1527" quoting character. It defaults to '\"'.\n"
1528" * delimiter - specifies a one-character string to use as the \n"
1529" field separator. It defaults to ','.\n"
1530" * skipinitialspace - specifies how to interpret whitespace which\n"
1531" immediately follows a delimiter. It defaults to False, which\n"
1532" means that whitespace immediately following a delimiter is part\n"
1533" of the following field.\n"
1534" * lineterminator - specifies the character sequence which should \n"
1535" terminate rows.\n"
1536" * quoting - controls when quotes should be generated by the writer.\n"
1537" It can take on any of the following module constants:\n"
1538"\n"
1539" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1540" field contains either the quotechar or the delimiter\n"
1541" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1542" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001543" fields which do not parse as integers or floating point\n"
1544" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001545" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1546" * escapechar - specifies a one-character string used to escape \n"
1547" the delimiter when quoting is set to QUOTE_NONE.\n"
1548" * doublequote - controls the handling of quotes inside fields. When\n"
1549" True, two consecutive quotes are interpreted as one during read,\n"
1550" and when writing, each quote character embedded in the data is\n"
1551" written as two quotes\n");
1552
1553PyDoc_STRVAR(csv_reader_doc,
1554" csv_reader = reader(iterable [, dialect='excel']\n"
1555" [optional keyword args])\n"
1556" for row in csv_reader:\n"
1557" process(row)\n"
1558"\n"
1559"The \"iterable\" argument can be any object that returns a line\n"
1560"of input for each iteration, such as a file object or a list. The\n"
1561"optional \"dialect\" parameter is discussed below. The function\n"
1562"also accepts optional keyword arguments which override settings\n"
1563"provided by the dialect.\n"
1564"\n"
1565"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001566"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001567
1568PyDoc_STRVAR(csv_writer_doc,
1569" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1570" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001571" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001572" csv_writer.writerow(row)\n"
1573"\n"
1574" [or]\n"
1575"\n"
1576" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1577" [optional keyword args])\n"
1578" csv_writer.writerows(rows)\n"
1579"\n"
1580"The \"fileobj\" argument can be any object that supports the file API.\n");
1581
1582PyDoc_STRVAR(csv_list_dialects_doc,
1583"Return a list of all know dialect names.\n"
1584" names = csv.list_dialects()");
1585
1586PyDoc_STRVAR(csv_get_dialect_doc,
1587"Return the dialect instance associated with name.\n"
1588" dialect = csv.get_dialect(name)");
1589
1590PyDoc_STRVAR(csv_register_dialect_doc,
1591"Create a mapping from a string name to a dialect class.\n"
1592" dialect = csv.register_dialect(name, dialect)");
1593
1594PyDoc_STRVAR(csv_unregister_dialect_doc,
1595"Delete the name/dialect mapping associated with a string name.\n"
1596" csv.unregister_dialect(name)");
1597
Andrew McNamara31d88962005-01-12 03:45:10 +00001598PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001599"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001600" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001601"\n"
1602"Returns old limit. If limit is not given, no new limit is set and\n"
1603"the old limit is returned");
1604
Skip Montanarob4a04172003-03-20 23:29:12 +00001605static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 { "reader", (PyCFunction)csv_reader,
1607 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1608 { "writer", (PyCFunction)csv_writer,
1609 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1610 { "list_dialects", (PyCFunction)csv_list_dialects,
1611 METH_NOARGS, csv_list_dialects_doc},
1612 { "register_dialect", (PyCFunction)csv_register_dialect,
1613 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1614 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1615 METH_O, csv_unregister_dialect_doc},
1616 { "get_dialect", (PyCFunction)csv_get_dialect,
1617 METH_O, csv_get_dialect_doc},
1618 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1619 METH_VARARGS, csv_field_size_limit_doc},
1620 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001621};
1622
Martin v. Löwis1a214512008-06-11 05:26:20 +00001623static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 PyModuleDef_HEAD_INIT,
1625 "_csv",
1626 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001627 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 csv_methods,
1629 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001630 _csv_traverse,
1631 _csv_clear,
1632 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001633};
1634
Skip Montanarob4a04172003-03-20 23:29:12 +00001635PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001636PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001637{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 PyObject *module;
1639 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 if (PyType_Ready(&Dialect_Type) < 0)
1642 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 if (PyType_Ready(&Reader_Type) < 0)
1645 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 if (PyType_Ready(&Writer_Type) < 0)
1648 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 /* Create the module and add the functions */
1651 module = PyModule_Create(&_csvmodule);
1652 if (module == NULL)
1653 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 /* Add version to the module. */
1656 if (PyModule_AddStringConstant(module, "__version__",
1657 MODULE_VERSION) == -1)
1658 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001659
Antoine Pitroue7672d32012-05-16 11:33:08 +02001660 /* Set the field limit */
1661 _csvstate(module)->field_limit = 128 * 1024;
1662 /* Do I still need to add this var to the Module Dict? */
1663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001665 _csvstate(module)->dialects = PyDict_New();
1666 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001668 Py_INCREF(_csvstate(module)->dialects);
1669 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 /* Add quote styles into dictionary */
1673 for (style = quote_styles; style->name; style++) {
1674 if (PyModule_AddIntConstant(module, style->name,
1675 style->style) == -1)
1676 return NULL;
1677 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 /* Add the Dialect type */
1680 Py_INCREF(&Dialect_Type);
1681 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1682 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001685 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1686 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001688 Py_INCREF(_csvstate(module)->error_obj);
1689 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001691}