blob: 183a9a5edb4d1579eba088afb34b1eef0d6a657c [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's character-at-a-time parser
   (see parse_process_char()). */
typedef enum {
    START_RECORD,           /* nothing of the record consumed yet */
    START_FIELD,            /* expecting the first character of a field */
    ESCAPED_CHAR,           /* escapechar seen outside of quotes */
    IN_FIELD,               /* inside an unquoted field */
    IN_QUOTED_FIELD,        /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD, /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,  /* quotechar seen inside a quoted field */
    EAT_CRNL,               /* record ended; only line breaks may follow */
    AFTER_ESCAPED_CRNL      /* an escaped \r or \n was just stored */
} ParserState;
56
/* Values accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,      /* default used by dialect_new() */
    QUOTE_ALL,
    QUOTE_NONNUMERIC,   /* reader converts unquoted fields to float */
    QUOTE_NONE          /* reader gives quotechar no special meaning */
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
63 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
66static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Victor Stinner9e30aa52011-11-21 02:49:52 +0100242 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200243 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 PyErr_Format(PyExc_TypeError,
245 "\"%s\" must be an 1-character string",
246 name);
247 return -1;
248 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100249 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200251 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 }
253 }
254 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000255}
256
257static int
258_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 if (src == NULL)
261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262 else {
263 if (src == Py_None)
264 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100265 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be a string", name);
268 return -1;
269 }
270 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 if (PyUnicode_READY(src) == -1)
272 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 Py_XDECREF(*target);
274 Py_INCREF(src);
275 *target = src;
276 }
277 }
278 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000279}
280
281static int
282dialect_check_quoting(int quoting)
283{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000284 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 for (qs = quote_styles; qs->name; qs++) {
287 if (qs->style == quoting)
288 return 0;
289 }
290 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
291 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000292}
Skip Montanarob4a04172003-03-20 23:29:12 +0000293
294#define D_OFF(x) offsetof(DialectObj, x)
295
296static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
298 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
299 { "strict", T_INT, D_OFF(strict), READONLY },
300 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000301};
302
303static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 { "delimiter", (getter)Dialect_get_delimiter},
305 { "escapechar", (getter)Dialect_get_escapechar},
306 { "lineterminator", (getter)Dialect_get_lineterminator},
307 { "quotechar", (getter)Dialect_get_quotechar},
308 { "quoting", (getter)Dialect_get_quoting},
309 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000310};
311
312static void
313Dialect_dealloc(DialectObj *self)
314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 Py_XDECREF(self->lineterminator);
316 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000317}
318
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000319static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 "dialect",
321 "delimiter",
322 "doublequote",
323 "escapechar",
324 "lineterminator",
325 "quotechar",
326 "quoting",
327 "skipinitialspace",
328 "strict",
329 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000330};
331
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000332static PyObject *
333dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000334{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 DialectObj *self;
336 PyObject *ret = NULL;
337 PyObject *dialect = NULL;
338 PyObject *delimiter = NULL;
339 PyObject *doublequote = NULL;
340 PyObject *escapechar = NULL;
341 PyObject *lineterminator = NULL;
342 PyObject *quotechar = NULL;
343 PyObject *quoting = NULL;
344 PyObject *skipinitialspace = NULL;
345 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
348 "|OOOOOOOOO", dialect_kws,
349 &dialect,
350 &delimiter,
351 &doublequote,
352 &escapechar,
353 &lineterminator,
354 &quotechar,
355 &quoting,
356 &skipinitialspace,
357 &strict))
358 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100361 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 dialect = get_dialect_from_registry(dialect);
363 if (dialect == NULL)
364 return NULL;
365 }
366 else
367 Py_INCREF(dialect);
368 /* Can we reuse this instance? */
369 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
370 delimiter == 0 &&
371 doublequote == 0 &&
372 escapechar == 0 &&
373 lineterminator == 0 &&
374 quotechar == 0 &&
375 quoting == 0 &&
376 skipinitialspace == 0 &&
377 strict == 0)
378 return dialect;
379 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 self = (DialectObj *)type->tp_alloc(type, 0);
382 if (self == NULL) {
383 Py_XDECREF(dialect);
384 return NULL;
385 }
386 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000388 Py_XINCREF(delimiter);
389 Py_XINCREF(doublequote);
390 Py_XINCREF(escapechar);
391 Py_XINCREF(lineterminator);
392 Py_XINCREF(quotechar);
393 Py_XINCREF(quoting);
394 Py_XINCREF(skipinitialspace);
395 Py_XINCREF(strict);
396 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000397#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 if (v == NULL) \
399 v = PyObject_GetAttrString(dialect, n)
400 DIALECT_GETATTR(delimiter, "delimiter");
401 DIALECT_GETATTR(doublequote, "doublequote");
402 DIALECT_GETATTR(escapechar, "escapechar");
403 DIALECT_GETATTR(lineterminator, "lineterminator");
404 DIALECT_GETATTR(quotechar, "quotechar");
405 DIALECT_GETATTR(quoting, "quoting");
406 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
407 DIALECT_GETATTR(strict, "strict");
408 PyErr_Clear();
409 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000412#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 if (meth(name, target, src, dflt)) \
414 goto err
415 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
416 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
417 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
418 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
419 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
420 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
421 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
422 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 /* validate options */
425 if (dialect_check_quoting(self->quoting))
426 goto err;
427 if (self->delimiter == 0) {
428 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
429 goto err;
430 }
431 if (quotechar == Py_None && quoting == NULL)
432 self->quoting = QUOTE_NONE;
433 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434 PyErr_SetString(PyExc_TypeError,
435 "quotechar must be set if quoting enabled");
436 goto err;
437 }
438 if (self->lineterminator == 0) {
439 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440 goto err;
441 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 ret = (PyObject *)self;
444 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000445err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 Py_XDECREF(self);
447 Py_XDECREF(dialect);
448 Py_XDECREF(delimiter);
449 Py_XDECREF(doublequote);
450 Py_XDECREF(escapechar);
451 Py_XDECREF(lineterminator);
452 Py_XDECREF(quotechar);
453 Py_XDECREF(quoting);
454 Py_XDECREF(skipinitialspace);
455 Py_XDECREF(strict);
456 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000457}
458
459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000461"CSV dialect\n"
462"\n"
463"The Dialect type records CSV parsing and generation options.\n");
464
465static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 PyVarObject_HEAD_INIT(NULL, 0)
467 "_csv.Dialect", /* tp_name */
468 sizeof(DialectObj), /* tp_basicsize */
469 0, /* tp_itemsize */
470 /* methods */
471 (destructor)Dialect_dealloc, /* tp_dealloc */
472 (printfunc)0, /* tp_print */
473 (getattrfunc)0, /* tp_getattr */
474 (setattrfunc)0, /* tp_setattr */
475 0, /* tp_reserved */
476 (reprfunc)0, /* tp_repr */
477 0, /* tp_as_number */
478 0, /* tp_as_sequence */
479 0, /* tp_as_mapping */
480 (hashfunc)0, /* tp_hash */
481 (ternaryfunc)0, /* tp_call */
482 (reprfunc)0, /* tp_str */
483 0, /* tp_getattro */
484 0, /* tp_setattro */
485 0, /* tp_as_buffer */
486 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487 Dialect_Type_doc, /* tp_doc */
488 0, /* tp_traverse */
489 0, /* tp_clear */
490 0, /* tp_richcompare */
491 0, /* tp_weaklistoffset */
492 0, /* tp_iter */
493 0, /* tp_iternext */
494 0, /* tp_methods */
495 Dialect_memberlist, /* tp_members */
496 Dialect_getsetlist, /* tp_getset */
497 0, /* tp_base */
498 0, /* tp_dict */
499 0, /* tp_descr_get */
500 0, /* tp_descr_set */
501 0, /* tp_dictoffset */
502 0, /* tp_init */
503 0, /* tp_alloc */
504 dialect_new, /* tp_new */
505 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000506};
507
Andrew McNamara91b97462005-01-11 01:07:23 +0000508/*
509 * Return an instance of the dialect type, given a Python instance or kwarg
510 * description of the dialect
511 */
512static PyObject *
513_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 PyObject *ctor_args;
516 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
519 if (ctor_args == NULL)
520 return NULL;
521 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
522 Py_DECREF(ctor_args);
523 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000524}
525
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000529static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000530parse_save_field(ReaderObj *self)
531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000533
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200534 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
535 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 if (field == NULL)
537 return -1;
538 self->field_len = 0;
539 if (self->numeric_field) {
540 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 self->numeric_field = 0;
543 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200545 if (tmp == NULL)
546 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 field = tmp;
548 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100549 if (PyList_Append(self->fields, field) < 0) {
550 Py_DECREF(field);
551 return -1;
552 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 Py_DECREF(field);
554 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000555}
556
557static int
558parse_grow_buff(ReaderObj *self)
559{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 if (self->field_size == 0) {
561 self->field_size = 4096;
562 if (self->field != NULL)
563 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200564 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000565 }
566 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200567 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000568 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 PyErr_NoMemory();
570 return 0;
571 }
572 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200573 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000574 }
575 if (self->field == NULL) {
576 PyErr_NoMemory();
577 return 0;
578 }
579 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000580}
581
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000582static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200583parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000584{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200585 if (self->field_len >= _csvstate_global->field_limit) {
586 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
587 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 return -1;
589 }
590 if (self->field_len == self->field_size && !parse_grow_buff(self))
591 return -1;
592 self->field[self->field_len++] = c;
593 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000594}
595
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000596static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200597parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000598{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000600
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 switch (self->state) {
602 case START_RECORD:
603 /* start of record */
604 if (c == '\0')
605 /* empty line - return [] */
606 break;
607 else if (c == '\n' || c == '\r') {
608 self->state = EAT_CRNL;
609 break;
610 }
611 /* normal character - handle as START_FIELD */
612 self->state = START_FIELD;
613 /* fallthru */
614 case START_FIELD:
615 /* expecting field */
616 if (c == '\n' || c == '\r' || c == '\0') {
617 /* save empty field - return [fields] */
618 if (parse_save_field(self) < 0)
619 return -1;
620 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
621 }
622 else if (c == dialect->quotechar &&
623 dialect->quoting != QUOTE_NONE) {
624 /* start quoted field */
625 self->state = IN_QUOTED_FIELD;
626 }
627 else if (c == dialect->escapechar) {
628 /* possible escaped character */
629 self->state = ESCAPED_CHAR;
630 }
631 else if (c == ' ' && dialect->skipinitialspace)
632 /* ignore space at start of field */
633 ;
634 else if (c == dialect->delimiter) {
635 /* save empty field */
636 if (parse_save_field(self) < 0)
637 return -1;
638 }
639 else {
640 /* begin new unquoted field */
641 if (dialect->quoting == QUOTE_NONNUMERIC)
642 self->numeric_field = 1;
643 if (parse_add_char(self, c) < 0)
644 return -1;
645 self->state = IN_FIELD;
646 }
647 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000649 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400650 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400651 if (parse_add_char(self, c) < 0)
652 return -1;
653 self->state = AFTER_ESCAPED_CRNL;
654 break;
655 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 if (c == '\0')
657 c = '\n';
658 if (parse_add_char(self, c) < 0)
659 return -1;
660 self->state = IN_FIELD;
661 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000662
R David Murrayc7c42ef2013-03-19 22:41:47 -0400663 case AFTER_ESCAPED_CRNL:
664 if (c == '\0')
665 break;
666 /*fallthru*/
667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000668 case IN_FIELD:
669 /* in unquoted field */
670 if (c == '\n' || c == '\r' || c == '\0') {
671 /* end of line - return [fields] */
672 if (parse_save_field(self) < 0)
673 return -1;
674 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
675 }
676 else if (c == dialect->escapechar) {
677 /* possible escaped character */
678 self->state = ESCAPED_CHAR;
679 }
680 else if (c == dialect->delimiter) {
681 /* save field - wait for new field */
682 if (parse_save_field(self) < 0)
683 return -1;
684 self->state = START_FIELD;
685 }
686 else {
687 /* normal character - save in field */
688 if (parse_add_char(self, c) < 0)
689 return -1;
690 }
691 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000692
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000693 case IN_QUOTED_FIELD:
694 /* in quoted field */
695 if (c == '\0')
696 ;
697 else if (c == dialect->escapechar) {
698 /* Possible escape character */
699 self->state = ESCAPE_IN_QUOTED_FIELD;
700 }
701 else if (c == dialect->quotechar &&
702 dialect->quoting != QUOTE_NONE) {
703 if (dialect->doublequote) {
704 /* doublequote; " represented by "" */
705 self->state = QUOTE_IN_QUOTED_FIELD;
706 }
707 else {
708 /* end of quote part of field */
709 self->state = IN_FIELD;
710 }
711 }
712 else {
713 /* normal character - save in field */
714 if (parse_add_char(self, c) < 0)
715 return -1;
716 }
717 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000718
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 case ESCAPE_IN_QUOTED_FIELD:
720 if (c == '\0')
721 c = '\n';
722 if (parse_add_char(self, c) < 0)
723 return -1;
724 self->state = IN_QUOTED_FIELD;
725 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000726
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 case QUOTE_IN_QUOTED_FIELD:
728 /* doublequote - seen a quote in an quoted field */
729 if (dialect->quoting != QUOTE_NONE &&
730 c == dialect->quotechar) {
731 /* save "" as " */
732 if (parse_add_char(self, c) < 0)
733 return -1;
734 self->state = IN_QUOTED_FIELD;
735 }
736 else if (c == dialect->delimiter) {
737 /* save field - wait for new field */
738 if (parse_save_field(self) < 0)
739 return -1;
740 self->state = START_FIELD;
741 }
742 else if (c == '\n' || c == '\r' || c == '\0') {
743 /* end of line - return [fields] */
744 if (parse_save_field(self) < 0)
745 return -1;
746 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
747 }
748 else if (!dialect->strict) {
749 if (parse_add_char(self, c) < 0)
750 return -1;
751 self->state = IN_FIELD;
752 }
753 else {
754 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200755 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 dialect->delimiter,
757 dialect->quotechar);
758 return -1;
759 }
760 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 case EAT_CRNL:
763 if (c == '\n' || c == '\r')
764 ;
765 else if (c == '\0')
766 self->state = START_RECORD;
767 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200768 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 return -1;
770 }
771 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000772
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 }
774 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000775}
776
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000777static int
778parse_reset(ReaderObj *self)
779{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 Py_XDECREF(self->fields);
781 self->fields = PyList_New(0);
782 if (self->fields == NULL)
783 return -1;
784 self->field_len = 0;
785 self->state = START_RECORD;
786 self->numeric_field = 0;
787 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000788}
Skip Montanarob4a04172003-03-20 23:29:12 +0000789
790static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000791Reader_iternext(ReaderObj *self)
792{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200794 Py_UCS4 c;
795 Py_ssize_t pos, linelen;
796 unsigned int kind;
797 void *data;
798 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000799
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 if (parse_reset(self) < 0)
801 return NULL;
802 do {
803 lineobj = PyIter_Next(self->input_iter);
804 if (lineobj == NULL) {
805 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700806 if (!PyErr_Occurred() && (self->field_len != 0 ||
807 self->state == IN_QUOTED_FIELD)) {
808 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700809 PyErr_SetString(_csvstate_global->error_obj,
810 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700811 else if (parse_save_field(self) >= 0)
812 break;
813 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000814 return NULL;
815 }
816 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200817 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 "iterator should return strings, "
819 "not %.200s "
820 "(did you open the file in text mode?)",
821 lineobj->ob_type->tp_name
822 );
823 Py_DECREF(lineobj);
824 return NULL;
825 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100826 if (PyUnicode_READY(lineobj) == -1) {
827 Py_DECREF(lineobj);
828 return NULL;
829 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200831 kind = PyUnicode_KIND(lineobj);
832 data = PyUnicode_DATA(lineobj);
833 pos = 0;
834 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200836 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000838 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200839 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 "line contains NULL byte");
841 goto err;
842 }
843 if (parse_process_char(self, c) < 0) {
844 Py_DECREF(lineobj);
845 goto err;
846 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200847 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000848 }
849 Py_DECREF(lineobj);
850 if (parse_process_char(self, 0) < 0)
851 goto err;
852 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000853
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 fields = self->fields;
855 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000856err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000858}
859
860static void
861Reader_dealloc(ReaderObj *self)
862{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000863 PyObject_GC_UnTrack(self);
864 Py_XDECREF(self->dialect);
865 Py_XDECREF(self->input_iter);
866 Py_XDECREF(self->fields);
867 if (self->field != NULL)
868 PyMem_Free(self->field);
869 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000870}
871
872static int
873Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
874{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 Py_VISIT(self->dialect);
876 Py_VISIT(self->input_iter);
877 Py_VISIT(self->fields);
878 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000879}
880
881static int
882Reader_clear(ReaderObj *self)
883{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000884 Py_CLEAR(self->dialect);
885 Py_CLEAR(self->input_iter);
886 Py_CLEAR(self->fields);
887 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000888}
889
890PyDoc_STRVAR(Reader_Type_doc,
891"CSV reader\n"
892"\n"
893"Reader objects are responsible for reading and parsing tabular data\n"
894"in CSV format.\n"
895);
896
897static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000899};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000900#define R_OFF(x) offsetof(ReaderObj, x)
901
902static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
904 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
905 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000906};
907
Skip Montanarob4a04172003-03-20 23:29:12 +0000908
909static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 PyVarObject_HEAD_INIT(NULL, 0)
911 "_csv.reader", /*tp_name*/
912 sizeof(ReaderObj), /*tp_basicsize*/
913 0, /*tp_itemsize*/
914 /* methods */
915 (destructor)Reader_dealloc, /*tp_dealloc*/
916 (printfunc)0, /*tp_print*/
917 (getattrfunc)0, /*tp_getattr*/
918 (setattrfunc)0, /*tp_setattr*/
919 0, /*tp_reserved*/
920 (reprfunc)0, /*tp_repr*/
921 0, /*tp_as_number*/
922 0, /*tp_as_sequence*/
923 0, /*tp_as_mapping*/
924 (hashfunc)0, /*tp_hash*/
925 (ternaryfunc)0, /*tp_call*/
926 (reprfunc)0, /*tp_str*/
927 0, /*tp_getattro*/
928 0, /*tp_setattro*/
929 0, /*tp_as_buffer*/
930 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
931 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
932 Reader_Type_doc, /*tp_doc*/
933 (traverseproc)Reader_traverse, /*tp_traverse*/
934 (inquiry)Reader_clear, /*tp_clear*/
935 0, /*tp_richcompare*/
936 0, /*tp_weaklistoffset*/
937 PyObject_SelfIter, /*tp_iter*/
938 (getiterfunc)Reader_iternext, /*tp_iternext*/
939 Reader_methods, /*tp_methods*/
940 Reader_memberlist, /*tp_members*/
941 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000942
943};
944
945static PyObject *
946csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
947{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 PyObject * iterator, * dialect = NULL;
949 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 if (!self)
952 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 self->dialect = NULL;
955 self->fields = NULL;
956 self->input_iter = NULL;
957 self->field = NULL;
958 self->field_size = 0;
959 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 if (parse_reset(self) < 0) {
962 Py_DECREF(self);
963 return NULL;
964 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
967 Py_DECREF(self);
968 return NULL;
969 }
970 self->input_iter = PyObject_GetIter(iterator);
971 if (self->input_iter == NULL) {
972 PyErr_SetString(PyExc_TypeError,
973 "argument 1 must be an iterator");
974 Py_DECREF(self);
975 return NULL;
976 }
977 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
978 if (self->dialect == NULL) {
979 Py_DECREF(self);
980 return NULL;
981 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000982
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 PyObject_GC_Track(self);
984 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000985}
986
987/*
988 * WRITER
989 */
990/* ---------------------------------------------------------------- */
991static void
992join_reset(WriterObj *self)
993{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000994 self->rec_len = 0;
995 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000996}
997
/* Granularity, in Py_UCS4 elements, by which the writer's record buffer is
 * sized (see join_check_rec_size). */
#define MEM_INCR 32768
999
1000/* Calculate new record length or append field to record. Return new
1001 * record length.
1002 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001003static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001004join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
1005 Py_ssize_t field_len, int quote_empty, int *quoted,
1006 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001007{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 DialectObj *dialect = self->dialect;
1009 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001010 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001011
1012#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 do {\
1014 if (copy_phase) \
1015 self->rec[rec_len] = c;\
1016 rec_len++;\
1017 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001019 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 /* If this is not the first field we need a field separator */
1022 if (self->num_fields > 0)
1023 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 /* Handle preceding quote */
1026 if (copy_phase && *quoted)
1027 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 /* Copy/count field data */
1030 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001031 for (i = 0; field_data && (i < field_len); i++) {
1032 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001034
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001035 if (c == dialect->delimiter ||
1036 c == dialect->escapechar ||
1037 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001038 PyUnicode_FindChar(
1039 dialect->lineterminator, c, 0,
1040 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001041 if (dialect->quoting == QUOTE_NONE)
1042 want_escape = 1;
1043 else {
1044 if (c == dialect->quotechar) {
1045 if (dialect->doublequote)
1046 ADDCH(dialect->quotechar);
1047 else
1048 want_escape = 1;
1049 }
1050 if (!want_escape)
1051 *quoted = 1;
1052 }
1053 if (want_escape) {
1054 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001055 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 "need to escape, but no escapechar set");
1057 return -1;
1058 }
1059 ADDCH(dialect->escapechar);
1060 }
1061 }
1062 /* Copy field character into record buffer.
1063 */
1064 ADDCH(c);
1065 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 /* If field is empty check if it needs to be quoted.
1068 */
1069 if (i == 0 && quote_empty) {
1070 if (dialect->quoting == QUOTE_NONE) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001071 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 "single empty field record must be quoted");
1073 return -1;
1074 }
1075 else
1076 *quoted = 1;
1077 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001078
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 if (*quoted) {
1080 if (copy_phase)
1081 ADDCH(dialect->quotechar);
1082 else
1083 rec_len += 2;
1084 }
1085 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001086#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001087}
1088
1089static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001090join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001091{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001092
Antoine Pitrou40455752010-08-15 18:51:10 +00001093 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 PyErr_NoMemory();
1095 return 0;
1096 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 if (rec_len > self->rec_size) {
1099 if (self->rec_size == 0) {
1100 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1101 if (self->rec != NULL)
1102 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 }
1105 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001106 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 if (self->rec == NULL)
1111 PyMem_Free(old_rec);
1112 }
1113 if (self->rec == NULL) {
1114 PyErr_NoMemory();
1115 return 0;
1116 }
1117 }
1118 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001119}
1120
1121static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001122join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001123{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001124 unsigned int field_kind = -1;
1125 void *field_data = NULL;
1126 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001127 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001129 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001130 if (PyUnicode_READY(field) == -1)
1131 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001132 field_kind = PyUnicode_KIND(field);
1133 field_data = PyUnicode_DATA(field);
1134 field_len = PyUnicode_GET_LENGTH(field);
1135 }
1136 rec_len = join_append_data(self, field_kind, field_data, field_len,
1137 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 if (rec_len < 0)
1139 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, rec_len))
1143 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1146 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150}
1151
1152static int
1153join_append_lineterminator(WriterObj *self)
1154{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001155 Py_ssize_t terminator_len, i;
1156 unsigned int term_kind;
1157 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001159 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 if (terminator_len == -1)
1161 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 /* grow record buffer if necessary */
1164 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1165 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001167 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1168 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1169 for (i = 0; i < terminator_len; i++)
1170 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174}
1175
1176PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001177"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001178"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001179"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001180"elements will be converted to string.");
1181
1182static PyObject *
1183csv_writerow(WriterObj *self, PyObject *seq)
1184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001186 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001187 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 if (!PySequence_Check(seq))
Antoine Pitroue7672d32012-05-16 11:33:08 +02001190 return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 len = PySequence_Length(seq);
1193 if (len < 0)
1194 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 /* Join all fields in internal buffer.
1197 */
1198 join_reset(self);
1199 for (i = 0; i < len; i++) {
1200 PyObject *field;
1201 int append_ok;
1202 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 field = PySequence_GetItem(seq, i);
1205 if (field == NULL)
1206 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 switch (dialect->quoting) {
1209 case QUOTE_NONNUMERIC:
1210 quoted = !PyNumber_Check(field);
1211 break;
1212 case QUOTE_ALL:
1213 quoted = 1;
1214 break;
1215 default:
1216 quoted = 0;
1217 break;
1218 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001221 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 Py_DECREF(field);
1223 }
1224 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001225 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 Py_DECREF(field);
1227 }
1228 else {
1229 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 str = PyObject_Str(field);
1232 Py_DECREF(field);
1233 if (str == NULL)
1234 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001235 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 Py_DECREF(str);
1237 }
1238 if (!append_ok)
1239 return NULL;
1240 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001241
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 /* Add line terminator.
1243 */
1244 if (!join_append_lineterminator(self))
1245 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001246
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001247 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1248 (void *) self->rec, self->rec_len);
1249 if (line == NULL)
1250 return NULL;
1251 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1252 Py_DECREF(line);
1253 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001254}
1255
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001256PyDoc_STRVAR(csv_writerows_doc,
1257"writerows(sequence of sequences)\n"
1258"\n"
1259"Construct and write a series of sequences to a csv file. Non-string\n"
1260"elements will be converted to string.");
1261
Skip Montanarob4a04172003-03-20 23:29:12 +00001262static PyObject *
1263csv_writerows(WriterObj *self, PyObject *seqseq)
1264{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001266
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 row_iter = PyObject_GetIter(seqseq);
1268 if (row_iter == NULL) {
1269 PyErr_SetString(PyExc_TypeError,
1270 "writerows() argument must be iterable");
1271 return NULL;
1272 }
1273 while ((row_obj = PyIter_Next(row_iter))) {
1274 result = csv_writerow(self, row_obj);
1275 Py_DECREF(row_obj);
1276 if (!result) {
1277 Py_DECREF(row_iter);
1278 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001279 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 else
1281 Py_DECREF(result);
1282 }
1283 Py_DECREF(row_iter);
1284 if (PyErr_Occurred())
1285 return NULL;
1286 Py_INCREF(Py_None);
1287 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001288}
1289
1290static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1292 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1293 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001294};
1295
1296#define W_OFF(x) offsetof(WriterObj, x)
1297
1298static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1300 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001301};
1302
1303static void
1304Writer_dealloc(WriterObj *self)
1305{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 PyObject_GC_UnTrack(self);
1307 Py_XDECREF(self->dialect);
1308 Py_XDECREF(self->writeline);
1309 if (self->rec != NULL)
1310 PyMem_Free(self->rec);
1311 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001312}
1313
1314static int
1315Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 Py_VISIT(self->dialect);
1318 Py_VISIT(self->writeline);
1319 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001320}
1321
1322static int
1323Writer_clear(WriterObj *self)
1324{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 Py_CLEAR(self->dialect);
1326 Py_CLEAR(self->writeline);
1327 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001328}
1329
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001331"CSV writer\n"
1332"\n"
1333"Writer objects are responsible for generating tabular data\n"
1334"in CSV format from sequence input.\n"
1335);
1336
1337static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 PyVarObject_HEAD_INIT(NULL, 0)
1339 "_csv.writer", /*tp_name*/
1340 sizeof(WriterObj), /*tp_basicsize*/
1341 0, /*tp_itemsize*/
1342 /* methods */
1343 (destructor)Writer_dealloc, /*tp_dealloc*/
1344 (printfunc)0, /*tp_print*/
1345 (getattrfunc)0, /*tp_getattr*/
1346 (setattrfunc)0, /*tp_setattr*/
1347 0, /*tp_reserved*/
1348 (reprfunc)0, /*tp_repr*/
1349 0, /*tp_as_number*/
1350 0, /*tp_as_sequence*/
1351 0, /*tp_as_mapping*/
1352 (hashfunc)0, /*tp_hash*/
1353 (ternaryfunc)0, /*tp_call*/
1354 (reprfunc)0, /*tp_str*/
1355 0, /*tp_getattro*/
1356 0, /*tp_setattro*/
1357 0, /*tp_as_buffer*/
1358 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1359 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1360 Writer_Type_doc,
1361 (traverseproc)Writer_traverse, /*tp_traverse*/
1362 (inquiry)Writer_clear, /*tp_clear*/
1363 0, /*tp_richcompare*/
1364 0, /*tp_weaklistoffset*/
1365 (getiterfunc)0, /*tp_iter*/
1366 (getiterfunc)0, /*tp_iternext*/
1367 Writer_methods, /*tp_methods*/
1368 Writer_memberlist, /*tp_members*/
1369 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001370};
1371
1372static PyObject *
1373csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1374{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001375 PyObject * output_file, * dialect = NULL;
1376 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001377 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 if (!self)
1380 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 self->dialect = NULL;
1383 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 self->rec = NULL;
1386 self->rec_size = 0;
1387 self->rec_len = 0;
1388 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1391 Py_DECREF(self);
1392 return NULL;
1393 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001394 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1396 PyErr_SetString(PyExc_TypeError,
1397 "argument 1 must have a \"write\" method");
1398 Py_DECREF(self);
1399 return NULL;
1400 }
1401 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1402 if (self->dialect == NULL) {
1403 Py_DECREF(self);
1404 return NULL;
1405 }
1406 PyObject_GC_Track(self);
1407 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001408}
1409
1410/*
1411 * DIALECT REGISTRY
1412 */
1413static PyObject *
1414csv_list_dialects(PyObject *module, PyObject *args)
1415{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001416 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001417}
1418
1419static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001420csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001421{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyObject *name_obj, *dialect_obj = NULL;
1423 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1426 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001427 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001429 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 return NULL;
1431 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001432 if (PyUnicode_READY(name_obj) == -1)
1433 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 dialect = _call_dialect(dialect_obj, kwargs);
1435 if (dialect == NULL)
1436 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001437 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 Py_DECREF(dialect);
1439 return NULL;
1440 }
1441 Py_DECREF(dialect);
1442 Py_INCREF(Py_None);
1443 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001444}
1445
1446static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001447csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001448{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001449 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1450 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 Py_INCREF(Py_None);
1452 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001453}
1454
1455static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001456csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001457{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001459}
1460
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001461static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001462csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001463{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001465 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1468 return NULL;
1469 if (new_limit != NULL) {
1470 if (!PyLong_CheckExact(new_limit)) {
1471 PyErr_Format(PyExc_TypeError,
1472 "limit must be an integer");
1473 return NULL;
1474 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001475 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1476 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1477 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 return NULL;
1479 }
1480 }
1481 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001482}
1483
Skip Montanarob4a04172003-03-20 23:29:12 +00001484/*
1485 * MODULE
1486 */
1487
1488PyDoc_STRVAR(csv_module_doc,
1489"CSV parsing and writing.\n"
1490"\n"
1491"This module provides classes that assist in the reading and writing\n"
1492"of Comma Separated Value (CSV) files, and implements the interface\n"
1493"described by PEP 305. Although many CSV files are simple to parse,\n"
1494"the format is not formally defined by a stable specification and\n"
1495"is subtle enough that parsing lines of a CSV file with something\n"
1496"like line.split(\",\") is bound to fail. The module supports three\n"
1497"basic APIs: reading, writing, and registration of dialects.\n"
1498"\n"
1499"\n"
1500"DIALECT REGISTRATION:\n"
1501"\n"
1502"Readers and writers support a dialect argument, which is a convenient\n"
1503"handle on a group of settings. When the dialect argument is a string,\n"
1504"it identifies one of the dialects previously registered with the module.\n"
1505"If it is a class or instance, the attributes of the argument are used as\n"
1506"the settings for the reader or writer:\n"
1507"\n"
1508" class excel:\n"
1509" delimiter = ','\n"
1510" quotechar = '\"'\n"
1511" escapechar = None\n"
1512" doublequote = True\n"
1513" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001514" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001515" quoting = QUOTE_MINIMAL\n"
1516"\n"
1517"SETTINGS:\n"
1518"\n"
1519" * quotechar - specifies a one-character string to use as the \n"
1520" quoting character. It defaults to '\"'.\n"
1521" * delimiter - specifies a one-character string to use as the \n"
1522" field separator. It defaults to ','.\n"
1523" * skipinitialspace - specifies how to interpret whitespace which\n"
1524" immediately follows a delimiter. It defaults to False, which\n"
1525" means that whitespace immediately following a delimiter is part\n"
1526" of the following field.\n"
1527" * lineterminator - specifies the character sequence which should \n"
1528" terminate rows.\n"
1529" * quoting - controls when quotes should be generated by the writer.\n"
1530" It can take on any of the following module constants:\n"
1531"\n"
1532" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1533" field contains either the quotechar or the delimiter\n"
1534" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1535" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001536" fields which do not parse as integers or floating point\n"
1537" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001538" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1539" * escapechar - specifies a one-character string used to escape \n"
1540" the delimiter when quoting is set to QUOTE_NONE.\n"
1541" * doublequote - controls the handling of quotes inside fields. When\n"
1542" True, two consecutive quotes are interpreted as one during read,\n"
1543" and when writing, each quote character embedded in the data is\n"
1544" written as two quotes\n");
1545
1546PyDoc_STRVAR(csv_reader_doc,
1547" csv_reader = reader(iterable [, dialect='excel']\n"
1548" [optional keyword args])\n"
1549" for row in csv_reader:\n"
1550" process(row)\n"
1551"\n"
1552"The \"iterable\" argument can be any object that returns a line\n"
1553"of input for each iteration, such as a file object or a list. The\n"
1554"optional \"dialect\" parameter is discussed below. The function\n"
1555"also accepts optional keyword arguments which override settings\n"
1556"provided by the dialect.\n"
1557"\n"
1558"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001559"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001560
1561PyDoc_STRVAR(csv_writer_doc,
1562" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1563" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001564" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001565" csv_writer.writerow(row)\n"
1566"\n"
1567" [or]\n"
1568"\n"
1569" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1570" [optional keyword args])\n"
1571" csv_writer.writerows(rows)\n"
1572"\n"
1573"The \"fileobj\" argument can be any object that supports the file API.\n");
1574
1575PyDoc_STRVAR(csv_list_dialects_doc,
1576"Return a list of all know dialect names.\n"
1577" names = csv.list_dialects()");
1578
1579PyDoc_STRVAR(csv_get_dialect_doc,
1580"Return the dialect instance associated with name.\n"
1581" dialect = csv.get_dialect(name)");
1582
1583PyDoc_STRVAR(csv_register_dialect_doc,
1584"Create a mapping from a string name to a dialect class.\n"
1585" dialect = csv.register_dialect(name, dialect)");
1586
/* Docstring for the module-level "unregister_dialect" function, which is
 * registered in csv_methods as taking exactly one argument (METH_O). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1590
/* Docstring for the module-level "field_size_limit" function, which is
 * registered in csv_methods with METH_VARARGS.  The limit it describes
 * is kept in the module state's field_limit member, which PyInit__csv
 * initialises to 128 * 1024. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1597
Skip Montanarob4a04172003-03-20 23:29:12 +00001598static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 { "reader", (PyCFunction)csv_reader,
1600 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1601 { "writer", (PyCFunction)csv_writer,
1602 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1603 { "list_dialects", (PyCFunction)csv_list_dialects,
1604 METH_NOARGS, csv_list_dialects_doc},
1605 { "register_dialect", (PyCFunction)csv_register_dialect,
1606 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1607 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1608 METH_O, csv_unregister_dialect_doc},
1609 { "get_dialect", (PyCFunction)csv_get_dialect,
1610 METH_O, csv_get_dialect_doc},
1611 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1612 METH_VARARGS, csv_field_size_limit_doc},
1613 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001614};
1615
/*
 * Module definition for "_csv", passed to PyModule_Create() in
 * PyInit__csv().  The initializer is positional, so the order of the
 * entries below must follow the layout of struct PyModuleDef.
 */
static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",                     /* module name */
    csv_module_doc,             /* module docstring */
    sizeof(_csvstate),          /* size of the per-module state block */
    csv_methods,                /* module-level function table */
    NULL,                       /* slot not used by this module */
    _csv_traverse,              /* visits the objects held in the state */
    _csv_clear,                 /* releases the objects held in the state */
    _csv_free                   /* free hook; delegates to _csv_clear */
};
1627
Skip Montanarob4a04172003-03-20 23:29:12 +00001628PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001629PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001630{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 PyObject *module;
1632 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 if (PyType_Ready(&Dialect_Type) < 0)
1635 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 if (PyType_Ready(&Reader_Type) < 0)
1638 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 if (PyType_Ready(&Writer_Type) < 0)
1641 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 /* Create the module and add the functions */
1644 module = PyModule_Create(&_csvmodule);
1645 if (module == NULL)
1646 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 /* Add version to the module. */
1649 if (PyModule_AddStringConstant(module, "__version__",
1650 MODULE_VERSION) == -1)
1651 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001652
Antoine Pitroue7672d32012-05-16 11:33:08 +02001653 /* Set the field limit */
1654 _csvstate(module)->field_limit = 128 * 1024;
1655 /* Do I still need to add this var to the Module Dict? */
1656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001658 _csvstate(module)->dialects = PyDict_New();
1659 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001661 Py_INCREF(_csvstate(module)->dialects);
1662 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 /* Add quote styles into dictionary */
1666 for (style = quote_styles; style->name; style++) {
1667 if (PyModule_AddIntConstant(module, style->name,
1668 style->style) == -1)
1669 return NULL;
1670 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 /* Add the Dialect type */
1673 Py_INCREF(&Dialect_Type);
1674 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1675 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001678 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1679 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001681 Py_INCREF(_csvstate(module)->error_obj);
1682 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001684}