blob: eb886264d7a0b428e7376733101bb079c301b45d [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting styles accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
63 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
66static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300251 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_XDECREF(*target);
280 Py_INCREF(src);
281 *target = src;
282 }
283 }
284 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000285}
286
287static int
288dialect_check_quoting(int quoting)
289{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000290 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200293 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 return 0;
295 }
296 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
297 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000298}
Skip Montanarob4a04172003-03-20 23:29:12 +0000299
300#define D_OFF(x) offsetof(DialectObj, x)
301
302static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
304 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
305 { "strict", T_INT, D_OFF(strict), READONLY },
306 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 { "delimiter", (getter)Dialect_get_delimiter},
311 { "escapechar", (getter)Dialect_get_escapechar},
312 { "lineterminator", (getter)Dialect_get_lineterminator},
313 { "quotechar", (getter)Dialect_get_quotechar},
314 { "quoting", (getter)Dialect_get_quoting},
315 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000316};
317
318static void
319Dialect_dealloc(DialectObj *self)
320{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 Py_XDECREF(self->lineterminator);
322 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000323}
324
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000325static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 "dialect",
327 "delimiter",
328 "doublequote",
329 "escapechar",
330 "lineterminator",
331 "quotechar",
332 "quoting",
333 "skipinitialspace",
334 "strict",
335 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000336};
337
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000338static PyObject *
339dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 DialectObj *self;
342 PyObject *ret = NULL;
343 PyObject *dialect = NULL;
344 PyObject *delimiter = NULL;
345 PyObject *doublequote = NULL;
346 PyObject *escapechar = NULL;
347 PyObject *lineterminator = NULL;
348 PyObject *quotechar = NULL;
349 PyObject *quoting = NULL;
350 PyObject *skipinitialspace = NULL;
351 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
354 "|OOOOOOOOO", dialect_kws,
355 &dialect,
356 &delimiter,
357 &doublequote,
358 &escapechar,
359 &lineterminator,
360 &quotechar,
361 &quoting,
362 &skipinitialspace,
363 &strict))
364 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100367 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 dialect = get_dialect_from_registry(dialect);
369 if (dialect == NULL)
370 return NULL;
371 }
372 else
373 Py_INCREF(dialect);
374 /* Can we reuse this instance? */
375 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
376 delimiter == 0 &&
377 doublequote == 0 &&
378 escapechar == 0 &&
379 lineterminator == 0 &&
380 quotechar == 0 &&
381 quoting == 0 &&
382 skipinitialspace == 0 &&
383 strict == 0)
384 return dialect;
385 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 self = (DialectObj *)type->tp_alloc(type, 0);
388 if (self == NULL) {
389 Py_XDECREF(dialect);
390 return NULL;
391 }
392 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 Py_XINCREF(delimiter);
395 Py_XINCREF(doublequote);
396 Py_XINCREF(escapechar);
397 Py_XINCREF(lineterminator);
398 Py_XINCREF(quotechar);
399 Py_XINCREF(quoting);
400 Py_XINCREF(skipinitialspace);
401 Py_XINCREF(strict);
402 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 if (v == NULL) \
405 v = PyObject_GetAttrString(dialect, n)
406 DIALECT_GETATTR(delimiter, "delimiter");
407 DIALECT_GETATTR(doublequote, "doublequote");
408 DIALECT_GETATTR(escapechar, "escapechar");
409 DIALECT_GETATTR(lineterminator, "lineterminator");
410 DIALECT_GETATTR(quotechar, "quotechar");
411 DIALECT_GETATTR(quoting, "quoting");
412 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
413 DIALECT_GETATTR(strict, "strict");
414 PyErr_Clear();
415 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000418#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 if (meth(name, target, src, dflt)) \
420 goto err
421 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
422 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
423 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
424 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
425 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
426 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
427 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
428 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 /* validate options */
431 if (dialect_check_quoting(self->quoting))
432 goto err;
433 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200434 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300435 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 goto err;
437 }
438 if (quotechar == Py_None && quoting == NULL)
439 self->quoting = QUOTE_NONE;
440 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
441 PyErr_SetString(PyExc_TypeError,
442 "quotechar must be set if quoting enabled");
443 goto err;
444 }
445 if (self->lineterminator == 0) {
446 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
447 goto err;
448 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 ret = (PyObject *)self;
451 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 Py_XDECREF(self);
454 Py_XDECREF(dialect);
455 Py_XDECREF(delimiter);
456 Py_XDECREF(doublequote);
457 Py_XDECREF(escapechar);
458 Py_XDECREF(lineterminator);
459 Py_XDECREF(quotechar);
460 Py_XDECREF(quoting);
461 Py_XDECREF(skipinitialspace);
462 Py_XDECREF(strict);
463 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000464}
465
466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000468"CSV dialect\n"
469"\n"
470"The Dialect type records CSV parsing and generation options.\n");
471
472static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 PyVarObject_HEAD_INIT(NULL, 0)
474 "_csv.Dialect", /* tp_name */
475 sizeof(DialectObj), /* tp_basicsize */
476 0, /* tp_itemsize */
477 /* methods */
478 (destructor)Dialect_dealloc, /* tp_dealloc */
479 (printfunc)0, /* tp_print */
480 (getattrfunc)0, /* tp_getattr */
481 (setattrfunc)0, /* tp_setattr */
482 0, /* tp_reserved */
483 (reprfunc)0, /* tp_repr */
484 0, /* tp_as_number */
485 0, /* tp_as_sequence */
486 0, /* tp_as_mapping */
487 (hashfunc)0, /* tp_hash */
488 (ternaryfunc)0, /* tp_call */
489 (reprfunc)0, /* tp_str */
490 0, /* tp_getattro */
491 0, /* tp_setattro */
492 0, /* tp_as_buffer */
493 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
494 Dialect_Type_doc, /* tp_doc */
495 0, /* tp_traverse */
496 0, /* tp_clear */
497 0, /* tp_richcompare */
498 0, /* tp_weaklistoffset */
499 0, /* tp_iter */
500 0, /* tp_iternext */
501 0, /* tp_methods */
502 Dialect_memberlist, /* tp_members */
503 Dialect_getsetlist, /* tp_getset */
504 0, /* tp_base */
505 0, /* tp_dict */
506 0, /* tp_descr_get */
507 0, /* tp_descr_set */
508 0, /* tp_dictoffset */
509 0, /* tp_init */
510 0, /* tp_alloc */
511 dialect_new, /* tp_new */
512 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000513};
514
Andrew McNamara91b97462005-01-11 01:07:23 +0000515/*
516 * Return an instance of the dialect type, given a Python instance or kwarg
517 * description of the dialect
518 */
519static PyObject *
520_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 PyObject *ctor_args;
523 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
526 if (ctor_args == NULL)
527 return NULL;
528 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
529 Py_DECREF(ctor_args);
530 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000531}
532
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000536static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000537parse_save_field(ReaderObj *self)
538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000540
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200541 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
542 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 if (field == NULL)
544 return -1;
545 self->field_len = 0;
546 if (self->numeric_field) {
547 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 self->numeric_field = 0;
550 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200552 if (tmp == NULL)
553 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 field = tmp;
555 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100556 if (PyList_Append(self->fields, field) < 0) {
557 Py_DECREF(field);
558 return -1;
559 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 Py_DECREF(field);
561 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000562}
563
564static int
565parse_grow_buff(ReaderObj *self)
566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 if (self->field_size == 0) {
568 self->field_size = 4096;
569 if (self->field != NULL)
570 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 }
573 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200574 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000575 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 PyErr_NoMemory();
577 return 0;
578 }
579 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200580 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000581 }
582 if (self->field == NULL) {
583 PyErr_NoMemory();
584 return 0;
585 }
586 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000587}
588
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000589static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200590parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000591{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200592 if (self->field_len >= _csvstate_global->field_limit) {
593 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
594 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 return -1;
596 }
597 if (self->field_len == self->field_size && !parse_grow_buff(self))
598 return -1;
599 self->field[self->field_len++] = c;
600 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000601}
602
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000603static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200604parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000605{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 switch (self->state) {
609 case START_RECORD:
610 /* start of record */
611 if (c == '\0')
612 /* empty line - return [] */
613 break;
614 else if (c == '\n' || c == '\r') {
615 self->state = EAT_CRNL;
616 break;
617 }
618 /* normal character - handle as START_FIELD */
619 self->state = START_FIELD;
620 /* fallthru */
621 case START_FIELD:
622 /* expecting field */
623 if (c == '\n' || c == '\r' || c == '\0') {
624 /* save empty field - return [fields] */
625 if (parse_save_field(self) < 0)
626 return -1;
627 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
628 }
629 else if (c == dialect->quotechar &&
630 dialect->quoting != QUOTE_NONE) {
631 /* start quoted field */
632 self->state = IN_QUOTED_FIELD;
633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == ' ' && dialect->skipinitialspace)
639 /* ignore space at start of field */
640 ;
641 else if (c == dialect->delimiter) {
642 /* save empty field */
643 if (parse_save_field(self) < 0)
644 return -1;
645 }
646 else {
647 /* begin new unquoted field */
648 if (dialect->quoting == QUOTE_NONNUMERIC)
649 self->numeric_field = 1;
650 if (parse_add_char(self, c) < 0)
651 return -1;
652 self->state = IN_FIELD;
653 }
654 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400657 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400658 if (parse_add_char(self, c) < 0)
659 return -1;
660 self->state = AFTER_ESCAPED_CRNL;
661 break;
662 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000663 if (c == '\0')
664 c = '\n';
665 if (parse_add_char(self, c) < 0)
666 return -1;
667 self->state = IN_FIELD;
668 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000669
R David Murrayc7c42ef2013-03-19 22:41:47 -0400670 case AFTER_ESCAPED_CRNL:
671 if (c == '\0')
672 break;
673 /*fallthru*/
674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000675 case IN_FIELD:
676 /* in unquoted field */
677 if (c == '\n' || c == '\r' || c == '\0') {
678 /* end of line - return [fields] */
679 if (parse_save_field(self) < 0)
680 return -1;
681 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
682 }
683 else if (c == dialect->escapechar) {
684 /* possible escaped character */
685 self->state = ESCAPED_CHAR;
686 }
687 else if (c == dialect->delimiter) {
688 /* save field - wait for new field */
689 if (parse_save_field(self) < 0)
690 return -1;
691 self->state = START_FIELD;
692 }
693 else {
694 /* normal character - save in field */
695 if (parse_add_char(self, c) < 0)
696 return -1;
697 }
698 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000699
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000700 case IN_QUOTED_FIELD:
701 /* in quoted field */
702 if (c == '\0')
703 ;
704 else if (c == dialect->escapechar) {
705 /* Possible escape character */
706 self->state = ESCAPE_IN_QUOTED_FIELD;
707 }
708 else if (c == dialect->quotechar &&
709 dialect->quoting != QUOTE_NONE) {
710 if (dialect->doublequote) {
711 /* doublequote; " represented by "" */
712 self->state = QUOTE_IN_QUOTED_FIELD;
713 }
714 else {
715 /* end of quote part of field */
716 self->state = IN_FIELD;
717 }
718 }
719 else {
720 /* normal character - save in field */
721 if (parse_add_char(self, c) < 0)
722 return -1;
723 }
724 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 case ESCAPE_IN_QUOTED_FIELD:
727 if (c == '\0')
728 c = '\n';
729 if (parse_add_char(self, c) < 0)
730 return -1;
731 self->state = IN_QUOTED_FIELD;
732 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000734 case QUOTE_IN_QUOTED_FIELD:
735 /* doublequote - seen a quote in an quoted field */
736 if (dialect->quoting != QUOTE_NONE &&
737 c == dialect->quotechar) {
738 /* save "" as " */
739 if (parse_add_char(self, c) < 0)
740 return -1;
741 self->state = IN_QUOTED_FIELD;
742 }
743 else if (c == dialect->delimiter) {
744 /* save field - wait for new field */
745 if (parse_save_field(self) < 0)
746 return -1;
747 self->state = START_FIELD;
748 }
749 else if (c == '\n' || c == '\r' || c == '\0') {
750 /* end of line - return [fields] */
751 if (parse_save_field(self) < 0)
752 return -1;
753 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
754 }
755 else if (!dialect->strict) {
756 if (parse_add_char(self, c) < 0)
757 return -1;
758 self->state = IN_FIELD;
759 }
760 else {
761 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200762 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 dialect->delimiter,
764 dialect->quotechar);
765 return -1;
766 }
767 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 case EAT_CRNL:
770 if (c == '\n' || c == '\r')
771 ;
772 else if (c == '\0')
773 self->state = START_RECORD;
774 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200775 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000776 return -1;
777 }
778 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000779
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 }
781 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000782}
783
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000784static int
785parse_reset(ReaderObj *self)
786{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 Py_XDECREF(self->fields);
788 self->fields = PyList_New(0);
789 if (self->fields == NULL)
790 return -1;
791 self->field_len = 0;
792 self->state = START_RECORD;
793 self->numeric_field = 0;
794 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000795}
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
797static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000798Reader_iternext(ReaderObj *self)
799{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000800 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200801 Py_UCS4 c;
802 Py_ssize_t pos, linelen;
803 unsigned int kind;
804 void *data;
805 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 if (parse_reset(self) < 0)
808 return NULL;
809 do {
810 lineobj = PyIter_Next(self->input_iter);
811 if (lineobj == NULL) {
812 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700813 if (!PyErr_Occurred() && (self->field_len != 0 ||
814 self->state == IN_QUOTED_FIELD)) {
815 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700816 PyErr_SetString(_csvstate_global->error_obj,
817 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700818 else if (parse_save_field(self) >= 0)
819 break;
820 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 return NULL;
822 }
823 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200824 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 "iterator should return strings, "
826 "not %.200s "
827 "(did you open the file in text mode?)",
828 lineobj->ob_type->tp_name
829 );
830 Py_DECREF(lineobj);
831 return NULL;
832 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100833 if (PyUnicode_READY(lineobj) == -1) {
834 Py_DECREF(lineobj);
835 return NULL;
836 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200838 kind = PyUnicode_KIND(lineobj);
839 data = PyUnicode_DATA(lineobj);
840 pos = 0;
841 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200843 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000845 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200846 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 "line contains NULL byte");
848 goto err;
849 }
850 if (parse_process_char(self, c) < 0) {
851 Py_DECREF(lineobj);
852 goto err;
853 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200854 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000855 }
856 Py_DECREF(lineobj);
857 if (parse_process_char(self, 0) < 0)
858 goto err;
859 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000860
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 fields = self->fields;
862 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000863err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000865}
866
867static void
868Reader_dealloc(ReaderObj *self)
869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 PyObject_GC_UnTrack(self);
871 Py_XDECREF(self->dialect);
872 Py_XDECREF(self->input_iter);
873 Py_XDECREF(self->fields);
874 if (self->field != NULL)
875 PyMem_Free(self->field);
876 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000877}
878
879static int
880Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
881{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 Py_VISIT(self->dialect);
883 Py_VISIT(self->input_iter);
884 Py_VISIT(self->fields);
885 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000886}
887
888static int
889Reader_clear(ReaderObj *self)
890{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 Py_CLEAR(self->dialect);
892 Py_CLEAR(self->input_iter);
893 Py_CLEAR(self->fields);
894 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000895}
896
897PyDoc_STRVAR(Reader_Type_doc,
898"CSV reader\n"
899"\n"
900"Reader objects are responsible for reading and parsing tabular data\n"
901"in CSV format.\n"
902);
903
904static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000906};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000907#define R_OFF(x) offsetof(ReaderObj, x)
908
909static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
911 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
912 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000913};
914
Skip Montanarob4a04172003-03-20 23:29:12 +0000915
916static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 PyVarObject_HEAD_INIT(NULL, 0)
918 "_csv.reader", /*tp_name*/
919 sizeof(ReaderObj), /*tp_basicsize*/
920 0, /*tp_itemsize*/
921 /* methods */
922 (destructor)Reader_dealloc, /*tp_dealloc*/
923 (printfunc)0, /*tp_print*/
924 (getattrfunc)0, /*tp_getattr*/
925 (setattrfunc)0, /*tp_setattr*/
926 0, /*tp_reserved*/
927 (reprfunc)0, /*tp_repr*/
928 0, /*tp_as_number*/
929 0, /*tp_as_sequence*/
930 0, /*tp_as_mapping*/
931 (hashfunc)0, /*tp_hash*/
932 (ternaryfunc)0, /*tp_call*/
933 (reprfunc)0, /*tp_str*/
934 0, /*tp_getattro*/
935 0, /*tp_setattro*/
936 0, /*tp_as_buffer*/
937 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
938 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
939 Reader_Type_doc, /*tp_doc*/
940 (traverseproc)Reader_traverse, /*tp_traverse*/
941 (inquiry)Reader_clear, /*tp_clear*/
942 0, /*tp_richcompare*/
943 0, /*tp_weaklistoffset*/
944 PyObject_SelfIter, /*tp_iter*/
945 (getiterfunc)Reader_iternext, /*tp_iternext*/
946 Reader_methods, /*tp_methods*/
947 Reader_memberlist, /*tp_members*/
948 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000949
950};
951
952static PyObject *
953csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 PyObject * iterator, * dialect = NULL;
956 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 if (!self)
959 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 self->dialect = NULL;
962 self->fields = NULL;
963 self->input_iter = NULL;
964 self->field = NULL;
965 self->field_size = 0;
966 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 if (parse_reset(self) < 0) {
969 Py_DECREF(self);
970 return NULL;
971 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
974 Py_DECREF(self);
975 return NULL;
976 }
977 self->input_iter = PyObject_GetIter(iterator);
978 if (self->input_iter == NULL) {
979 PyErr_SetString(PyExc_TypeError,
980 "argument 1 must be an iterator");
981 Py_DECREF(self);
982 return NULL;
983 }
984 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
985 if (self->dialect == NULL) {
986 Py_DECREF(self);
987 return NULL;
988 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 PyObject_GC_Track(self);
991 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000992}
993
994/*
995 * WRITER
996 */
997/* ---------------------------------------------------------------- */
998static void
999join_reset(WriterObj *self)
1000{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 self->rec_len = 0;
1002 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001003}
1004
1005#define MEM_INCR 32768
1006
1007/* Calculate new record length or append field to record. Return new
1008 * record length.
1009 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001010static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001011join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001012 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001013 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001014{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 DialectObj *dialect = self->dialect;
1016 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001017 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001018
1019#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 do {\
1021 if (copy_phase) \
1022 self->rec[rec_len] = c;\
1023 rec_len++;\
1024 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 /* If this is not the first field we need a field separator */
1029 if (self->num_fields > 0)
1030 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 /* Handle preceding quote */
1033 if (copy_phase && *quoted)
1034 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 /* Copy/count field data */
1037 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001038 for (i = 0; field_data && (i < field_len); i++) {
1039 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (c == dialect->delimiter ||
1043 c == dialect->escapechar ||
1044 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001045 PyUnicode_FindChar(
1046 dialect->lineterminator, c, 0,
1047 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 if (dialect->quoting == QUOTE_NONE)
1049 want_escape = 1;
1050 else {
1051 if (c == dialect->quotechar) {
1052 if (dialect->doublequote)
1053 ADDCH(dialect->quotechar);
1054 else
1055 want_escape = 1;
1056 }
1057 if (!want_escape)
1058 *quoted = 1;
1059 }
1060 if (want_escape) {
1061 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001062 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 "need to escape, but no escapechar set");
1064 return -1;
1065 }
1066 ADDCH(dialect->escapechar);
1067 }
1068 }
1069 /* Copy field character into record buffer.
1070 */
1071 ADDCH(c);
1072 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001073
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 if (*quoted) {
1075 if (copy_phase)
1076 ADDCH(dialect->quotechar);
1077 else
1078 rec_len += 2;
1079 }
1080 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001081#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001082}
1083
1084static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001085join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001086{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001087
Antoine Pitrou40455752010-08-15 18:51:10 +00001088 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 PyErr_NoMemory();
1090 return 0;
1091 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 if (rec_len > self->rec_size) {
1094 if (self->rec_size == 0) {
1095 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1096 if (self->rec != NULL)
1097 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001098 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001099 }
1100 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001101 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001104 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 if (self->rec == NULL)
1106 PyMem_Free(old_rec);
1107 }
1108 if (self->rec == NULL) {
1109 PyErr_NoMemory();
1110 return 0;
1111 }
1112 }
1113 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001114}
1115
1116static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001117join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001118{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001119 unsigned int field_kind = -1;
1120 void *field_data = NULL;
1121 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001122 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001123
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001124 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001125 if (PyUnicode_READY(field) == -1)
1126 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001127 field_kind = PyUnicode_KIND(field);
1128 field_data = PyUnicode_DATA(field);
1129 field_len = PyUnicode_GET_LENGTH(field);
1130 }
1131 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001132 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 if (rec_len < 0)
1134 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 /* grow record buffer if necessary */
1137 if (!join_check_rec_size(self, rec_len))
1138 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001139
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001140 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001141 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001145}
1146
1147static int
1148join_append_lineterminator(WriterObj *self)
1149{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001150 Py_ssize_t terminator_len, i;
1151 unsigned int term_kind;
1152 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001153
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001154 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 if (terminator_len == -1)
1156 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 /* grow record buffer if necessary */
1159 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1160 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001161
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001162 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1163 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1164 for (i = 0; i < terminator_len; i++)
1165 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169}
1170
1171PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001172"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001173"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001174"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001175"elements will be converted to string.");
1176
1177static PyObject *
1178csv_writerow(WriterObj *self, PyObject *seq)
1179{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001181 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001183 iter = PyObject_GetIter(seq);
1184 if (iter == NULL)
1185 return PyErr_Format(_csvstate_global->error_obj,
1186 "iterable expected, not %.200s",
1187 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 /* Join all fields in internal buffer.
1190 */
1191 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001192 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 int append_ok;
1194 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 switch (dialect->quoting) {
1197 case QUOTE_NONNUMERIC:
1198 quoted = !PyNumber_Check(field);
1199 break;
1200 case QUOTE_ALL:
1201 quoted = 1;
1202 break;
1203 default:
1204 quoted = 0;
1205 break;
1206 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001207
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001209 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 Py_DECREF(field);
1211 }
1212 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001213 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 Py_DECREF(field);
1215 }
1216 else {
1217 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 str = PyObject_Str(field);
1220 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001221 if (str == NULL) {
1222 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001224 }
1225 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001226 Py_DECREF(str);
1227 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001228 if (!append_ok) {
1229 Py_DECREF(iter);
1230 return NULL;
1231 }
1232 }
1233 Py_DECREF(iter);
1234 if (PyErr_Occurred())
1235 return NULL;
1236
1237 if (self->num_fields > 0 && self->rec_size == 0) {
1238 if (dialect->quoting == QUOTE_NONE) {
1239 PyErr_Format(_csvstate_global->error_obj,
1240 "single empty field record must be quoted");
1241 return NULL;
1242 }
1243 self->num_fields--;
1244 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 return NULL;
1246 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001247
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 /* Add line terminator.
1249 */
1250 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001251 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001252
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001253 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1254 (void *) self->rec, self->rec_len);
1255 if (line == NULL)
1256 return NULL;
1257 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1258 Py_DECREF(line);
1259 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001260}
1261
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001262PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001263"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001264"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001265"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001266"elements will be converted to string.");
1267
Skip Montanarob4a04172003-03-20 23:29:12 +00001268static PyObject *
1269csv_writerows(WriterObj *self, PyObject *seqseq)
1270{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 row_iter = PyObject_GetIter(seqseq);
1274 if (row_iter == NULL) {
1275 PyErr_SetString(PyExc_TypeError,
1276 "writerows() argument must be iterable");
1277 return NULL;
1278 }
1279 while ((row_obj = PyIter_Next(row_iter))) {
1280 result = csv_writerow(self, row_obj);
1281 Py_DECREF(row_obj);
1282 if (!result) {
1283 Py_DECREF(row_iter);
1284 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001285 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 else
1287 Py_DECREF(result);
1288 }
1289 Py_DECREF(row_iter);
1290 if (PyErr_Occurred())
1291 return NULL;
1292 Py_INCREF(Py_None);
1293 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001294}
1295
1296static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1298 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1299 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001300};
1301
1302#define W_OFF(x) offsetof(WriterObj, x)
1303
1304static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1306 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001307};
1308
1309static void
1310Writer_dealloc(WriterObj *self)
1311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 PyObject_GC_UnTrack(self);
1313 Py_XDECREF(self->dialect);
1314 Py_XDECREF(self->writeline);
1315 if (self->rec != NULL)
1316 PyMem_Free(self->rec);
1317 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001318}
1319
1320static int
1321Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1322{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 Py_VISIT(self->dialect);
1324 Py_VISIT(self->writeline);
1325 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001326}
1327
1328static int
1329Writer_clear(WriterObj *self)
1330{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 Py_CLEAR(self->dialect);
1332 Py_CLEAR(self->writeline);
1333 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001334}
1335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001337"CSV writer\n"
1338"\n"
1339"Writer objects are responsible for generating tabular data\n"
1340"in CSV format from sequence input.\n"
1341);
1342
1343static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 PyVarObject_HEAD_INIT(NULL, 0)
1345 "_csv.writer", /*tp_name*/
1346 sizeof(WriterObj), /*tp_basicsize*/
1347 0, /*tp_itemsize*/
1348 /* methods */
1349 (destructor)Writer_dealloc, /*tp_dealloc*/
1350 (printfunc)0, /*tp_print*/
1351 (getattrfunc)0, /*tp_getattr*/
1352 (setattrfunc)0, /*tp_setattr*/
1353 0, /*tp_reserved*/
1354 (reprfunc)0, /*tp_repr*/
1355 0, /*tp_as_number*/
1356 0, /*tp_as_sequence*/
1357 0, /*tp_as_mapping*/
1358 (hashfunc)0, /*tp_hash*/
1359 (ternaryfunc)0, /*tp_call*/
1360 (reprfunc)0, /*tp_str*/
1361 0, /*tp_getattro*/
1362 0, /*tp_setattro*/
1363 0, /*tp_as_buffer*/
1364 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1365 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1366 Writer_Type_doc,
1367 (traverseproc)Writer_traverse, /*tp_traverse*/
1368 (inquiry)Writer_clear, /*tp_clear*/
1369 0, /*tp_richcompare*/
1370 0, /*tp_weaklistoffset*/
1371 (getiterfunc)0, /*tp_iter*/
1372 (getiterfunc)0, /*tp_iternext*/
1373 Writer_methods, /*tp_methods*/
1374 Writer_memberlist, /*tp_members*/
1375 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001376};
1377
1378static PyObject *
1379csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1380{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001381 PyObject * output_file, * dialect = NULL;
1382 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001383 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 if (!self)
1386 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 self->dialect = NULL;
1389 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 self->rec = NULL;
1392 self->rec_size = 0;
1393 self->rec_len = 0;
1394 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1397 Py_DECREF(self);
1398 return NULL;
1399 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001400 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1402 PyErr_SetString(PyExc_TypeError,
1403 "argument 1 must have a \"write\" method");
1404 Py_DECREF(self);
1405 return NULL;
1406 }
1407 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1408 if (self->dialect == NULL) {
1409 Py_DECREF(self);
1410 return NULL;
1411 }
1412 PyObject_GC_Track(self);
1413 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001414}
1415
1416/*
1417 * DIALECT REGISTRY
1418 */
1419static PyObject *
1420csv_list_dialects(PyObject *module, PyObject *args)
1421{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001422 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001423}
1424
1425static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001426csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001427{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 PyObject *name_obj, *dialect_obj = NULL;
1429 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1432 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001433 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001435 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 return NULL;
1437 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001438 if (PyUnicode_READY(name_obj) == -1)
1439 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 dialect = _call_dialect(dialect_obj, kwargs);
1441 if (dialect == NULL)
1442 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001443 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 Py_DECREF(dialect);
1445 return NULL;
1446 }
1447 Py_DECREF(dialect);
1448 Py_INCREF(Py_None);
1449 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001450}
1451
1452static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001453csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001454{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001455 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1456 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 Py_INCREF(Py_None);
1458 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001459}
1460
1461static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001462csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001463{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001465}
1466
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001467static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001468csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001469{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001471 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001472
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001473 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1474 return NULL;
1475 if (new_limit != NULL) {
1476 if (!PyLong_CheckExact(new_limit)) {
1477 PyErr_Format(PyExc_TypeError,
1478 "limit must be an integer");
1479 return NULL;
1480 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001481 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1482 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1483 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 return NULL;
1485 }
1486 }
1487 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001488}
1489
Skip Montanarob4a04172003-03-20 23:29:12 +00001490/*
1491 * MODULE
1492 */
1493
1494PyDoc_STRVAR(csv_module_doc,
1495"CSV parsing and writing.\n"
1496"\n"
1497"This module provides classes that assist in the reading and writing\n"
1498"of Comma Separated Value (CSV) files, and implements the interface\n"
1499"described by PEP 305. Although many CSV files are simple to parse,\n"
1500"the format is not formally defined by a stable specification and\n"
1501"is subtle enough that parsing lines of a CSV file with something\n"
1502"like line.split(\",\") is bound to fail. The module supports three\n"
1503"basic APIs: reading, writing, and registration of dialects.\n"
1504"\n"
1505"\n"
1506"DIALECT REGISTRATION:\n"
1507"\n"
1508"Readers and writers support a dialect argument, which is a convenient\n"
1509"handle on a group of settings. When the dialect argument is a string,\n"
1510"it identifies one of the dialects previously registered with the module.\n"
1511"If it is a class or instance, the attributes of the argument are used as\n"
1512"the settings for the reader or writer:\n"
1513"\n"
1514" class excel:\n"
1515" delimiter = ','\n"
1516" quotechar = '\"'\n"
1517" escapechar = None\n"
1518" doublequote = True\n"
1519" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001520" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001521" quoting = QUOTE_MINIMAL\n"
1522"\n"
1523"SETTINGS:\n"
1524"\n"
1525" * quotechar - specifies a one-character string to use as the \n"
1526" quoting character. It defaults to '\"'.\n"
1527" * delimiter - specifies a one-character string to use as the \n"
1528" field separator. It defaults to ','.\n"
1529" * skipinitialspace - specifies how to interpret whitespace which\n"
1530" immediately follows a delimiter. It defaults to False, which\n"
1531" means that whitespace immediately following a delimiter is part\n"
1532" of the following field.\n"
1533" * lineterminator - specifies the character sequence which should \n"
1534" terminate rows.\n"
1535" * quoting - controls when quotes should be generated by the writer.\n"
1536" It can take on any of the following module constants:\n"
1537"\n"
1538" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1539" field contains either the quotechar or the delimiter\n"
1540" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1541" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001542" fields which do not parse as integers or floating point\n"
1543" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001544" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1545" * escapechar - specifies a one-character string used to escape \n"
1546" the delimiter when quoting is set to QUOTE_NONE.\n"
1547" * doublequote - controls the handling of quotes inside fields. When\n"
1548" True, two consecutive quotes are interpreted as one during read,\n"
1549" and when writing, each quote character embedded in the data is\n"
1550" written as two quotes\n");
1551
1552PyDoc_STRVAR(csv_reader_doc,
1553" csv_reader = reader(iterable [, dialect='excel']\n"
1554" [optional keyword args])\n"
1555" for row in csv_reader:\n"
1556" process(row)\n"
1557"\n"
1558"The \"iterable\" argument can be any object that returns a line\n"
1559"of input for each iteration, such as a file object or a list. The\n"
1560"optional \"dialect\" parameter is discussed below. The function\n"
1561"also accepts optional keyword arguments which override settings\n"
1562"provided by the dialect.\n"
1563"\n"
1564"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001565"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001566
/* Docstring for the module-level writer() function (wired up in
   csv_methods).  Shows both the row-at-a-time writerow() form and the
   bulk writerows() form. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1580
1581PyDoc_STRVAR(csv_list_dialects_doc,
1582"Return a list of all know dialect names.\n"
1583" names = csv.list_dialects()");
1584
/* Docstring for the module-level get_dialect() function (wired up in
   csv_methods, which registers it as taking a single argument). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1588
/* Docstring for the module-level register_dialect() function (wired up in
   csv_methods, which registers it as accepting positional and keyword
   arguments).
   NOTE(review): the usage line shows a return value being assigned;
   confirm against csv_register_dialect that something useful is
   returned. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name, dialect)");
1592
/* Docstring for the module-level unregister_dialect() function (wired up
   in csv_methods, which registers it as taking a single argument). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1596
/* Docstring for the module-level field_size_limit() function (wired up in
   csv_methods).  The limit argument is optional, as the usage line
   shows. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1603
Skip Montanarob4a04172003-03-20 23:29:12 +00001604static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001605 { "reader", (PyCFunction)csv_reader,
1606 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1607 { "writer", (PyCFunction)csv_writer,
1608 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1609 { "list_dialects", (PyCFunction)csv_list_dialects,
1610 METH_NOARGS, csv_list_dialects_doc},
1611 { "register_dialect", (PyCFunction)csv_register_dialect,
1612 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1613 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1614 METH_O, csv_unregister_dialect_doc},
1615 { "get_dialect", (PyCFunction)csv_get_dialect,
1616 METH_O, csv_get_dialect_doc},
1617 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1618 METH_VARARGS, csv_field_size_limit_doc},
1619 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001620};
1621
/* Module definition for _csv.  Ties together the module name, docstring,
   the size of the per-module state (_csvstate), the function table and
   the traverse/clear/free hooks that manage the objects held in that
   state. */
static struct PyModuleDef _csvmodule = {
    PyModuleDef_HEAD_INIT,
    "_csv",                     /* module name */
    csv_module_doc,             /* module docstring */
    sizeof(_csvstate),          /* bytes of per-module state to allocate */
    csv_methods,                /* module-level functions */
    NULL,                       /* unused here (m_reload or m_slots,
                                   depending on Python version -- confirm) */
    _csv_traverse,              /* visits the objects held in the state */
    _csv_clear,                 /* drops the objects held in the state */
    _csv_free                   /* teardown hook; delegates to _csv_clear */
};
1633
Skip Montanarob4a04172003-03-20 23:29:12 +00001634PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001635PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001636{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 PyObject *module;
1638 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 if (PyType_Ready(&Dialect_Type) < 0)
1641 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 if (PyType_Ready(&Reader_Type) < 0)
1644 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 if (PyType_Ready(&Writer_Type) < 0)
1647 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001648
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 /* Create the module and add the functions */
1650 module = PyModule_Create(&_csvmodule);
1651 if (module == NULL)
1652 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 /* Add version to the module. */
1655 if (PyModule_AddStringConstant(module, "__version__",
1656 MODULE_VERSION) == -1)
1657 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001658
Antoine Pitroue7672d32012-05-16 11:33:08 +02001659 /* Set the field limit */
1660 _csvstate(module)->field_limit = 128 * 1024;
1661 /* Do I still need to add this var to the Module Dict? */
1662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001664 _csvstate(module)->dialects = PyDict_New();
1665 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001667 Py_INCREF(_csvstate(module)->dialects);
1668 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001670
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 /* Add quote styles into dictionary */
1672 for (style = quote_styles; style->name; style++) {
1673 if (PyModule_AddIntConstant(module, style->name,
1674 style->style) == -1)
1675 return NULL;
1676 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 /* Add the Dialect type */
1679 Py_INCREF(&Dialect_Type);
1680 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1681 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001684 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1685 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001686 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001687 Py_INCREF(_csvstate(module)->error_obj);
1688 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001690}