blob: c0be739c672970e45b4b57fd0ce185c8d4498d23 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the character-at-a-time CSV parser (parse_process_char). */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting a field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* in unquoted field */
    IN_QUOTED_FIELD,            /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen in quoted field */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen in quoted field */
    EAT_CRNL,                   /* record finished, skipping CR/LF */
    AFTER_ESCAPED_CRNL          /* an escaped CR/LF was just stored */
} ParserState;
56
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting styles; the entry whose name is NULL
   terminates the table. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
125/*
126 * DIALECT class
127 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300251 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_INCREF(src);
Serhiy Storchaka5a57ade2015-12-24 10:35:59 +0200280 Py_SETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 }
282 }
283 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284}
285
286static int
287dialect_check_quoting(int quoting)
288{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200289 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200292 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000297}
Skip Montanarob4a04172003-03-20 23:29:12 +0000298
299#define D_OFF(x) offsetof(DialectObj, x)
300
301static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
303 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
304 { "strict", T_INT, D_OFF(strict), READONLY },
305 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000306};
307
308static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 { "delimiter", (getter)Dialect_get_delimiter},
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000315};
316
317static void
318Dialect_dealloc(DialectObj *self)
319{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000322}
323
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000324static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 "dialect",
326 "delimiter",
327 "doublequote",
328 "escapechar",
329 "lineterminator",
330 "quotechar",
331 "quoting",
332 "skipinitialspace",
333 "strict",
334 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000335};
336
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000337static PyObject *
338dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 &quotechar,
360 &quoting,
361 &skipinitialspace,
362 &strict))
363 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100366 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == 0 &&
376 doublequote == 0 &&
377 escapechar == 0 &&
378 lineterminator == 0 &&
379 quotechar == 0 &&
380 quoting == 0 &&
381 skipinitialspace == 0 &&
382 strict == 0)
383 return dialect;
384 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000402#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000417#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200433 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300434 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 ret = (PyObject *)self;
450 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000451err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000463}
464
465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000467"CSV dialect\n"
468"\n"
469"The Dialect type records CSV parsing and generation options.\n");
470
471static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 (printfunc)0, /* tp_print */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 0, /* tp_reserved */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000512};
513
Andrew McNamara91b97462005-01-11 01:07:23 +0000514/*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518static PyObject *
519_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 PyObject *ctor_args;
522 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
525 if (ctor_args == NULL)
526 return NULL;
527 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
528 Py_DECREF(ctor_args);
529 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000530}
531
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000532/*
533 * READER
534 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000535static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000536parse_save_field(ReaderObj *self)
537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000539
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200540 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
541 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (field == NULL)
543 return -1;
544 self->field_len = 0;
545 if (self->numeric_field) {
546 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 self->numeric_field = 0;
549 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200551 if (tmp == NULL)
552 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 field = tmp;
554 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100555 if (PyList_Append(self->fields, field) < 0) {
556 Py_DECREF(field);
557 return -1;
558 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 Py_DECREF(field);
560 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000561}
562
563static int
564parse_grow_buff(ReaderObj *self)
565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 if (self->field_size == 0) {
567 self->field_size = 4096;
568 if (self->field != NULL)
569 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200570 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 }
572 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200573 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000574 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyErr_NoMemory();
576 return 0;
577 }
578 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200579 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 }
581 if (self->field == NULL) {
582 PyErr_NoMemory();
583 return 0;
584 }
585 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586}
587
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000588static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200589parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000590{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200591 if (self->field_len >= _csvstate_global->field_limit) {
592 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
593 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 return -1;
595 }
596 if (self->field_len == self->field_size && !parse_grow_buff(self))
597 return -1;
598 self->field[self->field_len++] = c;
599 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000600}
601
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000602static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200603parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000604{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 switch (self->state) {
608 case START_RECORD:
609 /* start of record */
610 if (c == '\0')
611 /* empty line - return [] */
612 break;
613 else if (c == '\n' || c == '\r') {
614 self->state = EAT_CRNL;
615 break;
616 }
617 /* normal character - handle as START_FIELD */
618 self->state = START_FIELD;
619 /* fallthru */
620 case START_FIELD:
621 /* expecting field */
622 if (c == '\n' || c == '\r' || c == '\0') {
623 /* save empty field - return [fields] */
624 if (parse_save_field(self) < 0)
625 return -1;
626 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
627 }
628 else if (c == dialect->quotechar &&
629 dialect->quoting != QUOTE_NONE) {
630 /* start quoted field */
631 self->state = IN_QUOTED_FIELD;
632 }
633 else if (c == dialect->escapechar) {
634 /* possible escaped character */
635 self->state = ESCAPED_CHAR;
636 }
637 else if (c == ' ' && dialect->skipinitialspace)
638 /* ignore space at start of field */
639 ;
640 else if (c == dialect->delimiter) {
641 /* save empty field */
642 if (parse_save_field(self) < 0)
643 return -1;
644 }
645 else {
646 /* begin new unquoted field */
647 if (dialect->quoting == QUOTE_NONNUMERIC)
648 self->numeric_field = 1;
649 if (parse_add_char(self, c) < 0)
650 return -1;
651 self->state = IN_FIELD;
652 }
653 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400656 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400657 if (parse_add_char(self, c) < 0)
658 return -1;
659 self->state = AFTER_ESCAPED_CRNL;
660 break;
661 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 if (c == '\0')
663 c = '\n';
664 if (parse_add_char(self, c) < 0)
665 return -1;
666 self->state = IN_FIELD;
667 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000668
R David Murrayc7c42ef2013-03-19 22:41:47 -0400669 case AFTER_ESCAPED_CRNL:
670 if (c == '\0')
671 break;
672 /*fallthru*/
673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 case IN_FIELD:
675 /* in unquoted field */
676 if (c == '\n' || c == '\r' || c == '\0') {
677 /* end of line - return [fields] */
678 if (parse_save_field(self) < 0)
679 return -1;
680 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
681 }
682 else if (c == dialect->escapechar) {
683 /* possible escaped character */
684 self->state = ESCAPED_CHAR;
685 }
686 else if (c == dialect->delimiter) {
687 /* save field - wait for new field */
688 if (parse_save_field(self) < 0)
689 return -1;
690 self->state = START_FIELD;
691 }
692 else {
693 /* normal character - save in field */
694 if (parse_add_char(self, c) < 0)
695 return -1;
696 }
697 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 case IN_QUOTED_FIELD:
700 /* in quoted field */
701 if (c == '\0')
702 ;
703 else if (c == dialect->escapechar) {
704 /* Possible escape character */
705 self->state = ESCAPE_IN_QUOTED_FIELD;
706 }
707 else if (c == dialect->quotechar &&
708 dialect->quoting != QUOTE_NONE) {
709 if (dialect->doublequote) {
710 /* doublequote; " represented by "" */
711 self->state = QUOTE_IN_QUOTED_FIELD;
712 }
713 else {
714 /* end of quote part of field */
715 self->state = IN_FIELD;
716 }
717 }
718 else {
719 /* normal character - save in field */
720 if (parse_add_char(self, c) < 0)
721 return -1;
722 }
723 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 case ESCAPE_IN_QUOTED_FIELD:
726 if (c == '\0')
727 c = '\n';
728 if (parse_add_char(self, c) < 0)
729 return -1;
730 self->state = IN_QUOTED_FIELD;
731 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 case QUOTE_IN_QUOTED_FIELD:
734 /* doublequote - seen a quote in an quoted field */
735 if (dialect->quoting != QUOTE_NONE &&
736 c == dialect->quotechar) {
737 /* save "" as " */
738 if (parse_add_char(self, c) < 0)
739 return -1;
740 self->state = IN_QUOTED_FIELD;
741 }
742 else if (c == dialect->delimiter) {
743 /* save field - wait for new field */
744 if (parse_save_field(self) < 0)
745 return -1;
746 self->state = START_FIELD;
747 }
748 else if (c == '\n' || c == '\r' || c == '\0') {
749 /* end of line - return [fields] */
750 if (parse_save_field(self) < 0)
751 return -1;
752 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
753 }
754 else if (!dialect->strict) {
755 if (parse_add_char(self, c) < 0)
756 return -1;
757 self->state = IN_FIELD;
758 }
759 else {
760 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200761 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 dialect->delimiter,
763 dialect->quotechar);
764 return -1;
765 }
766 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 case EAT_CRNL:
769 if (c == '\n' || c == '\r')
770 ;
771 else if (c == '\0')
772 self->state = START_RECORD;
773 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200774 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 return -1;
776 }
777 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 }
780 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000781}
782
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000783static int
784parse_reset(ReaderObj *self)
785{
Serhiy Storchaka5a57ade2015-12-24 10:35:59 +0200786 Py_SETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 if (self->fields == NULL)
788 return -1;
789 self->field_len = 0;
790 self->state = START_RECORD;
791 self->numeric_field = 0;
792 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000793}
Skip Montanarob4a04172003-03-20 23:29:12 +0000794
795static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000796Reader_iternext(ReaderObj *self)
797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200799 Py_UCS4 c;
800 Py_ssize_t pos, linelen;
801 unsigned int kind;
802 void *data;
803 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 if (parse_reset(self) < 0)
806 return NULL;
807 do {
808 lineobj = PyIter_Next(self->input_iter);
809 if (lineobj == NULL) {
810 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700811 if (!PyErr_Occurred() && (self->field_len != 0 ||
812 self->state == IN_QUOTED_FIELD)) {
813 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700814 PyErr_SetString(_csvstate_global->error_obj,
815 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700816 else if (parse_save_field(self) >= 0)
817 break;
818 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 return NULL;
820 }
821 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200822 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 "iterator should return strings, "
824 "not %.200s "
825 "(did you open the file in text mode?)",
826 lineobj->ob_type->tp_name
827 );
828 Py_DECREF(lineobj);
829 return NULL;
830 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100831 if (PyUnicode_READY(lineobj) == -1) {
832 Py_DECREF(lineobj);
833 return NULL;
834 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200836 kind = PyUnicode_KIND(lineobj);
837 data = PyUnicode_DATA(lineobj);
838 pos = 0;
839 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200841 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000843 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200844 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 "line contains NULL byte");
846 goto err;
847 }
848 if (parse_process_char(self, c) < 0) {
849 Py_DECREF(lineobj);
850 goto err;
851 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200852 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 }
854 Py_DECREF(lineobj);
855 if (parse_process_char(self, 0) < 0)
856 goto err;
857 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 fields = self->fields;
860 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000861err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000863}
864
865static void
866Reader_dealloc(ReaderObj *self)
867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 PyObject_GC_UnTrack(self);
869 Py_XDECREF(self->dialect);
870 Py_XDECREF(self->input_iter);
871 Py_XDECREF(self->fields);
872 if (self->field != NULL)
873 PyMem_Free(self->field);
874 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000875}
876
877static int
878Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
879{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 Py_VISIT(self->dialect);
881 Py_VISIT(self->input_iter);
882 Py_VISIT(self->fields);
883 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000884}
885
886static int
887Reader_clear(ReaderObj *self)
888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 Py_CLEAR(self->dialect);
890 Py_CLEAR(self->input_iter);
891 Py_CLEAR(self->fields);
892 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000893}
894
895PyDoc_STRVAR(Reader_Type_doc,
896"CSV reader\n"
897"\n"
898"Reader objects are responsible for reading and parsing tabular data\n"
899"in CSV format.\n"
900);
901
902static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000904};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000905#define R_OFF(x) offsetof(ReaderObj, x)
906
907static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
909 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
910 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000911};
912
Skip Montanarob4a04172003-03-20 23:29:12 +0000913
914static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 PyVarObject_HEAD_INIT(NULL, 0)
916 "_csv.reader", /*tp_name*/
917 sizeof(ReaderObj), /*tp_basicsize*/
918 0, /*tp_itemsize*/
919 /* methods */
920 (destructor)Reader_dealloc, /*tp_dealloc*/
921 (printfunc)0, /*tp_print*/
922 (getattrfunc)0, /*tp_getattr*/
923 (setattrfunc)0, /*tp_setattr*/
924 0, /*tp_reserved*/
925 (reprfunc)0, /*tp_repr*/
926 0, /*tp_as_number*/
927 0, /*tp_as_sequence*/
928 0, /*tp_as_mapping*/
929 (hashfunc)0, /*tp_hash*/
930 (ternaryfunc)0, /*tp_call*/
931 (reprfunc)0, /*tp_str*/
932 0, /*tp_getattro*/
933 0, /*tp_setattro*/
934 0, /*tp_as_buffer*/
935 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
936 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
937 Reader_Type_doc, /*tp_doc*/
938 (traverseproc)Reader_traverse, /*tp_traverse*/
939 (inquiry)Reader_clear, /*tp_clear*/
940 0, /*tp_richcompare*/
941 0, /*tp_weaklistoffset*/
942 PyObject_SelfIter, /*tp_iter*/
943 (getiterfunc)Reader_iternext, /*tp_iternext*/
944 Reader_methods, /*tp_methods*/
945 Reader_memberlist, /*tp_members*/
946 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
948};
949
950static PyObject *
951csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
952{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 PyObject * iterator, * dialect = NULL;
954 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 if (!self)
957 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 self->dialect = NULL;
960 self->fields = NULL;
961 self->input_iter = NULL;
962 self->field = NULL;
963 self->field_size = 0;
964 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 if (parse_reset(self) < 0) {
967 Py_DECREF(self);
968 return NULL;
969 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
972 Py_DECREF(self);
973 return NULL;
974 }
975 self->input_iter = PyObject_GetIter(iterator);
976 if (self->input_iter == NULL) {
977 PyErr_SetString(PyExc_TypeError,
978 "argument 1 must be an iterator");
979 Py_DECREF(self);
980 return NULL;
981 }
982 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
983 if (self->dialect == NULL) {
984 Py_DECREF(self);
985 return NULL;
986 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000988 PyObject_GC_Track(self);
989 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000990}
991
992/*
993 * WRITER
994 */
995/* ---------------------------------------------------------------- */
996static void
997join_reset(WriterObj *self)
998{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 self->rec_len = 0;
1000 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001001}
1002
1003#define MEM_INCR 32768
1004
1005/* Calculate new record length or append field to record. Return new
1006 * record length.
1007 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001008static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001009join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001010 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001011 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 DialectObj *dialect = self->dialect;
1014 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001015 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001016
1017#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 do {\
1019 if (copy_phase) \
1020 self->rec[rec_len] = c;\
1021 rec_len++;\
1022 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* If this is not the first field we need a field separator */
1027 if (self->num_fields > 0)
1028 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 /* Handle preceding quote */
1031 if (copy_phase && *quoted)
1032 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 /* Copy/count field data */
1035 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001036 for (i = 0; field_data && (i < field_len); i++) {
1037 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 if (c == dialect->delimiter ||
1041 c == dialect->escapechar ||
1042 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001043 PyUnicode_FindChar(
1044 dialect->lineterminator, c, 0,
1045 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 if (dialect->quoting == QUOTE_NONE)
1047 want_escape = 1;
1048 else {
1049 if (c == dialect->quotechar) {
1050 if (dialect->doublequote)
1051 ADDCH(dialect->quotechar);
1052 else
1053 want_escape = 1;
1054 }
1055 if (!want_escape)
1056 *quoted = 1;
1057 }
1058 if (want_escape) {
1059 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001060 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 "need to escape, but no escapechar set");
1062 return -1;
1063 }
1064 ADDCH(dialect->escapechar);
1065 }
1066 }
1067 /* Copy field character into record buffer.
1068 */
1069 ADDCH(c);
1070 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 if (*quoted) {
1073 if (copy_phase)
1074 ADDCH(dialect->quotechar);
1075 else
1076 rec_len += 2;
1077 }
1078 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001079#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001080}
1081
1082static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001083join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001084{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001085
Antoine Pitrou40455752010-08-15 18:51:10 +00001086 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 PyErr_NoMemory();
1088 return 0;
1089 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 if (rec_len > self->rec_size) {
1092 if (self->rec_size == 0) {
1093 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1094 if (self->rec != NULL)
1095 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001096 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 }
1098 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001099 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001102 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 if (self->rec == NULL)
1104 PyMem_Free(old_rec);
1105 }
1106 if (self->rec == NULL) {
1107 PyErr_NoMemory();
1108 return 0;
1109 }
1110 }
1111 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001112}
1113
1114static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001115join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001116{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001117 unsigned int field_kind = -1;
1118 void *field_data = NULL;
1119 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001120 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001121
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001122 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001123 if (PyUnicode_READY(field) == -1)
1124 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001125 field_kind = PyUnicode_KIND(field);
1126 field_data = PyUnicode_DATA(field);
1127 field_len = PyUnicode_GET_LENGTH(field);
1128 }
1129 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001130 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 if (rec_len < 0)
1132 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 /* grow record buffer if necessary */
1135 if (!join_check_rec_size(self, rec_len))
1136 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001138 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001139 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143}
1144
1145static int
1146join_append_lineterminator(WriterObj *self)
1147{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001148 Py_ssize_t terminator_len, i;
1149 unsigned int term_kind;
1150 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001151
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001152 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 if (terminator_len == -1)
1154 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 /* grow record buffer if necessary */
1157 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1158 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001159
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001160 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1161 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1162 for (i = 0; i < terminator_len; i++)
1163 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001167}
1168
1169PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001170"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001171"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001172"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001173"elements will be converted to string.");
1174
1175static PyObject *
1176csv_writerow(WriterObj *self, PyObject *seq)
1177{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001179 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001181 iter = PyObject_GetIter(seq);
1182 if (iter == NULL)
1183 return PyErr_Format(_csvstate_global->error_obj,
1184 "iterable expected, not %.200s",
1185 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001186
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 /* Join all fields in internal buffer.
1188 */
1189 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001190 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 int append_ok;
1192 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 switch (dialect->quoting) {
1195 case QUOTE_NONNUMERIC:
1196 quoted = !PyNumber_Check(field);
1197 break;
1198 case QUOTE_ALL:
1199 quoted = 1;
1200 break;
1201 default:
1202 quoted = 0;
1203 break;
1204 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001207 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 Py_DECREF(field);
1209 }
1210 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001211 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 Py_DECREF(field);
1213 }
1214 else {
1215 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 str = PyObject_Str(field);
1218 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001219 if (str == NULL) {
1220 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001222 }
1223 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 Py_DECREF(str);
1225 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001226 if (!append_ok) {
1227 Py_DECREF(iter);
1228 return NULL;
1229 }
1230 }
1231 Py_DECREF(iter);
1232 if (PyErr_Occurred())
1233 return NULL;
1234
1235 if (self->num_fields > 0 && self->rec_size == 0) {
1236 if (dialect->quoting == QUOTE_NONE) {
1237 PyErr_Format(_csvstate_global->error_obj,
1238 "single empty field record must be quoted");
1239 return NULL;
1240 }
1241 self->num_fields--;
1242 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 return NULL;
1244 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 /* Add line terminator.
1247 */
1248 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001249 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001250
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001251 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1252 (void *) self->rec, self->rec_len);
1253 if (line == NULL)
1254 return NULL;
1255 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1256 Py_DECREF(line);
1257 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001258}
1259
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001260PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001261"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001262"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001263"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001264"elements will be converted to string.");
1265
Skip Montanarob4a04172003-03-20 23:29:12 +00001266static PyObject *
1267csv_writerows(WriterObj *self, PyObject *seqseq)
1268{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001270
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 row_iter = PyObject_GetIter(seqseq);
1272 if (row_iter == NULL) {
1273 PyErr_SetString(PyExc_TypeError,
1274 "writerows() argument must be iterable");
1275 return NULL;
1276 }
1277 while ((row_obj = PyIter_Next(row_iter))) {
1278 result = csv_writerow(self, row_obj);
1279 Py_DECREF(row_obj);
1280 if (!result) {
1281 Py_DECREF(row_iter);
1282 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001283 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 else
1285 Py_DECREF(result);
1286 }
1287 Py_DECREF(row_iter);
1288 if (PyErr_Occurred())
1289 return NULL;
1290 Py_INCREF(Py_None);
1291 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001292}
1293
1294static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1296 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1297 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001298};
1299
1300#define W_OFF(x) offsetof(WriterObj, x)
1301
1302static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1304 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001305};
1306
1307static void
1308Writer_dealloc(WriterObj *self)
1309{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 PyObject_GC_UnTrack(self);
1311 Py_XDECREF(self->dialect);
1312 Py_XDECREF(self->writeline);
1313 if (self->rec != NULL)
1314 PyMem_Free(self->rec);
1315 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001316}
1317
1318static int
1319Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1320{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 Py_VISIT(self->dialect);
1322 Py_VISIT(self->writeline);
1323 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001324}
1325
1326static int
1327Writer_clear(WriterObj *self)
1328{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 Py_CLEAR(self->dialect);
1330 Py_CLEAR(self->writeline);
1331 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001332}
1333
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001335"CSV writer\n"
1336"\n"
1337"Writer objects are responsible for generating tabular data\n"
1338"in CSV format from sequence input.\n"
1339);
1340
1341static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 PyVarObject_HEAD_INIT(NULL, 0)
1343 "_csv.writer", /*tp_name*/
1344 sizeof(WriterObj), /*tp_basicsize*/
1345 0, /*tp_itemsize*/
1346 /* methods */
1347 (destructor)Writer_dealloc, /*tp_dealloc*/
1348 (printfunc)0, /*tp_print*/
1349 (getattrfunc)0, /*tp_getattr*/
1350 (setattrfunc)0, /*tp_setattr*/
1351 0, /*tp_reserved*/
1352 (reprfunc)0, /*tp_repr*/
1353 0, /*tp_as_number*/
1354 0, /*tp_as_sequence*/
1355 0, /*tp_as_mapping*/
1356 (hashfunc)0, /*tp_hash*/
1357 (ternaryfunc)0, /*tp_call*/
1358 (reprfunc)0, /*tp_str*/
1359 0, /*tp_getattro*/
1360 0, /*tp_setattro*/
1361 0, /*tp_as_buffer*/
1362 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1363 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1364 Writer_Type_doc,
1365 (traverseproc)Writer_traverse, /*tp_traverse*/
1366 (inquiry)Writer_clear, /*tp_clear*/
1367 0, /*tp_richcompare*/
1368 0, /*tp_weaklistoffset*/
1369 (getiterfunc)0, /*tp_iter*/
1370 (getiterfunc)0, /*tp_iternext*/
1371 Writer_methods, /*tp_methods*/
1372 Writer_memberlist, /*tp_members*/
1373 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001374};
1375
1376static PyObject *
1377csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1378{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 PyObject * output_file, * dialect = NULL;
1380 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001381 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001382
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 if (!self)
1384 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 self->dialect = NULL;
1387 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 self->rec = NULL;
1390 self->rec_size = 0;
1391 self->rec_len = 0;
1392 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1395 Py_DECREF(self);
1396 return NULL;
1397 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001398 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1400 PyErr_SetString(PyExc_TypeError,
1401 "argument 1 must have a \"write\" method");
1402 Py_DECREF(self);
1403 return NULL;
1404 }
1405 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1406 if (self->dialect == NULL) {
1407 Py_DECREF(self);
1408 return NULL;
1409 }
1410 PyObject_GC_Track(self);
1411 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001412}
1413
1414/*
1415 * DIALECT REGISTRY
1416 */
1417static PyObject *
1418csv_list_dialects(PyObject *module, PyObject *args)
1419{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001420 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001421}
1422
1423static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001424csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001425{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 PyObject *name_obj, *dialect_obj = NULL;
1427 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1430 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001431 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001433 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 return NULL;
1435 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001436 if (PyUnicode_READY(name_obj) == -1)
1437 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 dialect = _call_dialect(dialect_obj, kwargs);
1439 if (dialect == NULL)
1440 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001441 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 Py_DECREF(dialect);
1443 return NULL;
1444 }
1445 Py_DECREF(dialect);
1446 Py_INCREF(Py_None);
1447 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001448}
1449
1450static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001451csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001452{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001453 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1454 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 Py_INCREF(Py_None);
1456 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001457}
1458
1459static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001460csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001461{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001463}
1464
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001465static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001466csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001467{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001469 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1472 return NULL;
1473 if (new_limit != NULL) {
1474 if (!PyLong_CheckExact(new_limit)) {
1475 PyErr_Format(PyExc_TypeError,
1476 "limit must be an integer");
1477 return NULL;
1478 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001479 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1480 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1481 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001482 return NULL;
1483 }
1484 }
1485 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001486}
1487
Skip Montanarob4a04172003-03-20 23:29:12 +00001488/*
1489 * MODULE
1490 */
1491
1492PyDoc_STRVAR(csv_module_doc,
1493"CSV parsing and writing.\n"
1494"\n"
1495"This module provides classes that assist in the reading and writing\n"
1496"of Comma Separated Value (CSV) files, and implements the interface\n"
1497"described by PEP 305. Although many CSV files are simple to parse,\n"
1498"the format is not formally defined by a stable specification and\n"
1499"is subtle enough that parsing lines of a CSV file with something\n"
1500"like line.split(\",\") is bound to fail. The module supports three\n"
1501"basic APIs: reading, writing, and registration of dialects.\n"
1502"\n"
1503"\n"
1504"DIALECT REGISTRATION:\n"
1505"\n"
1506"Readers and writers support a dialect argument, which is a convenient\n"
1507"handle on a group of settings. When the dialect argument is a string,\n"
1508"it identifies one of the dialects previously registered with the module.\n"
1509"If it is a class or instance, the attributes of the argument are used as\n"
1510"the settings for the reader or writer:\n"
1511"\n"
1512" class excel:\n"
1513" delimiter = ','\n"
1514" quotechar = '\"'\n"
1515" escapechar = None\n"
1516" doublequote = True\n"
1517" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001518" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001519" quoting = QUOTE_MINIMAL\n"
1520"\n"
1521"SETTINGS:\n"
1522"\n"
1523" * quotechar - specifies a one-character string to use as the \n"
1524" quoting character. It defaults to '\"'.\n"
1525" * delimiter - specifies a one-character string to use as the \n"
1526" field separator. It defaults to ','.\n"
1527" * skipinitialspace - specifies how to interpret whitespace which\n"
1528" immediately follows a delimiter. It defaults to False, which\n"
1529" means that whitespace immediately following a delimiter is part\n"
1530" of the following field.\n"
1531" * lineterminator - specifies the character sequence which should \n"
1532" terminate rows.\n"
1533" * quoting - controls when quotes should be generated by the writer.\n"
1534" It can take on any of the following module constants:\n"
1535"\n"
1536" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1537" field contains either the quotechar or the delimiter\n"
1538" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1539" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001540" fields which do not parse as integers or floating point\n"
1541" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001542" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1543" * escapechar - specifies a one-character string used to escape \n"
1544" the delimiter when quoting is set to QUOTE_NONE.\n"
1545" * doublequote - controls the handling of quotes inside fields. When\n"
1546" True, two consecutive quotes are interpreted as one during read,\n"
1547" and when writing, each quote character embedded in the data is\n"
1548" written as two quotes\n");
1549
1550PyDoc_STRVAR(csv_reader_doc,
1551" csv_reader = reader(iterable [, dialect='excel']\n"
1552" [optional keyword args])\n"
1553" for row in csv_reader:\n"
1554" process(row)\n"
1555"\n"
1556"The \"iterable\" argument can be any object that returns a line\n"
1557"of input for each iteration, such as a file object or a list. The\n"
1558"optional \"dialect\" parameter is discussed below. The function\n"
1559"also accepts optional keyword arguments which override settings\n"
1560"provided by the dialect.\n"
1561"\n"
1562"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001563"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001564
1565PyDoc_STRVAR(csv_writer_doc,
1566" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1567" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001568" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001569" csv_writer.writerow(row)\n"
1570"\n"
1571" [or]\n"
1572"\n"
1573" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1574" [optional keyword args])\n"
1575" csv_writer.writerows(rows)\n"
1576"\n"
1577"The \"fileobj\" argument can be any object that supports the file API.\n");
1578
1579PyDoc_STRVAR(csv_list_dialects_doc,
1580"Return a list of all know dialect names.\n"
1581" names = csv.list_dialects()");
1582
1583PyDoc_STRVAR(csv_get_dialect_doc,
1584"Return the dialect instance associated with name.\n"
1585" dialect = csv.get_dialect(name)");
1586
1587PyDoc_STRVAR(csv_register_dialect_doc,
1588"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001589" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001590
1591PyDoc_STRVAR(csv_unregister_dialect_doc,
1592"Delete the name/dialect mapping associated with a string name.\n"
1593" csv.unregister_dialect(name)");
1594
Andrew McNamara31d88962005-01-12 03:45:10 +00001595PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001596"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001597" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001598"\n"
1599"Returns old limit. If limit is not given, no new limit is set and\n"
1600"the old limit is returned");
1601
Skip Montanarob4a04172003-03-20 23:29:12 +00001602static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 { "reader", (PyCFunction)csv_reader,
1604 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1605 { "writer", (PyCFunction)csv_writer,
1606 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1607 { "list_dialects", (PyCFunction)csv_list_dialects,
1608 METH_NOARGS, csv_list_dialects_doc},
1609 { "register_dialect", (PyCFunction)csv_register_dialect,
1610 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1611 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1612 METH_O, csv_unregister_dialect_doc},
1613 { "get_dialect", (PyCFunction)csv_get_dialect,
1614 METH_O, csv_get_dialect_doc},
1615 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1616 METH_VARARGS, csv_field_size_limit_doc},
1617 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001618};
1619
Martin v. Löwis1a214512008-06-11 05:26:20 +00001620static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 PyModuleDef_HEAD_INIT,
1622 "_csv",
1623 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001624 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 csv_methods,
1626 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001627 _csv_traverse,
1628 _csv_clear,
1629 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001630};
1631
Skip Montanarob4a04172003-03-20 23:29:12 +00001632PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001633PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001634{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001636 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 if (PyType_Ready(&Dialect_Type) < 0)
1639 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 if (PyType_Ready(&Reader_Type) < 0)
1642 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 if (PyType_Ready(&Writer_Type) < 0)
1645 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 /* Create the module and add the functions */
1648 module = PyModule_Create(&_csvmodule);
1649 if (module == NULL)
1650 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 /* Add version to the module. */
1653 if (PyModule_AddStringConstant(module, "__version__",
1654 MODULE_VERSION) == -1)
1655 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001656
Antoine Pitroue7672d32012-05-16 11:33:08 +02001657 /* Set the field limit */
1658 _csvstate(module)->field_limit = 128 * 1024;
1659 /* Do I still need to add this var to the Module Dict? */
1660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001661 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001662 _csvstate(module)->dialects = PyDict_New();
1663 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001665 Py_INCREF(_csvstate(module)->dialects);
1666 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 /* Add quote styles into dictionary */
1670 for (style = quote_styles; style->name; style++) {
1671 if (PyModule_AddIntConstant(module, style->name,
1672 style->style) == -1)
1673 return NULL;
1674 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 /* Add the Dialect type */
1677 Py_INCREF(&Dialect_Type);
1678 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1679 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001682 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1683 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001685 Py_INCREF(_csvstate(module)->error_obj);
1686 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001688}