blob: b428279dd3ec9c8375d2f5b522919f41abe59190 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
Skip Montanarob4a04172003-03-20 23:29:12 +00009*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the character-at-a-time CSV parser (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting policies; the numeric values are what a dialect's "quoting" holds. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
63 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
66static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
125/*
126 * DIALECT class
127 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300251 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300280 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 }
282 }
283 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284}
285
286static int
287dialect_check_quoting(int quoting)
288{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000289 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200292 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000297}
Skip Montanarob4a04172003-03-20 23:29:12 +0000298
299#define D_OFF(x) offsetof(DialectObj, x)
300
301static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
303 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
304 { "strict", T_INT, D_OFF(strict), READONLY },
305 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000306};
307
308static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 { "delimiter", (getter)Dialect_get_delimiter},
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000315};
316
317static void
318Dialect_dealloc(DialectObj *self)
319{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000322}
323
/* Keyword names accepted by dialect_new(), in argument order. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL    /* sentinel */
};
336
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000337static PyObject *
338dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 &quotechar,
360 &quoting,
361 &skipinitialspace,
362 &strict))
363 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100366 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == 0 &&
376 doublequote == 0 &&
377 escapechar == 0 &&
378 lineterminator == 0 &&
379 quotechar == 0 &&
380 quoting == 0 &&
381 skipinitialspace == 0 &&
382 strict == 0)
383 return dialect;
384 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000402#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000417#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200433 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300434 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 ret = (PyObject *)self;
450 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000451err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000463}
464
465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000467"CSV dialect\n"
468"\n"
469"The Dialect type records CSV parsing and generation options.\n");
470
471static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 (printfunc)0, /* tp_print */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 0, /* tp_reserved */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000512};
513
Andrew McNamara91b97462005-01-11 01:07:23 +0000514/*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518static PyObject *
519_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000521 PyObject *ctor_args;
522 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000523
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
525 if (ctor_args == NULL)
526 return NULL;
527 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
528 Py_DECREF(ctor_args);
529 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000530}
531
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000532/*
533 * READER
534 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000535static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000536parse_save_field(ReaderObj *self)
537{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000539
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200540 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
541 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 if (field == NULL)
543 return -1;
544 self->field_len = 0;
545 if (self->numeric_field) {
546 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000547
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 self->numeric_field = 0;
549 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200551 if (tmp == NULL)
552 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000553 field = tmp;
554 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100555 if (PyList_Append(self->fields, field) < 0) {
556 Py_DECREF(field);
557 return -1;
558 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000559 Py_DECREF(field);
560 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000561}
562
563static int
564parse_grow_buff(ReaderObj *self)
565{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 if (self->field_size == 0) {
567 self->field_size = 4096;
568 if (self->field != NULL)
569 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200570 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 }
572 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200573 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000574 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 PyErr_NoMemory();
576 return 0;
577 }
578 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200579 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 }
581 if (self->field == NULL) {
582 PyErr_NoMemory();
583 return 0;
584 }
585 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586}
587
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000588static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200589parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000590{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200591 if (self->field_len >= _csvstate_global->field_limit) {
592 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
593 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000594 return -1;
595 }
596 if (self->field_len == self->field_size && !parse_grow_buff(self))
597 return -1;
598 self->field[self->field_len++] = c;
599 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000600}
601
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000602static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200603parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000604{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 switch (self->state) {
608 case START_RECORD:
609 /* start of record */
610 if (c == '\0')
611 /* empty line - return [] */
612 break;
613 else if (c == '\n' || c == '\r') {
614 self->state = EAT_CRNL;
615 break;
616 }
617 /* normal character - handle as START_FIELD */
618 self->state = START_FIELD;
619 /* fallthru */
620 case START_FIELD:
621 /* expecting field */
622 if (c == '\n' || c == '\r' || c == '\0') {
623 /* save empty field - return [fields] */
624 if (parse_save_field(self) < 0)
625 return -1;
626 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
627 }
628 else if (c == dialect->quotechar &&
629 dialect->quoting != QUOTE_NONE) {
630 /* start quoted field */
631 self->state = IN_QUOTED_FIELD;
632 }
633 else if (c == dialect->escapechar) {
634 /* possible escaped character */
635 self->state = ESCAPED_CHAR;
636 }
637 else if (c == ' ' && dialect->skipinitialspace)
638 /* ignore space at start of field */
639 ;
640 else if (c == dialect->delimiter) {
641 /* save empty field */
642 if (parse_save_field(self) < 0)
643 return -1;
644 }
645 else {
646 /* begin new unquoted field */
647 if (dialect->quoting == QUOTE_NONNUMERIC)
648 self->numeric_field = 1;
649 if (parse_add_char(self, c) < 0)
650 return -1;
651 self->state = IN_FIELD;
652 }
653 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000655 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400656 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400657 if (parse_add_char(self, c) < 0)
658 return -1;
659 self->state = AFTER_ESCAPED_CRNL;
660 break;
661 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 if (c == '\0')
663 c = '\n';
664 if (parse_add_char(self, c) < 0)
665 return -1;
666 self->state = IN_FIELD;
667 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000668
R David Murrayc7c42ef2013-03-19 22:41:47 -0400669 case AFTER_ESCAPED_CRNL:
670 if (c == '\0')
671 break;
672 /*fallthru*/
673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000674 case IN_FIELD:
675 /* in unquoted field */
676 if (c == '\n' || c == '\r' || c == '\0') {
677 /* end of line - return [fields] */
678 if (parse_save_field(self) < 0)
679 return -1;
680 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
681 }
682 else if (c == dialect->escapechar) {
683 /* possible escaped character */
684 self->state = ESCAPED_CHAR;
685 }
686 else if (c == dialect->delimiter) {
687 /* save field - wait for new field */
688 if (parse_save_field(self) < 0)
689 return -1;
690 self->state = START_FIELD;
691 }
692 else {
693 /* normal character - save in field */
694 if (parse_add_char(self, c) < 0)
695 return -1;
696 }
697 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000698
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000699 case IN_QUOTED_FIELD:
700 /* in quoted field */
701 if (c == '\0')
702 ;
703 else if (c == dialect->escapechar) {
704 /* Possible escape character */
705 self->state = ESCAPE_IN_QUOTED_FIELD;
706 }
707 else if (c == dialect->quotechar &&
708 dialect->quoting != QUOTE_NONE) {
709 if (dialect->doublequote) {
710 /* doublequote; " represented by "" */
711 self->state = QUOTE_IN_QUOTED_FIELD;
712 }
713 else {
714 /* end of quote part of field */
715 self->state = IN_FIELD;
716 }
717 }
718 else {
719 /* normal character - save in field */
720 if (parse_add_char(self, c) < 0)
721 return -1;
722 }
723 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 case ESCAPE_IN_QUOTED_FIELD:
726 if (c == '\0')
727 c = '\n';
728 if (parse_add_char(self, c) < 0)
729 return -1;
730 self->state = IN_QUOTED_FIELD;
731 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000732
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300734 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000735 if (dialect->quoting != QUOTE_NONE &&
736 c == dialect->quotechar) {
737 /* save "" as " */
738 if (parse_add_char(self, c) < 0)
739 return -1;
740 self->state = IN_QUOTED_FIELD;
741 }
742 else if (c == dialect->delimiter) {
743 /* save field - wait for new field */
744 if (parse_save_field(self) < 0)
745 return -1;
746 self->state = START_FIELD;
747 }
748 else if (c == '\n' || c == '\r' || c == '\0') {
749 /* end of line - return [fields] */
750 if (parse_save_field(self) < 0)
751 return -1;
752 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
753 }
754 else if (!dialect->strict) {
755 if (parse_add_char(self, c) < 0)
756 return -1;
757 self->state = IN_FIELD;
758 }
759 else {
760 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200761 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 dialect->delimiter,
763 dialect->quotechar);
764 return -1;
765 }
766 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000767
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000768 case EAT_CRNL:
769 if (c == '\n' || c == '\r')
770 ;
771 else if (c == '\0')
772 self->state = START_RECORD;
773 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200774 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 return -1;
776 }
777 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 }
780 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000781}
782
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000783static int
784parse_reset(ReaderObj *self)
785{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300786 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 if (self->fields == NULL)
788 return -1;
789 self->field_len = 0;
790 self->state = START_RECORD;
791 self->numeric_field = 0;
792 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000793}
Skip Montanarob4a04172003-03-20 23:29:12 +0000794
795static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000796Reader_iternext(ReaderObj *self)
797{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200799 Py_UCS4 c;
800 Py_ssize_t pos, linelen;
801 unsigned int kind;
802 void *data;
803 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000804
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 if (parse_reset(self) < 0)
806 return NULL;
807 do {
808 lineobj = PyIter_Next(self->input_iter);
809 if (lineobj == NULL) {
810 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700811 if (!PyErr_Occurred() && (self->field_len != 0 ||
812 self->state == IN_QUOTED_FIELD)) {
813 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700814 PyErr_SetString(_csvstate_global->error_obj,
815 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700816 else if (parse_save_field(self) >= 0)
817 break;
818 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 return NULL;
820 }
821 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200822 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 "iterator should return strings, "
824 "not %.200s "
825 "(did you open the file in text mode?)",
826 lineobj->ob_type->tp_name
827 );
828 Py_DECREF(lineobj);
829 return NULL;
830 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100831 if (PyUnicode_READY(lineobj) == -1) {
832 Py_DECREF(lineobj);
833 return NULL;
834 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200836 kind = PyUnicode_KIND(lineobj);
837 data = PyUnicode_DATA(lineobj);
838 pos = 0;
839 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200841 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000843 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200844 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 "line contains NULL byte");
846 goto err;
847 }
848 if (parse_process_char(self, c) < 0) {
849 Py_DECREF(lineobj);
850 goto err;
851 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200852 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 }
854 Py_DECREF(lineobj);
855 if (parse_process_char(self, 0) < 0)
856 goto err;
857 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000858
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 fields = self->fields;
860 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000861err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000863}
864
865static void
866Reader_dealloc(ReaderObj *self)
867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 PyObject_GC_UnTrack(self);
869 Py_XDECREF(self->dialect);
870 Py_XDECREF(self->input_iter);
871 Py_XDECREF(self->fields);
872 if (self->field != NULL)
873 PyMem_Free(self->field);
874 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000875}
876
877static int
878Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
879{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 Py_VISIT(self->dialect);
881 Py_VISIT(self->input_iter);
882 Py_VISIT(self->fields);
883 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000884}
885
886static int
887Reader_clear(ReaderObj *self)
888{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 Py_CLEAR(self->dialect);
890 Py_CLEAR(self->input_iter);
891 Py_CLEAR(self->fields);
892 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000893}
894
895PyDoc_STRVAR(Reader_Type_doc,
896"CSV reader\n"
897"\n"
898"Reader objects are responsible for reading and parsing tabular data\n"
899"in CSV format.\n"
900);
901
902static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000904};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000905#define R_OFF(x) offsetof(ReaderObj, x)
906
907static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000908 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
909 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
910 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000911};
912
Skip Montanarob4a04172003-03-20 23:29:12 +0000913
914static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 PyVarObject_HEAD_INIT(NULL, 0)
916 "_csv.reader", /*tp_name*/
917 sizeof(ReaderObj), /*tp_basicsize*/
918 0, /*tp_itemsize*/
919 /* methods */
920 (destructor)Reader_dealloc, /*tp_dealloc*/
921 (printfunc)0, /*tp_print*/
922 (getattrfunc)0, /*tp_getattr*/
923 (setattrfunc)0, /*tp_setattr*/
924 0, /*tp_reserved*/
925 (reprfunc)0, /*tp_repr*/
926 0, /*tp_as_number*/
927 0, /*tp_as_sequence*/
928 0, /*tp_as_mapping*/
929 (hashfunc)0, /*tp_hash*/
930 (ternaryfunc)0, /*tp_call*/
931 (reprfunc)0, /*tp_str*/
932 0, /*tp_getattro*/
933 0, /*tp_setattro*/
934 0, /*tp_as_buffer*/
935 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
936 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
937 Reader_Type_doc, /*tp_doc*/
938 (traverseproc)Reader_traverse, /*tp_traverse*/
939 (inquiry)Reader_clear, /*tp_clear*/
940 0, /*tp_richcompare*/
941 0, /*tp_weaklistoffset*/
942 PyObject_SelfIter, /*tp_iter*/
943 (getiterfunc)Reader_iternext, /*tp_iternext*/
944 Reader_methods, /*tp_methods*/
945 Reader_memberlist, /*tp_members*/
946 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
948};
949
950static PyObject *
951csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
952{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 PyObject * iterator, * dialect = NULL;
954 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 if (!self)
957 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 self->dialect = NULL;
960 self->fields = NULL;
961 self->input_iter = NULL;
962 self->field = NULL;
963 self->field_size = 0;
964 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000965
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000966 if (parse_reset(self) < 0) {
967 Py_DECREF(self);
968 return NULL;
969 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
972 Py_DECREF(self);
973 return NULL;
974 }
975 self->input_iter = PyObject_GetIter(iterator);
976 if (self->input_iter == NULL) {
977 PyErr_SetString(PyExc_TypeError,
978 "argument 1 must be an iterator");
979 Py_DECREF(self);
980 return NULL;
981 }
982 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
983 if (self->dialect == NULL) {
984 Py_DECREF(self);
985 return NULL;
986 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000988 PyObject_GC_Track(self);
989 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000990}
991
992/*
993 * WRITER
994 */
995/* ---------------------------------------------------------------- */
996static void
997join_reset(WriterObj *self)
998{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 self->rec_len = 0;
1000 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001001}
1002
1003#define MEM_INCR 32768
1004
1005/* Calculate new record length or append field to record. Return new
1006 * record length.
1007 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001008static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001009join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001010 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001011 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001012{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 DialectObj *dialect = self->dialect;
1014 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001015 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001016
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001017#define INCLEN \
1018 do {\
1019 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1020 goto overflow; \
1021 } \
1022 rec_len++; \
1023 } while(0)
1024
1025#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 do {\
1027 if (copy_phase) \
1028 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001029 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 /* If this is not the first field we need a field separator */
1035 if (self->num_fields > 0)
1036 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 /* Handle preceding quote */
1039 if (copy_phase && *quoted)
1040 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 /* Copy/count field data */
1043 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001044 for (i = 0; field_data && (i < field_len); i++) {
1045 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 if (c == dialect->delimiter ||
1049 c == dialect->escapechar ||
1050 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001051 PyUnicode_FindChar(
1052 dialect->lineterminator, c, 0,
1053 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 if (dialect->quoting == QUOTE_NONE)
1055 want_escape = 1;
1056 else {
1057 if (c == dialect->quotechar) {
1058 if (dialect->doublequote)
1059 ADDCH(dialect->quotechar);
1060 else
1061 want_escape = 1;
1062 }
1063 if (!want_escape)
1064 *quoted = 1;
1065 }
1066 if (want_escape) {
1067 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001068 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 "need to escape, but no escapechar set");
1070 return -1;
1071 }
1072 ADDCH(dialect->escapechar);
1073 }
1074 }
1075 /* Copy field character into record buffer.
1076 */
1077 ADDCH(c);
1078 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 if (*quoted) {
1081 if (copy_phase)
1082 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001083 else {
1084 INCLEN; /* starting quote */
1085 INCLEN; /* ending quote */
1086 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 }
1088 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001089
1090 overflow:
1091 PyErr_NoMemory();
1092 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001093#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001094#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001095}
1096
1097static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001098join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001099{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001100
Antoine Pitrou40455752010-08-15 18:51:10 +00001101 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 PyErr_NoMemory();
1103 return 0;
1104 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 if (rec_len > self->rec_size) {
1107 if (self->rec_size == 0) {
1108 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1109 if (self->rec != NULL)
1110 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001111 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 }
1113 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001114 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001117 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (self->rec == NULL)
1119 PyMem_Free(old_rec);
1120 }
1121 if (self->rec == NULL) {
1122 PyErr_NoMemory();
1123 return 0;
1124 }
1125 }
1126 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127}
1128
1129static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001130join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001131{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001132 unsigned int field_kind = -1;
1133 void *field_data = NULL;
1134 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001135 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001137 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001138 if (PyUnicode_READY(field) == -1)
1139 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001140 field_kind = PyUnicode_KIND(field);
1141 field_data = PyUnicode_DATA(field);
1142 field_len = PyUnicode_GET_LENGTH(field);
1143 }
1144 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001145 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 if (rec_len < 0)
1147 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 /* grow record buffer if necessary */
1150 if (!join_check_rec_size(self, rec_len))
1151 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001153 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001154 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158}
1159
1160static int
1161join_append_lineterminator(WriterObj *self)
1162{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001163 Py_ssize_t terminator_len, i;
1164 unsigned int term_kind;
1165 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001167 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 if (terminator_len == -1)
1169 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 /* grow record buffer if necessary */
1172 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1173 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001175 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1176 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1177 for (i = 0; i < terminator_len; i++)
1178 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182}
1183
1184PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001185"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001186"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001187"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001188"elements will be converted to string.");
1189
1190static PyObject *
1191csv_writerow(WriterObj *self, PyObject *seq)
1192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001194 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001195
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001196 iter = PyObject_GetIter(seq);
1197 if (iter == NULL)
1198 return PyErr_Format(_csvstate_global->error_obj,
1199 "iterable expected, not %.200s",
1200 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 /* Join all fields in internal buffer.
1203 */
1204 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001205 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 int append_ok;
1207 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 switch (dialect->quoting) {
1210 case QUOTE_NONNUMERIC:
1211 quoted = !PyNumber_Check(field);
1212 break;
1213 case QUOTE_ALL:
1214 quoted = 1;
1215 break;
1216 default:
1217 quoted = 0;
1218 break;
1219 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001222 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 Py_DECREF(field);
1224 }
1225 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001226 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 Py_DECREF(field);
1228 }
1229 else {
1230 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 str = PyObject_Str(field);
1233 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001234 if (str == NULL) {
1235 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001237 }
1238 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 Py_DECREF(str);
1240 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001241 if (!append_ok) {
1242 Py_DECREF(iter);
1243 return NULL;
1244 }
1245 }
1246 Py_DECREF(iter);
1247 if (PyErr_Occurred())
1248 return NULL;
1249
1250 if (self->num_fields > 0 && self->rec_size == 0) {
1251 if (dialect->quoting == QUOTE_NONE) {
1252 PyErr_Format(_csvstate_global->error_obj,
1253 "single empty field record must be quoted");
1254 return NULL;
1255 }
1256 self->num_fields--;
1257 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001258 return NULL;
1259 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001260
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 /* Add line terminator.
1262 */
1263 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001264 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001265
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001266 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1267 (void *) self->rec, self->rec_len);
1268 if (line == NULL)
1269 return NULL;
1270 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1271 Py_DECREF(line);
1272 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001273}
1274
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001275PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001276"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001277"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001278"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001279"elements will be converted to string.");
1280
Skip Montanarob4a04172003-03-20 23:29:12 +00001281static PyObject *
1282csv_writerows(WriterObj *self, PyObject *seqseq)
1283{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 row_iter = PyObject_GetIter(seqseq);
1287 if (row_iter == NULL) {
1288 PyErr_SetString(PyExc_TypeError,
1289 "writerows() argument must be iterable");
1290 return NULL;
1291 }
1292 while ((row_obj = PyIter_Next(row_iter))) {
1293 result = csv_writerow(self, row_obj);
1294 Py_DECREF(row_obj);
1295 if (!result) {
1296 Py_DECREF(row_iter);
1297 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001298 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 else
1300 Py_DECREF(result);
1301 }
1302 Py_DECREF(row_iter);
1303 if (PyErr_Occurred())
1304 return NULL;
1305 Py_INCREF(Py_None);
1306 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001307}
1308
1309static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1311 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1312 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001313};
1314
1315#define W_OFF(x) offsetof(WriterObj, x)
1316
1317static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001318 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1319 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001320};
1321
1322static void
1323Writer_dealloc(WriterObj *self)
1324{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 PyObject_GC_UnTrack(self);
1326 Py_XDECREF(self->dialect);
1327 Py_XDECREF(self->writeline);
1328 if (self->rec != NULL)
1329 PyMem_Free(self->rec);
1330 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001331}
1332
1333static int
1334Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_VISIT(self->dialect);
1337 Py_VISIT(self->writeline);
1338 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001339}
1340
1341static int
1342Writer_clear(WriterObj *self)
1343{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 Py_CLEAR(self->dialect);
1345 Py_CLEAR(self->writeline);
1346 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001347}
1348
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001350"CSV writer\n"
1351"\n"
1352"Writer objects are responsible for generating tabular data\n"
1353"in CSV format from sequence input.\n"
1354);
1355
1356static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 PyVarObject_HEAD_INIT(NULL, 0)
1358 "_csv.writer", /*tp_name*/
1359 sizeof(WriterObj), /*tp_basicsize*/
1360 0, /*tp_itemsize*/
1361 /* methods */
1362 (destructor)Writer_dealloc, /*tp_dealloc*/
1363 (printfunc)0, /*tp_print*/
1364 (getattrfunc)0, /*tp_getattr*/
1365 (setattrfunc)0, /*tp_setattr*/
1366 0, /*tp_reserved*/
1367 (reprfunc)0, /*tp_repr*/
1368 0, /*tp_as_number*/
1369 0, /*tp_as_sequence*/
1370 0, /*tp_as_mapping*/
1371 (hashfunc)0, /*tp_hash*/
1372 (ternaryfunc)0, /*tp_call*/
1373 (reprfunc)0, /*tp_str*/
1374 0, /*tp_getattro*/
1375 0, /*tp_setattro*/
1376 0, /*tp_as_buffer*/
1377 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1378 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1379 Writer_Type_doc,
1380 (traverseproc)Writer_traverse, /*tp_traverse*/
1381 (inquiry)Writer_clear, /*tp_clear*/
1382 0, /*tp_richcompare*/
1383 0, /*tp_weaklistoffset*/
1384 (getiterfunc)0, /*tp_iter*/
1385 (getiterfunc)0, /*tp_iternext*/
1386 Writer_methods, /*tp_methods*/
1387 Writer_memberlist, /*tp_members*/
1388 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001389};
1390
1391static PyObject *
1392csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1393{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 PyObject * output_file, * dialect = NULL;
1395 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001396 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (!self)
1399 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 self->dialect = NULL;
1402 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 self->rec = NULL;
1405 self->rec_size = 0;
1406 self->rec_len = 0;
1407 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1410 Py_DECREF(self);
1411 return NULL;
1412 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001413 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1415 PyErr_SetString(PyExc_TypeError,
1416 "argument 1 must have a \"write\" method");
1417 Py_DECREF(self);
1418 return NULL;
1419 }
1420 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1421 if (self->dialect == NULL) {
1422 Py_DECREF(self);
1423 return NULL;
1424 }
1425 PyObject_GC_Track(self);
1426 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001427}
1428
1429/*
1430 * DIALECT REGISTRY
1431 */
1432static PyObject *
1433csv_list_dialects(PyObject *module, PyObject *args)
1434{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001435 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001436}
1437
1438static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001439csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001440{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 PyObject *name_obj, *dialect_obj = NULL;
1442 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1445 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001446 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001448 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 return NULL;
1450 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001451 if (PyUnicode_READY(name_obj) == -1)
1452 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 dialect = _call_dialect(dialect_obj, kwargs);
1454 if (dialect == NULL)
1455 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001456 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 Py_DECREF(dialect);
1458 return NULL;
1459 }
1460 Py_DECREF(dialect);
1461 Py_INCREF(Py_None);
1462 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001463}
1464
1465static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001466csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001467{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001468 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1469 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 Py_INCREF(Py_None);
1471 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001472}
1473
1474static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001475csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001476{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001477 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001478}
1479
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001480static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001481csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001482{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001483 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001484 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001485
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1487 return NULL;
1488 if (new_limit != NULL) {
1489 if (!PyLong_CheckExact(new_limit)) {
1490 PyErr_Format(PyExc_TypeError,
1491 "limit must be an integer");
1492 return NULL;
1493 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001494 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1495 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1496 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 return NULL;
1498 }
1499 }
1500 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001501}
1502
Skip Montanarob4a04172003-03-20 23:29:12 +00001503/*
1504 * MODULE
1505 */
1506
1507PyDoc_STRVAR(csv_module_doc,
1508"CSV parsing and writing.\n"
1509"\n"
1510"This module provides classes that assist in the reading and writing\n"
1511"of Comma Separated Value (CSV) files, and implements the interface\n"
1512"described by PEP 305. Although many CSV files are simple to parse,\n"
1513"the format is not formally defined by a stable specification and\n"
1514"is subtle enough that parsing lines of a CSV file with something\n"
1515"like line.split(\",\") is bound to fail. The module supports three\n"
1516"basic APIs: reading, writing, and registration of dialects.\n"
1517"\n"
1518"\n"
1519"DIALECT REGISTRATION:\n"
1520"\n"
1521"Readers and writers support a dialect argument, which is a convenient\n"
1522"handle on a group of settings. When the dialect argument is a string,\n"
1523"it identifies one of the dialects previously registered with the module.\n"
1524"If it is a class or instance, the attributes of the argument are used as\n"
1525"the settings for the reader or writer:\n"
1526"\n"
1527" class excel:\n"
1528" delimiter = ','\n"
1529" quotechar = '\"'\n"
1530" escapechar = None\n"
1531" doublequote = True\n"
1532" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001533" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001534" quoting = QUOTE_MINIMAL\n"
1535"\n"
1536"SETTINGS:\n"
1537"\n"
1538" * quotechar - specifies a one-character string to use as the \n"
1539" quoting character. It defaults to '\"'.\n"
1540" * delimiter - specifies a one-character string to use as the \n"
1541" field separator. It defaults to ','.\n"
1542" * skipinitialspace - specifies how to interpret whitespace which\n"
1543" immediately follows a delimiter. It defaults to False, which\n"
1544" means that whitespace immediately following a delimiter is part\n"
1545" of the following field.\n"
1546" * lineterminator - specifies the character sequence which should \n"
1547" terminate rows.\n"
1548" * quoting - controls when quotes should be generated by the writer.\n"
1549" It can take on any of the following module constants:\n"
1550"\n"
1551" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1552" field contains either the quotechar or the delimiter\n"
1553" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1554" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001555" fields which do not parse as integers or floating point\n"
1556" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001557" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1558" * escapechar - specifies a one-character string used to escape \n"
1559" the delimiter when quoting is set to QUOTE_NONE.\n"
1560" * doublequote - controls the handling of quotes inside fields. When\n"
1561" True, two consecutive quotes are interpreted as one during read,\n"
1562" and when writing, each quote character embedded in the data is\n"
1563" written as two quotes\n");
1564
1565PyDoc_STRVAR(csv_reader_doc,
1566" csv_reader = reader(iterable [, dialect='excel']\n"
1567" [optional keyword args])\n"
1568" for row in csv_reader:\n"
1569" process(row)\n"
1570"\n"
1571"The \"iterable\" argument can be any object that returns a line\n"
1572"of input for each iteration, such as a file object or a list. The\n"
1573"optional \"dialect\" parameter is discussed below. The function\n"
1574"also accepts optional keyword arguments which override settings\n"
1575"provided by the dialect.\n"
1576"\n"
1577"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001578"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001579
1580PyDoc_STRVAR(csv_writer_doc,
1581" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1582" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001583" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001584" csv_writer.writerow(row)\n"
1585"\n"
1586" [or]\n"
1587"\n"
1588" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1589" [optional keyword args])\n"
1590" csv_writer.writerows(rows)\n"
1591"\n"
1592"The \"fileobj\" argument can be any object that supports the file API.\n");
1593
1594PyDoc_STRVAR(csv_list_dialects_doc,
1595"Return a list of all know dialect names.\n"
1596" names = csv.list_dialects()");
1597
1598PyDoc_STRVAR(csv_get_dialect_doc,
1599"Return the dialect instance associated with name.\n"
1600" dialect = csv.get_dialect(name)");
1601
1602PyDoc_STRVAR(csv_register_dialect_doc,
1603"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001604" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001605
1606PyDoc_STRVAR(csv_unregister_dialect_doc,
1607"Delete the name/dialect mapping associated with a string name.\n"
1608" csv.unregister_dialect(name)");
1609
Andrew McNamara31d88962005-01-12 03:45:10 +00001610PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001611"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001612" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001613"\n"
1614"Returns old limit. If limit is not given, no new limit is set and\n"
1615"the old limit is returned");
1616
Skip Montanarob4a04172003-03-20 23:29:12 +00001617static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 { "reader", (PyCFunction)csv_reader,
1619 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1620 { "writer", (PyCFunction)csv_writer,
1621 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1622 { "list_dialects", (PyCFunction)csv_list_dialects,
1623 METH_NOARGS, csv_list_dialects_doc},
1624 { "register_dialect", (PyCFunction)csv_register_dialect,
1625 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1626 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1627 METH_O, csv_unregister_dialect_doc},
1628 { "get_dialect", (PyCFunction)csv_get_dialect,
1629 METH_O, csv_get_dialect_doc},
1630 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1631 METH_VARARGS, csv_field_size_limit_doc},
1632 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001633};
1634
Martin v. Löwis1a214512008-06-11 05:26:20 +00001635static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 PyModuleDef_HEAD_INIT,
1637 "_csv",
1638 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001639 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 csv_methods,
1641 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001642 _csv_traverse,
1643 _csv_clear,
1644 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001645};
1646
Skip Montanarob4a04172003-03-20 23:29:12 +00001647PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001648PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001649{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 PyObject *module;
1651 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 if (PyType_Ready(&Dialect_Type) < 0)
1654 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 if (PyType_Ready(&Reader_Type) < 0)
1657 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 if (PyType_Ready(&Writer_Type) < 0)
1660 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 /* Create the module and add the functions */
1663 module = PyModule_Create(&_csvmodule);
1664 if (module == NULL)
1665 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 /* Add version to the module. */
1668 if (PyModule_AddStringConstant(module, "__version__",
1669 MODULE_VERSION) == -1)
1670 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001671
Antoine Pitroue7672d32012-05-16 11:33:08 +02001672 /* Set the field limit */
1673 _csvstate(module)->field_limit = 128 * 1024;
1674 /* Do I still need to add this var to the Module Dict? */
1675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001677 _csvstate(module)->dialects = PyDict_New();
1678 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001680 Py_INCREF(_csvstate(module)->dialects);
1681 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 /* Add quote styles into dictionary */
1685 for (style = quote_styles; style->name; style++) {
1686 if (PyModule_AddIntConstant(module, style->name,
1687 style->style) == -1)
1688 return NULL;
1689 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 /* Add the Dialect type */
1692 Py_INCREF(&Dialect_Type);
1693 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1694 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001696 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001697 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1698 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001699 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001700 Py_INCREF(_csvstate(module)->error_obj);
1701 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001702 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001703}