blob: 7a78541bf2d70a72dac5d6a3e180e5f4b6cca1e7 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,            /* at the start of a record */
    START_FIELD,             /* expecting a field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* quote character seen inside quotes */
    EAT_CRNL,                /* consuming the line ending after a record */
    AFTER_ESCAPED_CRNL       /* an escaped CR or LF was just stored */
} ParserState;
56
/* Quoting styles a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020063 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020066static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300251 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300280 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 }
282 }
283 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284}
285
286static int
287dialect_check_quoting(int quoting)
288{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200289 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200292 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000297}
Skip Montanarob4a04172003-03-20 23:29:12 +0000298
299#define D_OFF(x) offsetof(DialectObj, x)
300
301static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
303 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
304 { "strict", T_INT, D_OFF(strict), READONLY },
305 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000306};
307
308static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 { "delimiter", (getter)Dialect_get_delimiter},
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000315};
316
317static void
318Dialect_dealloc(DialectObj *self)
319{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000322}
323
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000324static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 "dialect",
326 "delimiter",
327 "doublequote",
328 "escapechar",
329 "lineterminator",
330 "quotechar",
331 "quoting",
332 "skipinitialspace",
333 "strict",
334 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000335};
336
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000337static PyObject *
338dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 &quotechar,
360 &quoting,
361 &skipinitialspace,
362 &strict))
363 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100366 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == 0 &&
376 doublequote == 0 &&
377 escapechar == 0 &&
378 lineterminator == 0 &&
379 quotechar == 0 &&
380 quoting == 0 &&
381 skipinitialspace == 0 &&
382 strict == 0)
383 return dialect;
384 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000402#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000417#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200433 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300434 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 ret = (PyObject *)self;
450 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000451err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000463}
464
465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000467"CSV dialect\n"
468"\n"
469"The Dialect type records CSV parsing and generation options.\n");
470
471static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 (printfunc)0, /* tp_print */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 0, /* tp_reserved */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000512};
513
Andrew McNamara91b97462005-01-11 01:07:23 +0000514/*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518static PyObject *
519_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520{
Victor Stinner6412f492016-08-23 00:21:34 +0200521 PyObject *type = (PyObject *)&Dialect_Type;
522 if (dialect_inst) {
523 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
524 }
525 else {
526 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
527 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000528}
529
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000533static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000534parse_save_field(ReaderObj *self)
535{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000537
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200538 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
539 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 if (field == NULL)
541 return -1;
542 self->field_len = 0;
543 if (self->numeric_field) {
544 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 self->numeric_field = 0;
547 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200549 if (tmp == NULL)
550 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 field = tmp;
552 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100553 if (PyList_Append(self->fields, field) < 0) {
554 Py_DECREF(field);
555 return -1;
556 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 Py_DECREF(field);
558 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559}
560
561static int
562parse_grow_buff(ReaderObj *self)
563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 if (self->field_size == 0) {
565 self->field_size = 4096;
566 if (self->field != NULL)
567 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200568 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 }
570 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000572 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyErr_NoMemory();
574 return 0;
575 }
576 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200577 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 }
579 if (self->field == NULL) {
580 PyErr_NoMemory();
581 return 0;
582 }
583 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000584}
585
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000586static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200587parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000588{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200589 if (self->field_len >= _csvstate_global->field_limit) {
590 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
591 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 return -1;
593 }
594 if (self->field_len == self->field_size && !parse_grow_buff(self))
595 return -1;
596 self->field[self->field_len++] = c;
597 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000598}
599
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000600static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200601parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000602{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 switch (self->state) {
606 case START_RECORD:
607 /* start of record */
608 if (c == '\0')
609 /* empty line - return [] */
610 break;
611 else if (c == '\n' || c == '\r') {
612 self->state = EAT_CRNL;
613 break;
614 }
615 /* normal character - handle as START_FIELD */
616 self->state = START_FIELD;
617 /* fallthru */
618 case START_FIELD:
619 /* expecting field */
620 if (c == '\n' || c == '\r' || c == '\0') {
621 /* save empty field - return [fields] */
622 if (parse_save_field(self) < 0)
623 return -1;
624 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
625 }
626 else if (c == dialect->quotechar &&
627 dialect->quoting != QUOTE_NONE) {
628 /* start quoted field */
629 self->state = IN_QUOTED_FIELD;
630 }
631 else if (c == dialect->escapechar) {
632 /* possible escaped character */
633 self->state = ESCAPED_CHAR;
634 }
635 else if (c == ' ' && dialect->skipinitialspace)
636 /* ignore space at start of field */
637 ;
638 else if (c == dialect->delimiter) {
639 /* save empty field */
640 if (parse_save_field(self) < 0)
641 return -1;
642 }
643 else {
644 /* begin new unquoted field */
645 if (dialect->quoting == QUOTE_NONNUMERIC)
646 self->numeric_field = 1;
647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = IN_FIELD;
650 }
651 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400654 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400655 if (parse_add_char(self, c) < 0)
656 return -1;
657 self->state = AFTER_ESCAPED_CRNL;
658 break;
659 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000660 if (c == '\0')
661 c = '\n';
662 if (parse_add_char(self, c) < 0)
663 return -1;
664 self->state = IN_FIELD;
665 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000666
R David Murrayc7c42ef2013-03-19 22:41:47 -0400667 case AFTER_ESCAPED_CRNL:
668 if (c == '\0')
669 break;
670 /*fallthru*/
671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 case IN_FIELD:
673 /* in unquoted field */
674 if (c == '\n' || c == '\r' || c == '\0') {
675 /* end of line - return [fields] */
676 if (parse_save_field(self) < 0)
677 return -1;
678 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
679 }
680 else if (c == dialect->escapechar) {
681 /* possible escaped character */
682 self->state = ESCAPED_CHAR;
683 }
684 else if (c == dialect->delimiter) {
685 /* save field - wait for new field */
686 if (parse_save_field(self) < 0)
687 return -1;
688 self->state = START_FIELD;
689 }
690 else {
691 /* normal character - save in field */
692 if (parse_add_char(self, c) < 0)
693 return -1;
694 }
695 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 case IN_QUOTED_FIELD:
698 /* in quoted field */
699 if (c == '\0')
700 ;
701 else if (c == dialect->escapechar) {
702 /* Possible escape character */
703 self->state = ESCAPE_IN_QUOTED_FIELD;
704 }
705 else if (c == dialect->quotechar &&
706 dialect->quoting != QUOTE_NONE) {
707 if (dialect->doublequote) {
708 /* doublequote; " represented by "" */
709 self->state = QUOTE_IN_QUOTED_FIELD;
710 }
711 else {
712 /* end of quote part of field */
713 self->state = IN_FIELD;
714 }
715 }
716 else {
717 /* normal character - save in field */
718 if (parse_add_char(self, c) < 0)
719 return -1;
720 }
721 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 case ESCAPE_IN_QUOTED_FIELD:
724 if (c == '\0')
725 c = '\n';
726 if (parse_add_char(self, c) < 0)
727 return -1;
728 self->state = IN_QUOTED_FIELD;
729 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300732 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000733 if (dialect->quoting != QUOTE_NONE &&
734 c == dialect->quotechar) {
735 /* save "" as " */
736 if (parse_add_char(self, c) < 0)
737 return -1;
738 self->state = IN_QUOTED_FIELD;
739 }
740 else if (c == dialect->delimiter) {
741 /* save field - wait for new field */
742 if (parse_save_field(self) < 0)
743 return -1;
744 self->state = START_FIELD;
745 }
746 else if (c == '\n' || c == '\r' || c == '\0') {
747 /* end of line - return [fields] */
748 if (parse_save_field(self) < 0)
749 return -1;
750 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
751 }
752 else if (!dialect->strict) {
753 if (parse_add_char(self, c) < 0)
754 return -1;
755 self->state = IN_FIELD;
756 }
757 else {
758 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200759 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 dialect->delimiter,
761 dialect->quotechar);
762 return -1;
763 }
764 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 case EAT_CRNL:
767 if (c == '\n' || c == '\r')
768 ;
769 else if (c == '\0')
770 self->state = START_RECORD;
771 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200772 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 return -1;
774 }
775 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000776
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 }
778 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000779}
780
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781static int
782parse_reset(ReaderObj *self)
783{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300784 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 if (self->fields == NULL)
786 return -1;
787 self->field_len = 0;
788 self->state = START_RECORD;
789 self->numeric_field = 0;
790 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000791}
Skip Montanarob4a04172003-03-20 23:29:12 +0000792
793static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000794Reader_iternext(ReaderObj *self)
795{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200797 Py_UCS4 c;
798 Py_ssize_t pos, linelen;
799 unsigned int kind;
800 void *data;
801 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 if (parse_reset(self) < 0)
804 return NULL;
805 do {
806 lineobj = PyIter_Next(self->input_iter);
807 if (lineobj == NULL) {
808 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700809 if (!PyErr_Occurred() && (self->field_len != 0 ||
810 self->state == IN_QUOTED_FIELD)) {
811 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700812 PyErr_SetString(_csvstate_global->error_obj,
813 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700814 else if (parse_save_field(self) >= 0)
815 break;
816 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 return NULL;
818 }
819 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200820 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 "iterator should return strings, "
822 "not %.200s "
823 "(did you open the file in text mode?)",
824 lineobj->ob_type->tp_name
825 );
826 Py_DECREF(lineobj);
827 return NULL;
828 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100829 if (PyUnicode_READY(lineobj) == -1) {
830 Py_DECREF(lineobj);
831 return NULL;
832 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200834 kind = PyUnicode_KIND(lineobj);
835 data = PyUnicode_DATA(lineobj);
836 pos = 0;
837 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000838 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200839 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000840 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000841 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200842 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 "line contains NULL byte");
844 goto err;
845 }
846 if (parse_process_char(self, c) < 0) {
847 Py_DECREF(lineobj);
848 goto err;
849 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200850 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 }
852 Py_DECREF(lineobj);
853 if (parse_process_char(self, 0) < 0)
854 goto err;
855 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000856
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 fields = self->fields;
858 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000859err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000861}
862
863static void
864Reader_dealloc(ReaderObj *self)
865{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000866 PyObject_GC_UnTrack(self);
867 Py_XDECREF(self->dialect);
868 Py_XDECREF(self->input_iter);
869 Py_XDECREF(self->fields);
870 if (self->field != NULL)
871 PyMem_Free(self->field);
872 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000873}
874
/* tp_traverse slot of reader objects: report the Python objects a reader
 * holds references to (dialect, input iterator, field list) to the cyclic
 * garbage collector.  Returns 0 unless a visit callback returns non-zero,
 * in which case Py_VISIT returns that value.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
883
/* tp_clear slot of reader objects: drop the references to the dialect,
 * the input iterator and the field list, leaving each member NULL.
 * Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
892
/* Docstring used as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
899
/* Method table of Reader_Type.  It contains only the terminating
 * sentinel: reader objects define no methods here.
 */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member x inside ReaderObj, for the member table below. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes exposed on reader objects:
 *   dialect  - the dialect object in use
 *   line_num - counter incremented by Reader_iternext() for every line
 *              fetched from the input iterator
 */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
910
Skip Montanarob4a04172003-03-20 23:29:12 +0000911
912static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 PyVarObject_HEAD_INIT(NULL, 0)
914 "_csv.reader", /*tp_name*/
915 sizeof(ReaderObj), /*tp_basicsize*/
916 0, /*tp_itemsize*/
917 /* methods */
918 (destructor)Reader_dealloc, /*tp_dealloc*/
919 (printfunc)0, /*tp_print*/
920 (getattrfunc)0, /*tp_getattr*/
921 (setattrfunc)0, /*tp_setattr*/
922 0, /*tp_reserved*/
923 (reprfunc)0, /*tp_repr*/
924 0, /*tp_as_number*/
925 0, /*tp_as_sequence*/
926 0, /*tp_as_mapping*/
927 (hashfunc)0, /*tp_hash*/
928 (ternaryfunc)0, /*tp_call*/
929 (reprfunc)0, /*tp_str*/
930 0, /*tp_getattro*/
931 0, /*tp_setattro*/
932 0, /*tp_as_buffer*/
933 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
934 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
935 Reader_Type_doc, /*tp_doc*/
936 (traverseproc)Reader_traverse, /*tp_traverse*/
937 (inquiry)Reader_clear, /*tp_clear*/
938 0, /*tp_richcompare*/
939 0, /*tp_weaklistoffset*/
940 PyObject_SelfIter, /*tp_iter*/
941 (getiterfunc)Reader_iternext, /*tp_iternext*/
942 Reader_methods, /*tp_methods*/
943 Reader_memberlist, /*tp_members*/
944 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000945
946};
947
948static PyObject *
949csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
950{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 PyObject * iterator, * dialect = NULL;
952 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 if (!self)
955 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 self->dialect = NULL;
958 self->fields = NULL;
959 self->input_iter = NULL;
960 self->field = NULL;
961 self->field_size = 0;
962 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000963
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000964 if (parse_reset(self) < 0) {
965 Py_DECREF(self);
966 return NULL;
967 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000968
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
970 Py_DECREF(self);
971 return NULL;
972 }
973 self->input_iter = PyObject_GetIter(iterator);
974 if (self->input_iter == NULL) {
975 PyErr_SetString(PyExc_TypeError,
976 "argument 1 must be an iterator");
977 Py_DECREF(self);
978 return NULL;
979 }
980 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
981 if (self->dialect == NULL) {
982 Py_DECREF(self);
983 return NULL;
984 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000985
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000986 PyObject_GC_Track(self);
987 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000988}
989
990/*
991 * WRITER
992 */
993/* ---------------------------------------------------------------- */
994static void
995join_reset(WriterObj *self)
996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 self->rec_len = 0;
998 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000999}
1000
/* Granularity of the writer's record buffer: join_check_rec_size() rounds
 * the buffer capacity up to a multiple of this many Py_UCS4 elements.
 */
#define MEM_INCR 32768
1002
1003/* Calculate new record length or append field to record. Return new
1004 * record length.
1005 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001006static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001007join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001008 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001009 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001010{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 DialectObj *dialect = self->dialect;
1012 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001013 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001014
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001015#define INCLEN \
1016 do {\
1017 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1018 goto overflow; \
1019 } \
1020 rec_len++; \
1021 } while(0)
1022
1023#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 do {\
1025 if (copy_phase) \
1026 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001027 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 /* If this is not the first field we need a field separator */
1033 if (self->num_fields > 0)
1034 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 /* Handle preceding quote */
1037 if (copy_phase && *quoted)
1038 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 /* Copy/count field data */
1041 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001042 for (i = 0; field_data && (i < field_len); i++) {
1043 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001045
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 if (c == dialect->delimiter ||
1047 c == dialect->escapechar ||
1048 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001049 PyUnicode_FindChar(
1050 dialect->lineterminator, c, 0,
1051 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 if (dialect->quoting == QUOTE_NONE)
1053 want_escape = 1;
1054 else {
1055 if (c == dialect->quotechar) {
1056 if (dialect->doublequote)
1057 ADDCH(dialect->quotechar);
1058 else
1059 want_escape = 1;
1060 }
1061 if (!want_escape)
1062 *quoted = 1;
1063 }
1064 if (want_escape) {
1065 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001066 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 "need to escape, but no escapechar set");
1068 return -1;
1069 }
1070 ADDCH(dialect->escapechar);
1071 }
1072 }
1073 /* Copy field character into record buffer.
1074 */
1075 ADDCH(c);
1076 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001077
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 if (*quoted) {
1079 if (copy_phase)
1080 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001081 else {
1082 INCLEN; /* starting quote */
1083 INCLEN; /* ending quote */
1084 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 }
1086 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001087
1088 overflow:
1089 PyErr_NoMemory();
1090 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001091#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001092#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001093}
1094
1095static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001096join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001097{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001098
Antoine Pitrou40455752010-08-15 18:51:10 +00001099 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 PyErr_NoMemory();
1101 return 0;
1102 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 if (rec_len > self->rec_size) {
1105 if (self->rec_size == 0) {
1106 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1107 if (self->rec != NULL)
1108 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 }
1111 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001112 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001115 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 if (self->rec == NULL)
1117 PyMem_Free(old_rec);
1118 }
1119 if (self->rec == NULL) {
1120 PyErr_NoMemory();
1121 return 0;
1122 }
1123 }
1124 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125}
1126
1127static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001128join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001129{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001130 unsigned int field_kind = -1;
1131 void *field_data = NULL;
1132 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001133 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001134
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001135 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001136 if (PyUnicode_READY(field) == -1)
1137 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001138 field_kind = PyUnicode_KIND(field);
1139 field_data = PyUnicode_DATA(field);
1140 field_len = PyUnicode_GET_LENGTH(field);
1141 }
1142 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001143 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 if (rec_len < 0)
1145 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001146
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 /* grow record buffer if necessary */
1148 if (!join_check_rec_size(self, rec_len))
1149 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001151 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001152 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001153 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001154
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001156}
1157
1158static int
1159join_append_lineterminator(WriterObj *self)
1160{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001161 Py_ssize_t terminator_len, i;
1162 unsigned int term_kind;
1163 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001164
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001165 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 if (terminator_len == -1)
1167 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001168
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 /* grow record buffer if necessary */
1170 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1171 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001173 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1174 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1175 for (i = 0; i < terminator_len; i++)
1176 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180}
1181
/* Docstring of the writer method "writerow" (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields. Non-string\n"
"elements will be converted to string.");
1187
1188static PyObject *
1189csv_writerow(WriterObj *self, PyObject *seq)
1190{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001192 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001193
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001194 iter = PyObject_GetIter(seq);
1195 if (iter == NULL)
1196 return PyErr_Format(_csvstate_global->error_obj,
1197 "iterable expected, not %.200s",
1198 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 /* Join all fields in internal buffer.
1201 */
1202 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001203 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 int append_ok;
1205 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 switch (dialect->quoting) {
1208 case QUOTE_NONNUMERIC:
1209 quoted = !PyNumber_Check(field);
1210 break;
1211 case QUOTE_ALL:
1212 quoted = 1;
1213 break;
1214 default:
1215 quoted = 0;
1216 break;
1217 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001218
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001220 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 Py_DECREF(field);
1222 }
1223 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001224 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 Py_DECREF(field);
1226 }
1227 else {
1228 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 str = PyObject_Str(field);
1231 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001232 if (str == NULL) {
1233 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001235 }
1236 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 Py_DECREF(str);
1238 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001239 if (!append_ok) {
1240 Py_DECREF(iter);
1241 return NULL;
1242 }
1243 }
1244 Py_DECREF(iter);
1245 if (PyErr_Occurred())
1246 return NULL;
1247
1248 if (self->num_fields > 0 && self->rec_size == 0) {
1249 if (dialect->quoting == QUOTE_NONE) {
1250 PyErr_Format(_csvstate_global->error_obj,
1251 "single empty field record must be quoted");
1252 return NULL;
1253 }
1254 self->num_fields--;
1255 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 return NULL;
1257 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001258
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 /* Add line terminator.
1260 */
1261 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001262 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001263
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001264 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1265 (void *) self->rec, self->rec_len);
1266 if (line == NULL)
1267 return NULL;
1268 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1269 Py_DECREF(line);
1270 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001271}
1272
/* Docstring of the writer method "writerows" (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file. Non-string\n"
"elements will be converted to string.");
1278
Skip Montanarob4a04172003-03-20 23:29:12 +00001279static PyObject *
1280csv_writerows(WriterObj *self, PyObject *seqseq)
1281{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001282 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 row_iter = PyObject_GetIter(seqseq);
1285 if (row_iter == NULL) {
1286 PyErr_SetString(PyExc_TypeError,
1287 "writerows() argument must be iterable");
1288 return NULL;
1289 }
1290 while ((row_obj = PyIter_Next(row_iter))) {
1291 result = csv_writerow(self, row_obj);
1292 Py_DECREF(row_obj);
1293 if (!result) {
1294 Py_DECREF(row_iter);
1295 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001296 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001297 else
1298 Py_DECREF(result);
1299 }
1300 Py_DECREF(row_iter);
1301 if (PyErr_Occurred())
1302 return NULL;
1303 Py_INCREF(Py_None);
1304 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001305}
1306
/* Method table of Writer_Type.  Both methods take exactly one argument
 * (METH_O).
 */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1312
/* Byte offset of member x inside WriterObj, for the member table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Read-only attributes exposed on writer objects: only "dialect". */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
1319
1320static void
1321Writer_dealloc(WriterObj *self)
1322{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 PyObject_GC_UnTrack(self);
1324 Py_XDECREF(self->dialect);
1325 Py_XDECREF(self->writeline);
1326 if (self->rec != NULL)
1327 PyMem_Free(self->rec);
1328 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001329}
1330
/* tp_traverse slot of writer objects: report the dialect and the write
 * callable to the cyclic garbage collector.  Returns 0 unless a visit
 * callback returns non-zero, in which case Py_VISIT returns that value.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1338
/* tp_clear slot of writer objects: drop the references to the dialect and
 * the write callable, leaving both members NULL.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1346
/* Docstring used as tp_doc of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1353
1354static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 PyVarObject_HEAD_INIT(NULL, 0)
1356 "_csv.writer", /*tp_name*/
1357 sizeof(WriterObj), /*tp_basicsize*/
1358 0, /*tp_itemsize*/
1359 /* methods */
1360 (destructor)Writer_dealloc, /*tp_dealloc*/
1361 (printfunc)0, /*tp_print*/
1362 (getattrfunc)0, /*tp_getattr*/
1363 (setattrfunc)0, /*tp_setattr*/
1364 0, /*tp_reserved*/
1365 (reprfunc)0, /*tp_repr*/
1366 0, /*tp_as_number*/
1367 0, /*tp_as_sequence*/
1368 0, /*tp_as_mapping*/
1369 (hashfunc)0, /*tp_hash*/
1370 (ternaryfunc)0, /*tp_call*/
1371 (reprfunc)0, /*tp_str*/
1372 0, /*tp_getattro*/
1373 0, /*tp_setattro*/
1374 0, /*tp_as_buffer*/
1375 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1376 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1377 Writer_Type_doc,
1378 (traverseproc)Writer_traverse, /*tp_traverse*/
1379 (inquiry)Writer_clear, /*tp_clear*/
1380 0, /*tp_richcompare*/
1381 0, /*tp_weaklistoffset*/
1382 (getiterfunc)0, /*tp_iter*/
1383 (getiterfunc)0, /*tp_iternext*/
1384 Writer_methods, /*tp_methods*/
1385 Writer_memberlist, /*tp_members*/
1386 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001387};
1388
1389static PyObject *
1390csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1391{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 PyObject * output_file, * dialect = NULL;
1393 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001394 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 if (!self)
1397 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 self->dialect = NULL;
1400 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 self->rec = NULL;
1403 self->rec_size = 0;
1404 self->rec_len = 0;
1405 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1408 Py_DECREF(self);
1409 return NULL;
1410 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001411 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001412 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1413 PyErr_SetString(PyExc_TypeError,
1414 "argument 1 must have a \"write\" method");
1415 Py_DECREF(self);
1416 return NULL;
1417 }
1418 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1419 if (self->dialect == NULL) {
1420 Py_DECREF(self);
1421 return NULL;
1422 }
1423 PyObject_GC_Track(self);
1424 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001425}
1426
1427/*
1428 * DIALECT REGISTRY
1429 */
1430static PyObject *
1431csv_list_dialects(PyObject *module, PyObject *args)
1432{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001433 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001434}
1435
1436static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001437csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001438{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 PyObject *name_obj, *dialect_obj = NULL;
1440 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1443 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001444 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001446 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 return NULL;
1448 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001449 if (PyUnicode_READY(name_obj) == -1)
1450 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 dialect = _call_dialect(dialect_obj, kwargs);
1452 if (dialect == NULL)
1453 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001454 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 Py_DECREF(dialect);
1456 return NULL;
1457 }
1458 Py_DECREF(dialect);
1459 Py_INCREF(Py_None);
1460 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001461}
1462
1463static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001464csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001465{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001466 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1467 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 Py_INCREF(Py_None);
1469 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001470}
1471
/* Implementation of get_dialect(name): thin wrapper that forwards name_obj
 * to get_dialect_from_registry() and returns its result unchanged.
 */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1477
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001478static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001479csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001480{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001481 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001482 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001483
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1485 return NULL;
1486 if (new_limit != NULL) {
1487 if (!PyLong_CheckExact(new_limit)) {
1488 PyErr_Format(PyExc_TypeError,
1489 "limit must be an integer");
1490 return NULL;
1491 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001492 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1493 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1494 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 return NULL;
1496 }
1497 }
1498 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001499}
1500
Skip Montanarob4a04172003-03-20 23:29:12 +00001501/*
1502 * MODULE
1503 */
1504
/* Module-level documentation text: overview of the module, dialect
 * registration and the individual dialect settings.
 * NOTE(review): presumably installed as the module docstring; the module
 * definition is outside this section -- confirm.
 */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1562
/* Usage text for reader().
 * NOTE(review): presumably attached to csv_reader in the module method
 * table, which is outside this section -- confirm.
 */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001577
1578PyDoc_STRVAR(csv_writer_doc,
1579" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1580" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001581" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001582" csv_writer.writerow(row)\n"
1583"\n"
1584" [or]\n"
1585"\n"
1586" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1587" [optional keyword args])\n"
1588" csv_writer.writerows(rows)\n"
1589"\n"
1590"The \"fileobj\" argument can be any object that supports the file API.\n");
1591
1592PyDoc_STRVAR(csv_list_dialects_doc,
1593"Return a list of all know dialect names.\n"
1594" names = csv.list_dialects()");
1595
1596PyDoc_STRVAR(csv_get_dialect_doc,
1597"Return the dialect instance associated with name.\n"
1598" dialect = csv.get_dialect(name)");
1599
1600PyDoc_STRVAR(csv_register_dialect_doc,
1601"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001602" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001603
1604PyDoc_STRVAR(csv_unregister_dialect_doc,
1605"Delete the name/dialect mapping associated with a string name.\n"
1606" csv.unregister_dialect(name)");
1607
Andrew McNamara31d88962005-01-12 03:45:10 +00001608PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001609"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001610" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001611"\n"
1612"Returns old limit. If limit is not given, no new limit is set and\n"
1613"the old limit is returned");
1614
Skip Montanarob4a04172003-03-20 23:29:12 +00001615static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001616 { "reader", (PyCFunction)csv_reader,
1617 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1618 { "writer", (PyCFunction)csv_writer,
1619 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1620 { "list_dialects", (PyCFunction)csv_list_dialects,
1621 METH_NOARGS, csv_list_dialects_doc},
1622 { "register_dialect", (PyCFunction)csv_register_dialect,
1623 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1624 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1625 METH_O, csv_unregister_dialect_doc},
1626 { "get_dialect", (PyCFunction)csv_get_dialect,
1627 METH_O, csv_get_dialect_doc},
1628 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1629 METH_VARARGS, csv_field_size_limit_doc},
1630 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001631};
1632
Martin v. Löwis1a214512008-06-11 05:26:20 +00001633static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 PyModuleDef_HEAD_INIT,
1635 "_csv",
1636 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001637 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 csv_methods,
1639 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001640 _csv_traverse,
1641 _csv_clear,
1642 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001643};
1644
Skip Montanarob4a04172003-03-20 23:29:12 +00001645PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001646PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001647{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001649 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 if (PyType_Ready(&Dialect_Type) < 0)
1652 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 if (PyType_Ready(&Reader_Type) < 0)
1655 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 if (PyType_Ready(&Writer_Type) < 0)
1658 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001659
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 /* Create the module and add the functions */
1661 module = PyModule_Create(&_csvmodule);
1662 if (module == NULL)
1663 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 /* Add version to the module. */
1666 if (PyModule_AddStringConstant(module, "__version__",
1667 MODULE_VERSION) == -1)
1668 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001669
Antoine Pitroue7672d32012-05-16 11:33:08 +02001670 /* Set the field limit */
1671 _csvstate(module)->field_limit = 128 * 1024;
1672 /* Do I still need to add this var to the Module Dict? */
1673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001675 _csvstate(module)->dialects = PyDict_New();
1676 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001678 Py_INCREF(_csvstate(module)->dialects);
1679 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001682 /* Add quote styles into dictionary */
1683 for (style = quote_styles; style->name; style++) {
1684 if (PyModule_AddIntConstant(module, style->name,
1685 style->style) == -1)
1686 return NULL;
1687 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 /* Add the Dialect type */
1690 Py_INCREF(&Dialect_Type);
1691 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1692 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001693
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001695 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1696 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001697 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001698 Py_INCREF(_csvstate(module)->error_obj);
1699 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001700 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001701}