blob: 5314ef6edc438285a732c5a52326d328ccc7edd7 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's per-character state machine. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020063 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020066static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
125/*
126 * DIALECT class
127 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200155 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 }
157 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200158 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000159}
160
Skip Montanarob4a04172003-03-20 23:29:12 +0000161static PyObject *
162Dialect_get_lineterminator(DialectObj *self)
163{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000165}
166
Skip Montanarob4a04172003-03-20 23:29:12 +0000167static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000168Dialect_get_delimiter(DialectObj *self)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000171}
172
173static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000174Dialect_get_escapechar(DialectObj *self)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000177}
178
Andrew McNamara1196cf12005-01-07 04:42:45 +0000179static PyObject *
180Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000183}
184
185static PyObject *
186Dialect_get_quoting(DialectObj *self)
187{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000189}
190
191static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000192_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000193{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 if (src == NULL)
195 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200196 else {
197 int b = PyObject_IsTrue(src);
198 if (b < 0)
199 return -1;
200 *target = b;
201 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000203}
204
Andrew McNamara1196cf12005-01-07 04:42:45 +0000205static int
206_set_int(const char *name, int *target, PyObject *src, int dflt)
207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 if (src == NULL)
209 *target = dflt;
210 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200211 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 if (!PyLong_CheckExact(src)) {
213 PyErr_Format(PyExc_TypeError,
214 "\"%s\" must be an integer", name);
215 return -1;
216 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200217 value = _PyLong_AsInt(src);
218 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 return -1;
220 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200221 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 }
223 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000224}
225
226static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200227_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 if (src == NULL)
230 *target = dflt;
231 else {
232 *target = '\0';
233 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200235 if (!PyUnicode_Check(src)) {
236 PyErr_Format(PyExc_TypeError,
237 "\"%s\" must be string, not %.200s", name,
238 src->ob_type->tp_name);
239 return -1;
240 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100241 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200242 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300244 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 name);
246 return -1;
247 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100248 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200250 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 }
252 }
253 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000254}
255
256static int
257_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 if (src == NULL)
260 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
261 else {
262 if (src == Py_None)
263 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100264 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 PyErr_Format(PyExc_TypeError,
266 "\"%s\" must be a string", name);
267 return -1;
268 }
269 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100270 if (PyUnicode_READY(src) == -1)
271 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300273 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 }
275 }
276 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000277}
278
279static int
280dialect_check_quoting(int quoting)
281{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200282 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200285 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 return 0;
287 }
288 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
289 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290}
Skip Montanarob4a04172003-03-20 23:29:12 +0000291
292#define D_OFF(x) offsetof(DialectObj, x)
293
294static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
296 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
297 { "strict", T_INT, D_OFF(strict), READONLY },
298 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000299};
300
301static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 { "delimiter", (getter)Dialect_get_delimiter},
303 { "escapechar", (getter)Dialect_get_escapechar},
304 { "lineterminator", (getter)Dialect_get_lineterminator},
305 { "quotechar", (getter)Dialect_get_quotechar},
306 { "quoting", (getter)Dialect_get_quoting},
307 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000308};
309
310static void
311Dialect_dealloc(DialectObj *self)
312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 Py_XDECREF(self->lineterminator);
314 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000315}
316
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000317static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 "dialect",
319 "delimiter",
320 "doublequote",
321 "escapechar",
322 "lineterminator",
323 "quotechar",
324 "quoting",
325 "skipinitialspace",
326 "strict",
327 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000328};
329
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000330static PyObject *
331dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000332{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 DialectObj *self;
334 PyObject *ret = NULL;
335 PyObject *dialect = NULL;
336 PyObject *delimiter = NULL;
337 PyObject *doublequote = NULL;
338 PyObject *escapechar = NULL;
339 PyObject *lineterminator = NULL;
340 PyObject *quotechar = NULL;
341 PyObject *quoting = NULL;
342 PyObject *skipinitialspace = NULL;
343 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
346 "|OOOOOOOOO", dialect_kws,
347 &dialect,
348 &delimiter,
349 &doublequote,
350 &escapechar,
351 &lineterminator,
352 &quotechar,
353 &quoting,
354 &skipinitialspace,
355 &strict))
356 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100359 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 dialect = get_dialect_from_registry(dialect);
361 if (dialect == NULL)
362 return NULL;
363 }
364 else
365 Py_INCREF(dialect);
366 /* Can we reuse this instance? */
367 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200368 delimiter == NULL &&
369 doublequote == NULL &&
370 escapechar == NULL &&
371 lineterminator == NULL &&
372 quotechar == NULL &&
373 quoting == NULL &&
374 skipinitialspace == NULL &&
375 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 return dialect;
377 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 self = (DialectObj *)type->tp_alloc(type, 0);
380 if (self == NULL) {
381 Py_XDECREF(dialect);
382 return NULL;
383 }
384 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 Py_XINCREF(delimiter);
387 Py_XINCREF(doublequote);
388 Py_XINCREF(escapechar);
389 Py_XINCREF(lineterminator);
390 Py_XINCREF(quotechar);
391 Py_XINCREF(quoting);
392 Py_XINCREF(skipinitialspace);
393 Py_XINCREF(strict);
394 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000395#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 if (v == NULL) \
397 v = PyObject_GetAttrString(dialect, n)
398 DIALECT_GETATTR(delimiter, "delimiter");
399 DIALECT_GETATTR(doublequote, "doublequote");
400 DIALECT_GETATTR(escapechar, "escapechar");
401 DIALECT_GETATTR(lineterminator, "lineterminator");
402 DIALECT_GETATTR(quotechar, "quotechar");
403 DIALECT_GETATTR(quoting, "quoting");
404 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
405 DIALECT_GETATTR(strict, "strict");
406 PyErr_Clear();
407 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000410#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 if (meth(name, target, src, dflt)) \
412 goto err
413 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
414 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
415 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
416 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
417 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
418 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
419 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
420 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 /* validate options */
423 if (dialect_check_quoting(self->quoting))
424 goto err;
425 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200426 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300427 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000428 goto err;
429 }
430 if (quotechar == Py_None && quoting == NULL)
431 self->quoting = QUOTE_NONE;
432 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "quotechar must be set if quoting enabled");
435 goto err;
436 }
437 if (self->lineterminator == 0) {
438 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439 goto err;
440 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 ret = (PyObject *)self;
443 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 Py_XDECREF(self);
446 Py_XDECREF(dialect);
447 Py_XDECREF(delimiter);
448 Py_XDECREF(doublequote);
449 Py_XDECREF(escapechar);
450 Py_XDECREF(lineterminator);
451 Py_XDECREF(quotechar);
452 Py_XDECREF(quoting);
453 Py_XDECREF(skipinitialspace);
454 Py_XDECREF(strict);
455 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000456}
457
458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000460"CSV dialect\n"
461"\n"
462"The Dialect type records CSV parsing and generation options.\n");
463
464static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 PyVarObject_HEAD_INIT(NULL, 0)
466 "_csv.Dialect", /* tp_name */
467 sizeof(DialectObj), /* tp_basicsize */
468 0, /* tp_itemsize */
469 /* methods */
470 (destructor)Dialect_dealloc, /* tp_dealloc */
471 (printfunc)0, /* tp_print */
472 (getattrfunc)0, /* tp_getattr */
473 (setattrfunc)0, /* tp_setattr */
474 0, /* tp_reserved */
475 (reprfunc)0, /* tp_repr */
476 0, /* tp_as_number */
477 0, /* tp_as_sequence */
478 0, /* tp_as_mapping */
479 (hashfunc)0, /* tp_hash */
480 (ternaryfunc)0, /* tp_call */
481 (reprfunc)0, /* tp_str */
482 0, /* tp_getattro */
483 0, /* tp_setattro */
484 0, /* tp_as_buffer */
485 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
486 Dialect_Type_doc, /* tp_doc */
487 0, /* tp_traverse */
488 0, /* tp_clear */
489 0, /* tp_richcompare */
490 0, /* tp_weaklistoffset */
491 0, /* tp_iter */
492 0, /* tp_iternext */
493 0, /* tp_methods */
494 Dialect_memberlist, /* tp_members */
495 Dialect_getsetlist, /* tp_getset */
496 0, /* tp_base */
497 0, /* tp_dict */
498 0, /* tp_descr_get */
499 0, /* tp_descr_set */
500 0, /* tp_dictoffset */
501 0, /* tp_init */
502 0, /* tp_alloc */
503 dialect_new, /* tp_new */
504 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000505};
506
Andrew McNamara91b97462005-01-11 01:07:23 +0000507/*
508 * Return an instance of the dialect type, given a Python instance or kwarg
509 * description of the dialect
510 */
511static PyObject *
512_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513{
Victor Stinner6412f492016-08-23 00:21:34 +0200514 PyObject *type = (PyObject *)&Dialect_Type;
515 if (dialect_inst) {
516 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
517 }
518 else {
519 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
520 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000521}
522
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000523/*
524 * READER
525 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000526static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000527parse_save_field(ReaderObj *self)
528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000530
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200531 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
532 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (field == NULL)
534 return -1;
535 self->field_len = 0;
536 if (self->numeric_field) {
537 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 self->numeric_field = 0;
540 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200542 if (tmp == NULL)
543 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 field = tmp;
545 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100546 if (PyList_Append(self->fields, field) < 0) {
547 Py_DECREF(field);
548 return -1;
549 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 Py_DECREF(field);
551 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000552}
553
554static int
555parse_grow_buff(ReaderObj *self)
556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 if (self->field_size == 0) {
558 self->field_size = 4096;
559 if (self->field != NULL)
560 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200561 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 }
563 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200564 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000565 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 PyErr_NoMemory();
567 return 0;
568 }
569 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200570 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 }
572 if (self->field == NULL) {
573 PyErr_NoMemory();
574 return 0;
575 }
576 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000577}
578
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000579static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200580parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000581{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200582 if (self->field_len >= _csvstate_global->field_limit) {
583 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
584 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 return -1;
586 }
587 if (self->field_len == self->field_size && !parse_grow_buff(self))
588 return -1;
589 self->field[self->field_len++] = c;
590 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000591}
592
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000593static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200594parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000595{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000597
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 switch (self->state) {
599 case START_RECORD:
600 /* start of record */
601 if (c == '\0')
602 /* empty line - return [] */
603 break;
604 else if (c == '\n' || c == '\r') {
605 self->state = EAT_CRNL;
606 break;
607 }
608 /* normal character - handle as START_FIELD */
609 self->state = START_FIELD;
610 /* fallthru */
611 case START_FIELD:
612 /* expecting field */
613 if (c == '\n' || c == '\r' || c == '\0') {
614 /* save empty field - return [fields] */
615 if (parse_save_field(self) < 0)
616 return -1;
617 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
618 }
619 else if (c == dialect->quotechar &&
620 dialect->quoting != QUOTE_NONE) {
621 /* start quoted field */
622 self->state = IN_QUOTED_FIELD;
623 }
624 else if (c == dialect->escapechar) {
625 /* possible escaped character */
626 self->state = ESCAPED_CHAR;
627 }
628 else if (c == ' ' && dialect->skipinitialspace)
629 /* ignore space at start of field */
630 ;
631 else if (c == dialect->delimiter) {
632 /* save empty field */
633 if (parse_save_field(self) < 0)
634 return -1;
635 }
636 else {
637 /* begin new unquoted field */
638 if (dialect->quoting == QUOTE_NONNUMERIC)
639 self->numeric_field = 1;
640 if (parse_add_char(self, c) < 0)
641 return -1;
642 self->state = IN_FIELD;
643 }
644 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400647 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400648 if (parse_add_char(self, c) < 0)
649 return -1;
650 self->state = AFTER_ESCAPED_CRNL;
651 break;
652 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 if (c == '\0')
654 c = '\n';
655 if (parse_add_char(self, c) < 0)
656 return -1;
657 self->state = IN_FIELD;
658 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000659
R David Murrayc7c42ef2013-03-19 22:41:47 -0400660 case AFTER_ESCAPED_CRNL:
661 if (c == '\0')
662 break;
663 /*fallthru*/
664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 case IN_FIELD:
666 /* in unquoted field */
667 if (c == '\n' || c == '\r' || c == '\0') {
668 /* end of line - return [fields] */
669 if (parse_save_field(self) < 0)
670 return -1;
671 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
672 }
673 else if (c == dialect->escapechar) {
674 /* possible escaped character */
675 self->state = ESCAPED_CHAR;
676 }
677 else if (c == dialect->delimiter) {
678 /* save field - wait for new field */
679 if (parse_save_field(self) < 0)
680 return -1;
681 self->state = START_FIELD;
682 }
683 else {
684 /* normal character - save in field */
685 if (parse_add_char(self, c) < 0)
686 return -1;
687 }
688 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 case IN_QUOTED_FIELD:
691 /* in quoted field */
692 if (c == '\0')
693 ;
694 else if (c == dialect->escapechar) {
695 /* Possible escape character */
696 self->state = ESCAPE_IN_QUOTED_FIELD;
697 }
698 else if (c == dialect->quotechar &&
699 dialect->quoting != QUOTE_NONE) {
700 if (dialect->doublequote) {
701 /* doublequote; " represented by "" */
702 self->state = QUOTE_IN_QUOTED_FIELD;
703 }
704 else {
705 /* end of quote part of field */
706 self->state = IN_FIELD;
707 }
708 }
709 else {
710 /* normal character - save in field */
711 if (parse_add_char(self, c) < 0)
712 return -1;
713 }
714 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 case ESCAPE_IN_QUOTED_FIELD:
717 if (c == '\0')
718 c = '\n';
719 if (parse_add_char(self, c) < 0)
720 return -1;
721 self->state = IN_QUOTED_FIELD;
722 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300725 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 if (dialect->quoting != QUOTE_NONE &&
727 c == dialect->quotechar) {
728 /* save "" as " */
729 if (parse_add_char(self, c) < 0)
730 return -1;
731 self->state = IN_QUOTED_FIELD;
732 }
733 else if (c == dialect->delimiter) {
734 /* save field - wait for new field */
735 if (parse_save_field(self) < 0)
736 return -1;
737 self->state = START_FIELD;
738 }
739 else if (c == '\n' || c == '\r' || c == '\0') {
740 /* end of line - return [fields] */
741 if (parse_save_field(self) < 0)
742 return -1;
743 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
744 }
745 else if (!dialect->strict) {
746 if (parse_add_char(self, c) < 0)
747 return -1;
748 self->state = IN_FIELD;
749 }
750 else {
751 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200752 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000753 dialect->delimiter,
754 dialect->quotechar);
755 return -1;
756 }
757 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000759 case EAT_CRNL:
760 if (c == '\n' || c == '\r')
761 ;
762 else if (c == '\0')
763 self->state = START_RECORD;
764 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200765 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 return -1;
767 }
768 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 }
771 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000772}
773
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000774static int
775parse_reset(ReaderObj *self)
776{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300777 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 if (self->fields == NULL)
779 return -1;
780 self->field_len = 0;
781 self->state = START_RECORD;
782 self->numeric_field = 0;
783 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000784}
Skip Montanarob4a04172003-03-20 23:29:12 +0000785
786static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000787Reader_iternext(ReaderObj *self)
788{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200790 Py_UCS4 c;
791 Py_ssize_t pos, linelen;
792 unsigned int kind;
793 void *data;
794 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000795
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 if (parse_reset(self) < 0)
797 return NULL;
798 do {
799 lineobj = PyIter_Next(self->input_iter);
800 if (lineobj == NULL) {
801 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700802 if (!PyErr_Occurred() && (self->field_len != 0 ||
803 self->state == IN_QUOTED_FIELD)) {
804 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700805 PyErr_SetString(_csvstate_global->error_obj,
806 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700807 else if (parse_save_field(self) >= 0)
808 break;
809 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 return NULL;
811 }
812 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200813 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000814 "iterator should return strings, "
815 "not %.200s "
816 "(did you open the file in text mode?)",
817 lineobj->ob_type->tp_name
818 );
819 Py_DECREF(lineobj);
820 return NULL;
821 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100822 if (PyUnicode_READY(lineobj) == -1) {
823 Py_DECREF(lineobj);
824 return NULL;
825 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200827 kind = PyUnicode_KIND(lineobj);
828 data = PyUnicode_DATA(lineobj);
829 pos = 0;
830 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000831 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200832 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000834 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200835 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 "line contains NULL byte");
837 goto err;
838 }
839 if (parse_process_char(self, c) < 0) {
840 Py_DECREF(lineobj);
841 goto err;
842 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200843 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 }
845 Py_DECREF(lineobj);
846 if (parse_process_char(self, 0) < 0)
847 goto err;
848 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000849
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 fields = self->fields;
851 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000852err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000854}
855
856static void
857Reader_dealloc(ReaderObj *self)
858{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000859 PyObject_GC_UnTrack(self);
860 Py_XDECREF(self->dialect);
861 Py_XDECREF(self->input_iter);
862 Py_XDECREF(self->fields);
863 if (self->field != NULL)
864 PyMem_Free(self->field);
865 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000866}
867
868static int
869Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 Py_VISIT(self->dialect);
872 Py_VISIT(self->input_iter);
873 Py_VISIT(self->fields);
874 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000875}
876
877static int
878Reader_clear(ReaderObj *self)
879{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 Py_CLEAR(self->dialect);
881 Py_CLEAR(self->input_iter);
882 Py_CLEAR(self->fields);
883 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000884}
885
886PyDoc_STRVAR(Reader_Type_doc,
887"CSV reader\n"
888"\n"
889"Reader objects are responsible for reading and parsing tabular data\n"
890"in CSV format.\n"
891);
892
893static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000894 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000895};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000896#define R_OFF(x) offsetof(ReaderObj, x)
897
898static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
900 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
901 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000902};
903
Skip Montanarob4a04172003-03-20 23:29:12 +0000904
905static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 PyVarObject_HEAD_INIT(NULL, 0)
907 "_csv.reader", /*tp_name*/
908 sizeof(ReaderObj), /*tp_basicsize*/
909 0, /*tp_itemsize*/
910 /* methods */
911 (destructor)Reader_dealloc, /*tp_dealloc*/
912 (printfunc)0, /*tp_print*/
913 (getattrfunc)0, /*tp_getattr*/
914 (setattrfunc)0, /*tp_setattr*/
915 0, /*tp_reserved*/
916 (reprfunc)0, /*tp_repr*/
917 0, /*tp_as_number*/
918 0, /*tp_as_sequence*/
919 0, /*tp_as_mapping*/
920 (hashfunc)0, /*tp_hash*/
921 (ternaryfunc)0, /*tp_call*/
922 (reprfunc)0, /*tp_str*/
923 0, /*tp_getattro*/
924 0, /*tp_setattro*/
925 0, /*tp_as_buffer*/
926 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
927 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
928 Reader_Type_doc, /*tp_doc*/
929 (traverseproc)Reader_traverse, /*tp_traverse*/
930 (inquiry)Reader_clear, /*tp_clear*/
931 0, /*tp_richcompare*/
932 0, /*tp_weaklistoffset*/
933 PyObject_SelfIter, /*tp_iter*/
934 (getiterfunc)Reader_iternext, /*tp_iternext*/
935 Reader_methods, /*tp_methods*/
936 Reader_memberlist, /*tp_members*/
937 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000938
939};
940
941static PyObject *
942csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
943{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 PyObject * iterator, * dialect = NULL;
945 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 if (!self)
948 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 self->dialect = NULL;
951 self->fields = NULL;
952 self->input_iter = NULL;
953 self->field = NULL;
954 self->field_size = 0;
955 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000956
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000957 if (parse_reset(self) < 0) {
958 Py_DECREF(self);
959 return NULL;
960 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000961
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000962 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
963 Py_DECREF(self);
964 return NULL;
965 }
966 self->input_iter = PyObject_GetIter(iterator);
967 if (self->input_iter == NULL) {
968 PyErr_SetString(PyExc_TypeError,
969 "argument 1 must be an iterator");
970 Py_DECREF(self);
971 return NULL;
972 }
973 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
974 if (self->dialect == NULL) {
975 Py_DECREF(self);
976 return NULL;
977 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000978
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000979 PyObject_GC_Track(self);
980 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000981}
982
983/*
984 * WRITER
985 */
986/* ---------------------------------------------------------------- */
987static void
988join_reset(WriterObj *self)
989{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 self->rec_len = 0;
991 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000992}
993
/* The writer's record buffer is sized in multiples of this many Py_UCS4
 * elements (see join_check_rec_size()). */
#define MEM_INCR (32 * 1024)
995
996/* Calculate new record length or append field to record. Return new
997 * record length.
998 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000999static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001000join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001001 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001002 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001003{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 DialectObj *dialect = self->dialect;
1005 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001006 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001007
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001008#define INCLEN \
1009 do {\
1010 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1011 goto overflow; \
1012 } \
1013 rec_len++; \
1014 } while(0)
1015
1016#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 do {\
1018 if (copy_phase) \
1019 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001020 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001022
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 /* If this is not the first field we need a field separator */
1026 if (self->num_fields > 0)
1027 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001028
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 /* Handle preceding quote */
1030 if (copy_phase && *quoted)
1031 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001032
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 /* Copy/count field data */
1034 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001035 for (i = 0; field_data && (i < field_len); i++) {
1036 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001038
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001039 if (c == dialect->delimiter ||
1040 c == dialect->escapechar ||
1041 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001042 PyUnicode_FindChar(
1043 dialect->lineterminator, c, 0,
1044 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001045 if (dialect->quoting == QUOTE_NONE)
1046 want_escape = 1;
1047 else {
1048 if (c == dialect->quotechar) {
1049 if (dialect->doublequote)
1050 ADDCH(dialect->quotechar);
1051 else
1052 want_escape = 1;
1053 }
1054 if (!want_escape)
1055 *quoted = 1;
1056 }
1057 if (want_escape) {
1058 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001059 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 "need to escape, but no escapechar set");
1061 return -1;
1062 }
1063 ADDCH(dialect->escapechar);
1064 }
1065 }
1066 /* Copy field character into record buffer.
1067 */
1068 ADDCH(c);
1069 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001070
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 if (*quoted) {
1072 if (copy_phase)
1073 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001074 else {
1075 INCLEN; /* starting quote */
1076 INCLEN; /* ending quote */
1077 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 }
1079 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001080
1081 overflow:
1082 PyErr_NoMemory();
1083 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001084#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001085#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001086}
1087
1088static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001089join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001090{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001091
Antoine Pitrou40455752010-08-15 18:51:10 +00001092 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 PyErr_NoMemory();
1094 return 0;
1095 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 if (rec_len > self->rec_size) {
1098 if (self->rec_size == 0) {
1099 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1100 if (self->rec != NULL)
1101 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001102 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 }
1104 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001105 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001108 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001109 if (self->rec == NULL)
1110 PyMem_Free(old_rec);
1111 }
1112 if (self->rec == NULL) {
1113 PyErr_NoMemory();
1114 return 0;
1115 }
1116 }
1117 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118}
1119
1120static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001121join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001122{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001123 unsigned int field_kind = -1;
1124 void *field_data = NULL;
1125 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001126 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001128 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001129 if (PyUnicode_READY(field) == -1)
1130 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001131 field_kind = PyUnicode_KIND(field);
1132 field_data = PyUnicode_DATA(field);
1133 field_len = PyUnicode_GET_LENGTH(field);
1134 }
1135 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001136 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 if (rec_len < 0)
1138 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 /* grow record buffer if necessary */
1141 if (!join_check_rec_size(self, rec_len))
1142 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001144 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001145 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001149}
1150
1151static int
1152join_append_lineterminator(WriterObj *self)
1153{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001154 Py_ssize_t terminator_len, i;
1155 unsigned int term_kind;
1156 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001158 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 if (terminator_len == -1)
1160 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001161
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 /* grow record buffer if necessary */
1163 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1164 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001166 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1167 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1168 for (i = 0; i < terminator_len; i++)
1169 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001173}
1174
1175PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001176"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001177"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001178"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001179"elements will be converted to string.");
1180
1181static PyObject *
1182csv_writerow(WriterObj *self, PyObject *seq)
1183{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001185 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001186
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001187 iter = PyObject_GetIter(seq);
1188 if (iter == NULL)
1189 return PyErr_Format(_csvstate_global->error_obj,
1190 "iterable expected, not %.200s",
1191 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 /* Join all fields in internal buffer.
1194 */
1195 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001196 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 int append_ok;
1198 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 switch (dialect->quoting) {
1201 case QUOTE_NONNUMERIC:
1202 quoted = !PyNumber_Check(field);
1203 break;
1204 case QUOTE_ALL:
1205 quoted = 1;
1206 break;
1207 default:
1208 quoted = 0;
1209 break;
1210 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001213 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 Py_DECREF(field);
1215 }
1216 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001217 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 Py_DECREF(field);
1219 }
1220 else {
1221 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 str = PyObject_Str(field);
1224 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001225 if (str == NULL) {
1226 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001228 }
1229 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 Py_DECREF(str);
1231 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001232 if (!append_ok) {
1233 Py_DECREF(iter);
1234 return NULL;
1235 }
1236 }
1237 Py_DECREF(iter);
1238 if (PyErr_Occurred())
1239 return NULL;
1240
1241 if (self->num_fields > 0 && self->rec_size == 0) {
1242 if (dialect->quoting == QUOTE_NONE) {
1243 PyErr_Format(_csvstate_global->error_obj,
1244 "single empty field record must be quoted");
1245 return NULL;
1246 }
1247 self->num_fields--;
1248 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001249 return NULL;
1250 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001251
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001252 /* Add line terminator.
1253 */
1254 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001255 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001256
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001257 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1258 (void *) self->rec, self->rec_len);
1259 if (line == NULL)
1260 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001261 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001262 Py_DECREF(line);
1263 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001264}
1265
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001266PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001267"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001268"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001269"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001270"elements will be converted to string.");
1271
Skip Montanarob4a04172003-03-20 23:29:12 +00001272static PyObject *
1273csv_writerows(WriterObj *self, PyObject *seqseq)
1274{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001276
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 row_iter = PyObject_GetIter(seqseq);
1278 if (row_iter == NULL) {
1279 PyErr_SetString(PyExc_TypeError,
1280 "writerows() argument must be iterable");
1281 return NULL;
1282 }
1283 while ((row_obj = PyIter_Next(row_iter))) {
1284 result = csv_writerow(self, row_obj);
1285 Py_DECREF(row_obj);
1286 if (!result) {
1287 Py_DECREF(row_iter);
1288 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001289 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 else
1291 Py_DECREF(result);
1292 }
1293 Py_DECREF(row_iter);
1294 if (PyErr_Occurred())
1295 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001296 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001297}
1298
1299static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1301 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1302 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001303};
1304
1305#define W_OFF(x) offsetof(WriterObj, x)
1306
1307static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1309 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001310};
1311
1312static void
1313Writer_dealloc(WriterObj *self)
1314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 PyObject_GC_UnTrack(self);
1316 Py_XDECREF(self->dialect);
1317 Py_XDECREF(self->writeline);
1318 if (self->rec != NULL)
1319 PyMem_Free(self->rec);
1320 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001321}
1322
1323static int
1324Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1325{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 Py_VISIT(self->dialect);
1327 Py_VISIT(self->writeline);
1328 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001329}
1330
1331static int
1332Writer_clear(WriterObj *self)
1333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 Py_CLEAR(self->dialect);
1335 Py_CLEAR(self->writeline);
1336 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001337}
1338
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001340"CSV writer\n"
1341"\n"
1342"Writer objects are responsible for generating tabular data\n"
1343"in CSV format from sequence input.\n"
1344);
1345
1346static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 PyVarObject_HEAD_INIT(NULL, 0)
1348 "_csv.writer", /*tp_name*/
1349 sizeof(WriterObj), /*tp_basicsize*/
1350 0, /*tp_itemsize*/
1351 /* methods */
1352 (destructor)Writer_dealloc, /*tp_dealloc*/
1353 (printfunc)0, /*tp_print*/
1354 (getattrfunc)0, /*tp_getattr*/
1355 (setattrfunc)0, /*tp_setattr*/
1356 0, /*tp_reserved*/
1357 (reprfunc)0, /*tp_repr*/
1358 0, /*tp_as_number*/
1359 0, /*tp_as_sequence*/
1360 0, /*tp_as_mapping*/
1361 (hashfunc)0, /*tp_hash*/
1362 (ternaryfunc)0, /*tp_call*/
1363 (reprfunc)0, /*tp_str*/
1364 0, /*tp_getattro*/
1365 0, /*tp_setattro*/
1366 0, /*tp_as_buffer*/
1367 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1368 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1369 Writer_Type_doc,
1370 (traverseproc)Writer_traverse, /*tp_traverse*/
1371 (inquiry)Writer_clear, /*tp_clear*/
1372 0, /*tp_richcompare*/
1373 0, /*tp_weaklistoffset*/
1374 (getiterfunc)0, /*tp_iter*/
1375 (getiterfunc)0, /*tp_iternext*/
1376 Writer_methods, /*tp_methods*/
1377 Writer_memberlist, /*tp_members*/
1378 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001379};
1380
1381static PyObject *
1382csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1383{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 PyObject * output_file, * dialect = NULL;
1385 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001386 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 if (!self)
1389 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 self->dialect = NULL;
1392 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 self->rec = NULL;
1395 self->rec_size = 0;
1396 self->rec_len = 0;
1397 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1400 Py_DECREF(self);
1401 return NULL;
1402 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001403 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1405 PyErr_SetString(PyExc_TypeError,
1406 "argument 1 must have a \"write\" method");
1407 Py_DECREF(self);
1408 return NULL;
1409 }
1410 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1411 if (self->dialect == NULL) {
1412 Py_DECREF(self);
1413 return NULL;
1414 }
1415 PyObject_GC_Track(self);
1416 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001417}
1418
1419/*
1420 * DIALECT REGISTRY
1421 */
1422static PyObject *
1423csv_list_dialects(PyObject *module, PyObject *args)
1424{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001425 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001426}
1427
1428static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001429csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001430{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 PyObject *name_obj, *dialect_obj = NULL;
1432 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001433
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1435 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001436 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001437 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001438 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 return NULL;
1440 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001441 if (PyUnicode_READY(name_obj) == -1)
1442 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001443 dialect = _call_dialect(dialect_obj, kwargs);
1444 if (dialect == NULL)
1445 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001446 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 Py_DECREF(dialect);
1448 return NULL;
1449 }
1450 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001451 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001452}
1453
1454static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001455csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001456{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001457 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1458 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001459 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001460}
1461
1462static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001463csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001464{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001466}
1467
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001468static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001469csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001470{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001472 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001473
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001474 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1475 return NULL;
1476 if (new_limit != NULL) {
1477 if (!PyLong_CheckExact(new_limit)) {
1478 PyErr_Format(PyExc_TypeError,
1479 "limit must be an integer");
1480 return NULL;
1481 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001482 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1483 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1484 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001485 return NULL;
1486 }
1487 }
1488 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001489}
1490
Skip Montanarob4a04172003-03-20 23:29:12 +00001491/*
1492 * MODULE
1493 */
1494
1495PyDoc_STRVAR(csv_module_doc,
1496"CSV parsing and writing.\n"
1497"\n"
1498"This module provides classes that assist in the reading and writing\n"
1499"of Comma Separated Value (CSV) files, and implements the interface\n"
1500"described by PEP 305. Although many CSV files are simple to parse,\n"
1501"the format is not formally defined by a stable specification and\n"
1502"is subtle enough that parsing lines of a CSV file with something\n"
1503"like line.split(\",\") is bound to fail. The module supports three\n"
1504"basic APIs: reading, writing, and registration of dialects.\n"
1505"\n"
1506"\n"
1507"DIALECT REGISTRATION:\n"
1508"\n"
1509"Readers and writers support a dialect argument, which is a convenient\n"
1510"handle on a group of settings. When the dialect argument is a string,\n"
1511"it identifies one of the dialects previously registered with the module.\n"
1512"If it is a class or instance, the attributes of the argument are used as\n"
1513"the settings for the reader or writer:\n"
1514"\n"
1515" class excel:\n"
1516" delimiter = ','\n"
1517" quotechar = '\"'\n"
1518" escapechar = None\n"
1519" doublequote = True\n"
1520" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001521" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001522" quoting = QUOTE_MINIMAL\n"
1523"\n"
1524"SETTINGS:\n"
1525"\n"
1526" * quotechar - specifies a one-character string to use as the \n"
1527" quoting character. It defaults to '\"'.\n"
1528" * delimiter - specifies a one-character string to use as the \n"
1529" field separator. It defaults to ','.\n"
1530" * skipinitialspace - specifies how to interpret whitespace which\n"
1531" immediately follows a delimiter. It defaults to False, which\n"
1532" means that whitespace immediately following a delimiter is part\n"
1533" of the following field.\n"
1534" * lineterminator - specifies the character sequence which should \n"
1535" terminate rows.\n"
1536" * quoting - controls when quotes should be generated by the writer.\n"
1537" It can take on any of the following module constants:\n"
1538"\n"
1539" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1540" field contains either the quotechar or the delimiter\n"
1541" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1542" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001543" fields which do not parse as integers or floating point\n"
1544" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001545" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1546" * escapechar - specifies a one-character string used to escape \n"
1547" the delimiter when quoting is set to QUOTE_NONE.\n"
1548" * doublequote - controls the handling of quotes inside fields. When\n"
1549" True, two consecutive quotes are interpreted as one during read,\n"
1550" and when writing, each quote character embedded in the data is\n"
1551" written as two quotes\n");
1552
1553PyDoc_STRVAR(csv_reader_doc,
1554" csv_reader = reader(iterable [, dialect='excel']\n"
1555" [optional keyword args])\n"
1556" for row in csv_reader:\n"
1557" process(row)\n"
1558"\n"
1559"The \"iterable\" argument can be any object that returns a line\n"
1560"of input for each iteration, such as a file object or a list. The\n"
1561"optional \"dialect\" parameter is discussed below. The function\n"
1562"also accepts optional keyword arguments which override settings\n"
1563"provided by the dialect.\n"
1564"\n"
1565"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001566"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001567
1568PyDoc_STRVAR(csv_writer_doc,
1569" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1570" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001571" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001572" csv_writer.writerow(row)\n"
1573"\n"
1574" [or]\n"
1575"\n"
1576" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1577" [optional keyword args])\n"
1578" csv_writer.writerows(rows)\n"
1579"\n"
1580"The \"fileobj\" argument can be any object that supports the file API.\n");
1581
1582PyDoc_STRVAR(csv_list_dialects_doc,
1583"Return a list of all know dialect names.\n"
1584" names = csv.list_dialects()");
1585
1586PyDoc_STRVAR(csv_get_dialect_doc,
1587"Return the dialect instance associated with name.\n"
1588" dialect = csv.get_dialect(name)");
1589
1590PyDoc_STRVAR(csv_register_dialect_doc,
1591"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001592" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001593
1594PyDoc_STRVAR(csv_unregister_dialect_doc,
1595"Delete the name/dialect mapping associated with a string name.\n"
1596" csv.unregister_dialect(name)");
1597
Andrew McNamara31d88962005-01-12 03:45:10 +00001598PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001599"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001600" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001601"\n"
1602"Returns old limit. If limit is not given, no new limit is set and\n"
1603"the old limit is returned");
1604
Skip Montanarob4a04172003-03-20 23:29:12 +00001605static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 { "reader", (PyCFunction)csv_reader,
1607 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1608 { "writer", (PyCFunction)csv_writer,
1609 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1610 { "list_dialects", (PyCFunction)csv_list_dialects,
1611 METH_NOARGS, csv_list_dialects_doc},
1612 { "register_dialect", (PyCFunction)csv_register_dialect,
1613 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1614 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1615 METH_O, csv_unregister_dialect_doc},
1616 { "get_dialect", (PyCFunction)csv_get_dialect,
1617 METH_O, csv_get_dialect_doc},
1618 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1619 METH_VARARGS, csv_field_size_limit_doc},
1620 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001621};
1622
Martin v. Löwis1a214512008-06-11 05:26:20 +00001623static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 PyModuleDef_HEAD_INIT,
1625 "_csv",
1626 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001627 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 csv_methods,
1629 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001630 _csv_traverse,
1631 _csv_clear,
1632 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001633};
1634
Skip Montanarob4a04172003-03-20 23:29:12 +00001635PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001636PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001637{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001638 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001639 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001640
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001641 if (PyType_Ready(&Dialect_Type) < 0)
1642 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 if (PyType_Ready(&Reader_Type) < 0)
1645 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 if (PyType_Ready(&Writer_Type) < 0)
1648 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001649
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 /* Create the module and add the functions */
1651 module = PyModule_Create(&_csvmodule);
1652 if (module == NULL)
1653 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001654
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001655 /* Add version to the module. */
1656 if (PyModule_AddStringConstant(module, "__version__",
1657 MODULE_VERSION) == -1)
1658 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001659
Antoine Pitroue7672d32012-05-16 11:33:08 +02001660 /* Set the field limit */
1661 _csvstate(module)->field_limit = 128 * 1024;
1662 /* Do I still need to add this var to the Module Dict? */
1663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001665 _csvstate(module)->dialects = PyDict_New();
1666 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001668 Py_INCREF(_csvstate(module)->dialects);
1669 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001671
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 /* Add quote styles into dictionary */
1673 for (style = quote_styles; style->name; style++) {
1674 if (PyModule_AddIntConstant(module, style->name,
1675 style->style) == -1)
1676 return NULL;
1677 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001678
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 /* Add the Dialect type */
1680 Py_INCREF(&Dialect_Type);
1681 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1682 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001683
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001684 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001685 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1686 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001687 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001688 Py_INCREF(_csvstate(module)->error_obj);
1689 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001690 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001691}