/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's per-character state machine
 * (see parse_process_char()). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting styles accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting styles; terminated by an entry whose
 * name is NULL. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200155 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000156 }
157 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200158 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000159}
160
Skip Montanarob4a04172003-03-20 23:29:12 +0000161static PyObject *
162Dialect_get_lineterminator(DialectObj *self)
163{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000164 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000165}
166
Skip Montanarob4a04172003-03-20 23:29:12 +0000167static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000168Dialect_get_delimiter(DialectObj *self)
169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000170 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000171}
172
173static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000174Dialect_get_escapechar(DialectObj *self)
175{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000176 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000177}
178
Andrew McNamara1196cf12005-01-07 04:42:45 +0000179static PyObject *
180Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000182 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000183}
184
185static PyObject *
186Dialect_get_quoting(DialectObj *self)
187{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000188 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000189}
190
191static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000192_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000193{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 if (src == NULL)
195 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200196 else {
197 int b = PyObject_IsTrue(src);
198 if (b < 0)
199 return -1;
200 *target = b;
201 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000202 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000203}
204
Andrew McNamara1196cf12005-01-07 04:42:45 +0000205static int
206_set_int(const char *name, int *target, PyObject *src, int dflt)
207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 if (src == NULL)
209 *target = dflt;
210 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200211 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000212 if (!PyLong_CheckExact(src)) {
213 PyErr_Format(PyExc_TypeError,
214 "\"%s\" must be an integer", name);
215 return -1;
216 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200217 value = _PyLong_AsInt(src);
218 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 return -1;
220 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200221 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 }
223 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000224}
225
226static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200227_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000228{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 if (src == NULL)
230 *target = dflt;
231 else {
232 *target = '\0';
233 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200235 if (!PyUnicode_Check(src)) {
236 PyErr_Format(PyExc_TypeError,
237 "\"%s\" must be string, not %.200s", name,
238 src->ob_type->tp_name);
239 return -1;
240 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100241 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200242 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000243 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300244 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000245 name);
246 return -1;
247 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100248 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000249 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200250 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 }
252 }
253 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000254}
255
256static int
257_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000259 if (src == NULL)
260 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
261 else {
262 if (src == Py_None)
263 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100264 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000265 PyErr_Format(PyExc_TypeError,
266 "\"%s\" must be a string", name);
267 return -1;
268 }
269 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100270 if (PyUnicode_READY(src) == -1)
271 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300273 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000274 }
275 }
276 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000277}
278
279static int
280dialect_check_quoting(int quoting)
281{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200282 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000283
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000284 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200285 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 return 0;
287 }
288 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
289 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290}
Skip Montanarob4a04172003-03-20 23:29:12 +0000291
292#define D_OFF(x) offsetof(DialectObj, x)
293
294static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000295 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
296 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
297 { "strict", T_INT, D_OFF(strict), READONLY },
298 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000299};
300
301static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000302 { "delimiter", (getter)Dialect_get_delimiter},
303 { "escapechar", (getter)Dialect_get_escapechar},
304 { "lineterminator", (getter)Dialect_get_lineterminator},
305 { "quotechar", (getter)Dialect_get_quotechar},
306 { "quoting", (getter)Dialect_get_quoting},
307 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000308};
309
310static void
311Dialect_dealloc(DialectObj *self)
312{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 Py_XDECREF(self->lineterminator);
314 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000315}
316
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000317static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 "dialect",
319 "delimiter",
320 "doublequote",
321 "escapechar",
322 "lineterminator",
323 "quotechar",
324 "quoting",
325 "skipinitialspace",
326 "strict",
327 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000328};
329
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000330static PyObject *
331dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000332{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 DialectObj *self;
334 PyObject *ret = NULL;
335 PyObject *dialect = NULL;
336 PyObject *delimiter = NULL;
337 PyObject *doublequote = NULL;
338 PyObject *escapechar = NULL;
339 PyObject *lineterminator = NULL;
340 PyObject *quotechar = NULL;
341 PyObject *quoting = NULL;
342 PyObject *skipinitialspace = NULL;
343 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000344
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000345 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
346 "|OOOOOOOOO", dialect_kws,
347 &dialect,
348 &delimiter,
349 &doublequote,
350 &escapechar,
351 &lineterminator,
352 &quotechar,
353 &quoting,
354 &skipinitialspace,
355 &strict))
356 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000357
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000358 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100359 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 dialect = get_dialect_from_registry(dialect);
361 if (dialect == NULL)
362 return NULL;
363 }
364 else
365 Py_INCREF(dialect);
366 /* Can we reuse this instance? */
367 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200368 delimiter == NULL &&
369 doublequote == NULL &&
370 escapechar == NULL &&
371 lineterminator == NULL &&
372 quotechar == NULL &&
373 quoting == NULL &&
374 skipinitialspace == NULL &&
375 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000376 return dialect;
377 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000379 self = (DialectObj *)type->tp_alloc(type, 0);
380 if (self == NULL) {
381 Py_XDECREF(dialect);
382 return NULL;
383 }
384 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 Py_XINCREF(delimiter);
387 Py_XINCREF(doublequote);
388 Py_XINCREF(escapechar);
389 Py_XINCREF(lineterminator);
390 Py_XINCREF(quotechar);
391 Py_XINCREF(quoting);
392 Py_XINCREF(skipinitialspace);
393 Py_XINCREF(strict);
394 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000395#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000396 if (v == NULL) \
397 v = PyObject_GetAttrString(dialect, n)
398 DIALECT_GETATTR(delimiter, "delimiter");
399 DIALECT_GETATTR(doublequote, "doublequote");
400 DIALECT_GETATTR(escapechar, "escapechar");
401 DIALECT_GETATTR(lineterminator, "lineterminator");
402 DIALECT_GETATTR(quotechar, "quotechar");
403 DIALECT_GETATTR(quoting, "quoting");
404 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
405 DIALECT_GETATTR(strict, "strict");
406 PyErr_Clear();
407 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000409 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000410#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 if (meth(name, target, src, dflt)) \
412 goto err
413 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
414 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
415 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
416 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
417 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
418 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
419 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
420 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 /* validate options */
423 if (dialect_check_quoting(self->quoting))
424 goto err;
425 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200426 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300427 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000428 goto err;
429 }
430 if (quotechar == Py_None && quoting == NULL)
431 self->quoting = QUOTE_NONE;
432 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
433 PyErr_SetString(PyExc_TypeError,
434 "quotechar must be set if quoting enabled");
435 goto err;
436 }
437 if (self->lineterminator == 0) {
438 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
439 goto err;
440 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 ret = (PyObject *)self;
443 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 Py_XDECREF(self);
446 Py_XDECREF(dialect);
447 Py_XDECREF(delimiter);
448 Py_XDECREF(doublequote);
449 Py_XDECREF(escapechar);
450 Py_XDECREF(lineterminator);
451 Py_XDECREF(quotechar);
452 Py_XDECREF(quoting);
453 Py_XDECREF(skipinitialspace);
454 Py_XDECREF(strict);
455 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000456}
457
458
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000459PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000460"CSV dialect\n"
461"\n"
462"The Dialect type records CSV parsing and generation options.\n");
463
464static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000465 PyVarObject_HEAD_INIT(NULL, 0)
466 "_csv.Dialect", /* tp_name */
467 sizeof(DialectObj), /* tp_basicsize */
468 0, /* tp_itemsize */
469 /* methods */
470 (destructor)Dialect_dealloc, /* tp_dealloc */
471 (printfunc)0, /* tp_print */
472 (getattrfunc)0, /* tp_getattr */
473 (setattrfunc)0, /* tp_setattr */
474 0, /* tp_reserved */
475 (reprfunc)0, /* tp_repr */
476 0, /* tp_as_number */
477 0, /* tp_as_sequence */
478 0, /* tp_as_mapping */
479 (hashfunc)0, /* tp_hash */
480 (ternaryfunc)0, /* tp_call */
481 (reprfunc)0, /* tp_str */
482 0, /* tp_getattro */
483 0, /* tp_setattro */
484 0, /* tp_as_buffer */
485 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
486 Dialect_Type_doc, /* tp_doc */
487 0, /* tp_traverse */
488 0, /* tp_clear */
489 0, /* tp_richcompare */
490 0, /* tp_weaklistoffset */
491 0, /* tp_iter */
492 0, /* tp_iternext */
493 0, /* tp_methods */
494 Dialect_memberlist, /* tp_members */
495 Dialect_getsetlist, /* tp_getset */
496 0, /* tp_base */
497 0, /* tp_dict */
498 0, /* tp_descr_get */
499 0, /* tp_descr_set */
500 0, /* tp_dictoffset */
501 0, /* tp_init */
502 0, /* tp_alloc */
503 dialect_new, /* tp_new */
504 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000505};
506
Andrew McNamara91b97462005-01-11 01:07:23 +0000507/*
508 * Return an instance of the dialect type, given a Python instance or kwarg
509 * description of the dialect
510 */
511static PyObject *
512_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
513{
Victor Stinner6412f492016-08-23 00:21:34 +0200514 PyObject *type = (PyObject *)&Dialect_Type;
515 if (dialect_inst) {
516 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
517 }
518 else {
519 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
520 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000521}
522
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000526static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000527parse_save_field(ReaderObj *self)
528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000530
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200531 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
532 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000533 if (field == NULL)
534 return -1;
535 self->field_len = 0;
536 if (self->numeric_field) {
537 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 self->numeric_field = 0;
540 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000541 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200542 if (tmp == NULL)
543 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 field = tmp;
545 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100546 if (PyList_Append(self->fields, field) < 0) {
547 Py_DECREF(field);
548 return -1;
549 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000550 Py_DECREF(field);
551 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000552}
553
554static int
555parse_grow_buff(ReaderObj *self)
556{
Miss Islington (bot)962051e2018-08-16 00:53:00 -0400557 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
558
559 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
560 Py_UCS4 *field_new = self->field;
561 PyMem_Resize(field_new, Py_UCS4, field_size_new);
562 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 PyErr_NoMemory();
564 return 0;
565 }
Miss Islington (bot)962051e2018-08-16 00:53:00 -0400566 self->field = field_new;
567 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000569}
570
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000571static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200572parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000573{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200574 if (self->field_len >= _csvstate_global->field_limit) {
575 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
576 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 return -1;
578 }
579 if (self->field_len == self->field_size && !parse_grow_buff(self))
580 return -1;
581 self->field[self->field_len++] = c;
582 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000583}
584
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000585static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200586parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000587{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000589
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 switch (self->state) {
591 case START_RECORD:
592 /* start of record */
593 if (c == '\0')
594 /* empty line - return [] */
595 break;
596 else if (c == '\n' || c == '\r') {
597 self->state = EAT_CRNL;
598 break;
599 }
600 /* normal character - handle as START_FIELD */
601 self->state = START_FIELD;
602 /* fallthru */
603 case START_FIELD:
604 /* expecting field */
605 if (c == '\n' || c == '\r' || c == '\0') {
606 /* save empty field - return [fields] */
607 if (parse_save_field(self) < 0)
608 return -1;
609 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
610 }
611 else if (c == dialect->quotechar &&
612 dialect->quoting != QUOTE_NONE) {
613 /* start quoted field */
614 self->state = IN_QUOTED_FIELD;
615 }
616 else if (c == dialect->escapechar) {
617 /* possible escaped character */
618 self->state = ESCAPED_CHAR;
619 }
620 else if (c == ' ' && dialect->skipinitialspace)
621 /* ignore space at start of field */
622 ;
623 else if (c == dialect->delimiter) {
624 /* save empty field */
625 if (parse_save_field(self) < 0)
626 return -1;
627 }
628 else {
629 /* begin new unquoted field */
630 if (dialect->quoting == QUOTE_NONNUMERIC)
631 self->numeric_field = 1;
632 if (parse_add_char(self, c) < 0)
633 return -1;
634 self->state = IN_FIELD;
635 }
636 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000637
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000638 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400639 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400640 if (parse_add_char(self, c) < 0)
641 return -1;
642 self->state = AFTER_ESCAPED_CRNL;
643 break;
644 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 if (c == '\0')
646 c = '\n';
647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = IN_FIELD;
650 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000651
R David Murrayc7c42ef2013-03-19 22:41:47 -0400652 case AFTER_ESCAPED_CRNL:
653 if (c == '\0')
654 break;
655 /*fallthru*/
656
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 case IN_FIELD:
658 /* in unquoted field */
659 if (c == '\n' || c == '\r' || c == '\0') {
660 /* end of line - return [fields] */
661 if (parse_save_field(self) < 0)
662 return -1;
663 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
664 }
665 else if (c == dialect->escapechar) {
666 /* possible escaped character */
667 self->state = ESCAPED_CHAR;
668 }
669 else if (c == dialect->delimiter) {
670 /* save field - wait for new field */
671 if (parse_save_field(self) < 0)
672 return -1;
673 self->state = START_FIELD;
674 }
675 else {
676 /* normal character - save in field */
677 if (parse_add_char(self, c) < 0)
678 return -1;
679 }
680 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 case IN_QUOTED_FIELD:
683 /* in quoted field */
684 if (c == '\0')
685 ;
686 else if (c == dialect->escapechar) {
687 /* Possible escape character */
688 self->state = ESCAPE_IN_QUOTED_FIELD;
689 }
690 else if (c == dialect->quotechar &&
691 dialect->quoting != QUOTE_NONE) {
692 if (dialect->doublequote) {
693 /* doublequote; " represented by "" */
694 self->state = QUOTE_IN_QUOTED_FIELD;
695 }
696 else {
697 /* end of quote part of field */
698 self->state = IN_FIELD;
699 }
700 }
701 else {
702 /* normal character - save in field */
703 if (parse_add_char(self, c) < 0)
704 return -1;
705 }
706 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000707
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000708 case ESCAPE_IN_QUOTED_FIELD:
709 if (c == '\0')
710 c = '\n';
711 if (parse_add_char(self, c) < 0)
712 return -1;
713 self->state = IN_QUOTED_FIELD;
714 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300717 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 if (dialect->quoting != QUOTE_NONE &&
719 c == dialect->quotechar) {
720 /* save "" as " */
721 if (parse_add_char(self, c) < 0)
722 return -1;
723 self->state = IN_QUOTED_FIELD;
724 }
725 else if (c == dialect->delimiter) {
726 /* save field - wait for new field */
727 if (parse_save_field(self) < 0)
728 return -1;
729 self->state = START_FIELD;
730 }
731 else if (c == '\n' || c == '\r' || c == '\0') {
732 /* end of line - return [fields] */
733 if (parse_save_field(self) < 0)
734 return -1;
735 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
736 }
737 else if (!dialect->strict) {
738 if (parse_add_char(self, c) < 0)
739 return -1;
740 self->state = IN_FIELD;
741 }
742 else {
743 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200744 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000745 dialect->delimiter,
746 dialect->quotechar);
747 return -1;
748 }
749 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000750
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 case EAT_CRNL:
752 if (c == '\n' || c == '\r')
753 ;
754 else if (c == '\0')
755 self->state = START_RECORD;
756 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200757 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 return -1;
759 }
760 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000761
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 }
763 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000764}
765
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000766static int
767parse_reset(ReaderObj *self)
768{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300769 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 if (self->fields == NULL)
771 return -1;
772 self->field_len = 0;
773 self->state = START_RECORD;
774 self->numeric_field = 0;
775 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000776}
Skip Montanarob4a04172003-03-20 23:29:12 +0000777
778static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000779Reader_iternext(ReaderObj *self)
780{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000781 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200782 Py_UCS4 c;
783 Py_ssize_t pos, linelen;
784 unsigned int kind;
785 void *data;
786 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000787
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 if (parse_reset(self) < 0)
789 return NULL;
790 do {
791 lineobj = PyIter_Next(self->input_iter);
792 if (lineobj == NULL) {
793 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700794 if (!PyErr_Occurred() && (self->field_len != 0 ||
795 self->state == IN_QUOTED_FIELD)) {
796 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700797 PyErr_SetString(_csvstate_global->error_obj,
798 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700799 else if (parse_save_field(self) >= 0)
800 break;
801 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 return NULL;
803 }
804 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200805 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000806 "iterator should return strings, "
807 "not %.200s "
808 "(did you open the file in text mode?)",
809 lineobj->ob_type->tp_name
810 );
811 Py_DECREF(lineobj);
812 return NULL;
813 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100814 if (PyUnicode_READY(lineobj) == -1) {
815 Py_DECREF(lineobj);
816 return NULL;
817 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000818 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200819 kind = PyUnicode_KIND(lineobj);
820 data = PyUnicode_DATA(lineobj);
821 pos = 0;
822 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200824 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000826 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200827 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 "line contains NULL byte");
829 goto err;
830 }
831 if (parse_process_char(self, c) < 0) {
832 Py_DECREF(lineobj);
833 goto err;
834 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200835 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 }
837 Py_DECREF(lineobj);
838 if (parse_process_char(self, 0) < 0)
839 goto err;
840 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000841
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000842 fields = self->fields;
843 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000844err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000846}
847
848static void
849Reader_dealloc(ReaderObj *self)
850{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 PyObject_GC_UnTrack(self);
852 Py_XDECREF(self->dialect);
853 Py_XDECREF(self->input_iter);
854 Py_XDECREF(self->fields);
855 if (self->field != NULL)
856 PyMem_Free(self->field);
857 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000858}
859
/*
 * tp_traverse slot of the reader: visit each object reference the reader
 * holds (dialect, input iterator, current field list).  Py_VISIT returns
 * early from this function if the visit callback reports a non-zero result.
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
868
/*
 * tp_clear slot of the reader: release the same references that
 * Reader_traverse() reports, leaving the members set to NULL.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
877
/* Docstring installed as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
884
/* Method table of the reader type: only the terminating sentinel, i.e. the
 * reader defines no methods of its own here. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member x inside ReaderObj, for use in PyMemberDef tables. */
#define R_OFF(x) offsetof(ReaderObj, x)
889
/* Attributes of reader objects exposed to Python; both are read-only.
 * "line_num" is the counter incremented in Reader_iternext() for every line
 * obtained from the input iterator. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};
895
Skip Montanarob4a04172003-03-20 23:29:12 +0000896
897static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 PyVarObject_HEAD_INIT(NULL, 0)
899 "_csv.reader", /*tp_name*/
900 sizeof(ReaderObj), /*tp_basicsize*/
901 0, /*tp_itemsize*/
902 /* methods */
903 (destructor)Reader_dealloc, /*tp_dealloc*/
904 (printfunc)0, /*tp_print*/
905 (getattrfunc)0, /*tp_getattr*/
906 (setattrfunc)0, /*tp_setattr*/
907 0, /*tp_reserved*/
908 (reprfunc)0, /*tp_repr*/
909 0, /*tp_as_number*/
910 0, /*tp_as_sequence*/
911 0, /*tp_as_mapping*/
912 (hashfunc)0, /*tp_hash*/
913 (ternaryfunc)0, /*tp_call*/
914 (reprfunc)0, /*tp_str*/
915 0, /*tp_getattro*/
916 0, /*tp_setattro*/
917 0, /*tp_as_buffer*/
918 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
919 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
920 Reader_Type_doc, /*tp_doc*/
921 (traverseproc)Reader_traverse, /*tp_traverse*/
922 (inquiry)Reader_clear, /*tp_clear*/
923 0, /*tp_richcompare*/
924 0, /*tp_weaklistoffset*/
925 PyObject_SelfIter, /*tp_iter*/
926 (getiterfunc)Reader_iternext, /*tp_iternext*/
927 Reader_methods, /*tp_methods*/
928 Reader_memberlist, /*tp_members*/
929 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
931};
932
933static PyObject *
934csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
935{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 PyObject * iterator, * dialect = NULL;
937 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000938
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 if (!self)
940 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000941
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 self->dialect = NULL;
943 self->fields = NULL;
944 self->input_iter = NULL;
945 self->field = NULL;
946 self->field_size = 0;
947 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 if (parse_reset(self) < 0) {
950 Py_DECREF(self);
951 return NULL;
952 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
955 Py_DECREF(self);
956 return NULL;
957 }
958 self->input_iter = PyObject_GetIter(iterator);
959 if (self->input_iter == NULL) {
960 PyErr_SetString(PyExc_TypeError,
961 "argument 1 must be an iterator");
962 Py_DECREF(self);
963 return NULL;
964 }
965 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
966 if (self->dialect == NULL) {
967 Py_DECREF(self);
968 return NULL;
969 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 PyObject_GC_Track(self);
972 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000973}
974
975/*
976 * WRITER
977 */
978/* ---------------------------------------------------------------- */
979static void
980join_reset(WriterObj *self)
981{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000982 self->rec_len = 0;
983 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000984}
985
/* Growth granularity of the writer's record buffer, in Py_UCS4 elements:
 * join_check_rec_size() rounds the capacity up to a multiple of this. */
#define MEM_INCR 32768
987
988/* Calculate new record length or append field to record. Return new
989 * record length.
990 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000991static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200992join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +0300993 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200994 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000995{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 DialectObj *dialect = self->dialect;
997 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000998 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000999
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001000#define INCLEN \
1001 do {\
1002 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1003 goto overflow; \
1004 } \
1005 rec_len++; \
1006 } while(0)
1007
1008#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 do {\
1010 if (copy_phase) \
1011 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001012 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001013 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001014
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001015 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 /* If this is not the first field we need a field separator */
1018 if (self->num_fields > 0)
1019 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 /* Handle preceding quote */
1022 if (copy_phase && *quoted)
1023 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001024
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001025 /* Copy/count field data */
1026 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001027 for (i = 0; field_data && (i < field_len); i++) {
1028 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001029 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001030
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 if (c == dialect->delimiter ||
1032 c == dialect->escapechar ||
1033 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001034 PyUnicode_FindChar(
1035 dialect->lineterminator, c, 0,
1036 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 if (dialect->quoting == QUOTE_NONE)
1038 want_escape = 1;
1039 else {
1040 if (c == dialect->quotechar) {
1041 if (dialect->doublequote)
1042 ADDCH(dialect->quotechar);
1043 else
1044 want_escape = 1;
1045 }
1046 if (!want_escape)
1047 *quoted = 1;
1048 }
1049 if (want_escape) {
1050 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001051 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 "need to escape, but no escapechar set");
1053 return -1;
1054 }
1055 ADDCH(dialect->escapechar);
1056 }
1057 }
1058 /* Copy field character into record buffer.
1059 */
1060 ADDCH(c);
1061 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 if (*quoted) {
1064 if (copy_phase)
1065 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001066 else {
1067 INCLEN; /* starting quote */
1068 INCLEN; /* ending quote */
1069 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 }
1071 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001072
1073 overflow:
1074 PyErr_NoMemory();
1075 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001076#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001077#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001078}
1079
1080static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001081join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001082{
Miss Islington (bot)962051e2018-08-16 00:53:00 -04001083 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 if (rec_len > self->rec_size) {
Miss Islington (bot)962051e2018-08-16 00:53:00 -04001086 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1087 Py_UCS4 *rec_new = self->rec;
1088 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1089 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 PyErr_NoMemory();
1091 return 0;
1092 }
Miss Islington (bot)962051e2018-08-16 00:53:00 -04001093 self->rec = rec_new;
1094 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 }
1096 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001097}
1098
1099static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001100join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001101{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001102 unsigned int field_kind = -1;
1103 void *field_data = NULL;
1104 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001105 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001106
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001107 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001108 if (PyUnicode_READY(field) == -1)
1109 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001110 field_kind = PyUnicode_KIND(field);
1111 field_data = PyUnicode_DATA(field);
1112 field_len = PyUnicode_GET_LENGTH(field);
1113 }
1114 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001115 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 if (rec_len < 0)
1117 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 /* grow record buffer if necessary */
1120 if (!join_check_rec_size(self, rec_len))
1121 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001122
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001123 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001124 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128}
1129
1130static int
1131join_append_lineterminator(WriterObj *self)
1132{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001133 Py_ssize_t terminator_len, i;
1134 unsigned int term_kind;
1135 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001137 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 if (terminator_len == -1)
1139 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1146 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1147 for (i = 0; i < terminator_len; i++)
1148 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152}
1153
/* Docstring of the writer's writerow() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields. Non-string\n"
"elements will be converted to string.");
1159
1160static PyObject *
1161csv_writerow(WriterObj *self, PyObject *seq)
1162{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001164 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001166 iter = PyObject_GetIter(seq);
1167 if (iter == NULL)
1168 return PyErr_Format(_csvstate_global->error_obj,
1169 "iterable expected, not %.200s",
1170 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001171
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001172 /* Join all fields in internal buffer.
1173 */
1174 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001175 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 int append_ok;
1177 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 switch (dialect->quoting) {
1180 case QUOTE_NONNUMERIC:
1181 quoted = !PyNumber_Check(field);
1182 break;
1183 case QUOTE_ALL:
1184 quoted = 1;
1185 break;
1186 default:
1187 quoted = 0;
1188 break;
1189 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001192 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 Py_DECREF(field);
1194 }
1195 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001196 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 Py_DECREF(field);
1198 }
1199 else {
1200 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001201
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 str = PyObject_Str(field);
1203 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001204 if (str == NULL) {
1205 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001207 }
1208 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 Py_DECREF(str);
1210 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001211 if (!append_ok) {
1212 Py_DECREF(iter);
1213 return NULL;
1214 }
1215 }
1216 Py_DECREF(iter);
1217 if (PyErr_Occurred())
1218 return NULL;
1219
Licht Takeuchi20019002017-12-12 18:57:06 +09001220 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001221 if (dialect->quoting == QUOTE_NONE) {
1222 PyErr_Format(_csvstate_global->error_obj,
1223 "single empty field record must be quoted");
1224 return NULL;
1225 }
1226 self->num_fields--;
1227 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 return NULL;
1229 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 /* Add line terminator.
1232 */
1233 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001234 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001235
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001236 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1237 (void *) self->rec, self->rec_len);
1238 if (line == NULL)
1239 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001240 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001241 Py_DECREF(line);
1242 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001243}
1244
/* Docstring of the writer's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file. Non-string\n"
"elements will be converted to string.");
1250
Skip Montanarob4a04172003-03-20 23:29:12 +00001251static PyObject *
1252csv_writerows(WriterObj *self, PyObject *seqseq)
1253{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 row_iter = PyObject_GetIter(seqseq);
1257 if (row_iter == NULL) {
1258 PyErr_SetString(PyExc_TypeError,
1259 "writerows() argument must be iterable");
1260 return NULL;
1261 }
1262 while ((row_obj = PyIter_Next(row_iter))) {
1263 result = csv_writerow(self, row_obj);
1264 Py_DECREF(row_obj);
1265 if (!result) {
1266 Py_DECREF(row_iter);
1267 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001268 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 else
1270 Py_DECREF(result);
1271 }
1272 Py_DECREF(row_iter);
1273 if (PyErr_Occurred())
1274 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001275 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001276}
1277
/* Methods of writer objects; both take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1283
/* Byte offset of member x inside WriterObj, for use in PyMemberDef tables. */
#define W_OFF(x) offsetof(WriterObj, x)
1285
/* Attributes of writer objects exposed to Python: the dialect, read-only. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
1290
1291static void
1292Writer_dealloc(WriterObj *self)
1293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 PyObject_GC_UnTrack(self);
1295 Py_XDECREF(self->dialect);
1296 Py_XDECREF(self->writeline);
1297 if (self->rec != NULL)
1298 PyMem_Free(self->rec);
1299 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001300}
1301
/*
 * tp_traverse slot of the writer: visit each object reference the writer
 * holds (dialect and the bound write callable).  Py_VISIT returns early from
 * this function if the visit callback reports a non-zero result.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1309
/*
 * tp_clear slot of the writer: release the same references that
 * Writer_traverse() reports, leaving the members set to NULL.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1317
/* Docstring installed as tp_doc of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1324
1325static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 PyVarObject_HEAD_INIT(NULL, 0)
1327 "_csv.writer", /*tp_name*/
1328 sizeof(WriterObj), /*tp_basicsize*/
1329 0, /*tp_itemsize*/
1330 /* methods */
1331 (destructor)Writer_dealloc, /*tp_dealloc*/
1332 (printfunc)0, /*tp_print*/
1333 (getattrfunc)0, /*tp_getattr*/
1334 (setattrfunc)0, /*tp_setattr*/
1335 0, /*tp_reserved*/
1336 (reprfunc)0, /*tp_repr*/
1337 0, /*tp_as_number*/
1338 0, /*tp_as_sequence*/
1339 0, /*tp_as_mapping*/
1340 (hashfunc)0, /*tp_hash*/
1341 (ternaryfunc)0, /*tp_call*/
1342 (reprfunc)0, /*tp_str*/
1343 0, /*tp_getattro*/
1344 0, /*tp_setattro*/
1345 0, /*tp_as_buffer*/
1346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1347 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1348 Writer_Type_doc,
1349 (traverseproc)Writer_traverse, /*tp_traverse*/
1350 (inquiry)Writer_clear, /*tp_clear*/
1351 0, /*tp_richcompare*/
1352 0, /*tp_weaklistoffset*/
1353 (getiterfunc)0, /*tp_iter*/
1354 (getiterfunc)0, /*tp_iternext*/
1355 Writer_methods, /*tp_methods*/
1356 Writer_memberlist, /*tp_members*/
1357 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001358};
1359
1360static PyObject *
1361csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 PyObject * output_file, * dialect = NULL;
1364 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001365 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 if (!self)
1368 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001369
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 self->dialect = NULL;
1371 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001372
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001373 self->rec = NULL;
1374 self->rec_size = 0;
1375 self->rec_len = 0;
1376 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001378 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1379 Py_DECREF(self);
1380 return NULL;
1381 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001382 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1384 PyErr_SetString(PyExc_TypeError,
1385 "argument 1 must have a \"write\" method");
1386 Py_DECREF(self);
1387 return NULL;
1388 }
1389 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1390 if (self->dialect == NULL) {
1391 Py_DECREF(self);
1392 return NULL;
1393 }
1394 PyObject_GC_Track(self);
1395 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001396}
1397
1398/*
1399 * DIALECT REGISTRY
1400 */
1401static PyObject *
1402csv_list_dialects(PyObject *module, PyObject *args)
1403{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001404 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001405}
1406
1407static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001408csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001409{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001410 PyObject *name_obj, *dialect_obj = NULL;
1411 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001413 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1414 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001415 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001416 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001417 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 return NULL;
1419 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001420 if (PyUnicode_READY(name_obj) == -1)
1421 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 dialect = _call_dialect(dialect_obj, kwargs);
1423 if (dialect == NULL)
1424 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001425 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 Py_DECREF(dialect);
1427 return NULL;
1428 }
1429 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001430 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001431}
1432
1433static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001434csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001435{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001436 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1437 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001438 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001439}
1440
/* _csv.get_dialect(name): thin wrapper that returns whatever
 * get_dialect_from_registry() returns for name_obj. */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1446
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001447static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001448csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001451 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001452
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001453 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1454 return NULL;
1455 if (new_limit != NULL) {
1456 if (!PyLong_CheckExact(new_limit)) {
1457 PyErr_Format(PyExc_TypeError,
1458 "limit must be an integer");
1459 return NULL;
1460 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001461 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1462 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1463 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 return NULL;
1465 }
1466 }
1467 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001468}
1469
Skip Montanarob4a04172003-03-20 23:29:12 +00001470/*
1471 * MODULE
1472 */
1473
/* Module-level docstring: overview of the module, the dialect concept and
 * the individual dialect settings. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1531
/* Docstring of the module-level reader() function (see csv_reader()). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001546
1547PyDoc_STRVAR(csv_writer_doc,
1548" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1549" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001550" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001551" csv_writer.writerow(row)\n"
1552"\n"
1553" [or]\n"
1554"\n"
1555" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1556" [optional keyword args])\n"
1557" csv_writer.writerows(rows)\n"
1558"\n"
1559"The \"fileobj\" argument can be any object that supports the file API.\n");
1560
1561PyDoc_STRVAR(csv_list_dialects_doc,
1562"Return a list of all know dialect names.\n"
1563" names = csv.list_dialects()");
1564
1565PyDoc_STRVAR(csv_get_dialect_doc,
1566"Return the dialect instance associated with name.\n"
1567" dialect = csv.get_dialect(name)");
1568
1569PyDoc_STRVAR(csv_register_dialect_doc,
1570"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001571" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001572
1573PyDoc_STRVAR(csv_unregister_dialect_doc,
1574"Delete the name/dialect mapping associated with a string name.\n"
1575" csv.unregister_dialect(name)");
1576
Andrew McNamara31d88962005-01-12 03:45:10 +00001577PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001578"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001579" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001580"\n"
1581"Returns old limit. If limit is not given, no new limit is set and\n"
1582"the old limit is returned");
1583
Skip Montanarob4a04172003-03-20 23:29:12 +00001584static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001585 { "reader", (PyCFunction)csv_reader,
1586 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1587 { "writer", (PyCFunction)csv_writer,
1588 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1589 { "list_dialects", (PyCFunction)csv_list_dialects,
1590 METH_NOARGS, csv_list_dialects_doc},
1591 { "register_dialect", (PyCFunction)csv_register_dialect,
1592 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1593 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1594 METH_O, csv_unregister_dialect_doc},
1595 { "get_dialect", (PyCFunction)csv_get_dialect,
1596 METH_O, csv_get_dialect_doc},
1597 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1598 METH_VARARGS, csv_field_size_limit_doc},
1599 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001600};
1601
Martin v. Löwis1a214512008-06-11 05:26:20 +00001602static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 PyModuleDef_HEAD_INIT,
1604 "_csv",
1605 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001606 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 csv_methods,
1608 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001609 _csv_traverse,
1610 _csv_clear,
1611 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001612};
1613
Skip Montanarob4a04172003-03-20 23:29:12 +00001614PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001615PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001616{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001618 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001619
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001620 if (PyType_Ready(&Dialect_Type) < 0)
1621 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001622
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 if (PyType_Ready(&Reader_Type) < 0)
1624 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001626 if (PyType_Ready(&Writer_Type) < 0)
1627 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 /* Create the module and add the functions */
1630 module = PyModule_Create(&_csvmodule);
1631 if (module == NULL)
1632 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 /* Add version to the module. */
1635 if (PyModule_AddStringConstant(module, "__version__",
1636 MODULE_VERSION) == -1)
1637 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001638
Antoine Pitroue7672d32012-05-16 11:33:08 +02001639 /* Set the field limit */
1640 _csvstate(module)->field_limit = 128 * 1024;
1641 /* Do I still need to add this var to the Module Dict? */
1642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001644 _csvstate(module)->dialects = PyDict_New();
1645 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001647 Py_INCREF(_csvstate(module)->dialects);
1648 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 /* Add quote styles into dictionary */
1652 for (style = quote_styles; style->name; style++) {
1653 if (PyModule_AddIntConstant(module, style->name,
1654 style->style) == -1)
1655 return NULL;
1656 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001658 /* Add the Dialect type */
1659 Py_INCREF(&Dialect_Type);
1660 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1661 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001662
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001663 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001664 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1665 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001666 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001667 Py_INCREF(_csvstate(module)->error_obj);
1668 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001670}