blob: 9c497154ffb5c8f0e6642edf5b738e0f2ac8eb38 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
21 long field_limit; /* max parsed field size */
22} _csvstate;
23
Hai Shif707d942020-03-16 21:15:01 +080024static inline _csvstate*
25get_csv_state(PyObject *module)
26{
27 void *state = PyModule_GetState(module);
28 assert(state != NULL);
29 return (_csvstate *)state;
30}
Antoine Pitroue7672d32012-05-16 11:33:08 +020031
32static int
33_csv_clear(PyObject *m)
34{
Hai Shif707d942020-03-16 21:15:01 +080035 Py_CLEAR(get_csv_state(m)->error_obj);
36 Py_CLEAR(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020037 return 0;
38}
39
40static int
41_csv_traverse(PyObject *m, visitproc visit, void *arg)
42{
Hai Shif707d942020-03-16 21:15:01 +080043 Py_VISIT(get_csv_state(m)->error_obj);
44 Py_VISIT(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020045 return 0;
46}
47
48static void
49_csv_free(void *m)
50{
51 _csv_clear((PyObject *)m);
52}
53
54static struct PyModuleDef _csvmodule;
55
56#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000057
/* States of the reader's character-at-a-time parser (see parse_process_char). */
typedef enum {
    START_RECORD,            /* nothing consumed yet for this record */
    START_FIELD,             /* expecting the first character of a field */
    ESCAPED_CHAR,            /* escapechar seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,   /* quotechar seen inside a quoted field */
    EAT_CRNL,                /* record finished; skipping CR/LF characters */
    AFTER_ESCAPED_CRNL       /* an escaped CR or LF was just stored */
} ParserState;
63
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
67
68typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020070 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000071} StyleDesc;
72
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020073static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
75 { QUOTE_ALL, "QUOTE_ALL" },
76 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
77 { QUOTE_NONE, "QUOTE_NONE" },
78 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000079};
80
81typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000083
Serhiy Storchaka323748a2018-07-26 13:21:09 +030084 char doublequote; /* is " represented by ""? */
85 char skipinitialspace; /* ignore spaces following delimiter? */
86 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030088 Py_UCS4 delimiter; /* field separator */
89 Py_UCS4 quotechar; /* quote character */
90 Py_UCS4 escapechar; /* escape character */
91 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Skip Montanarob4a04172003-03-20 23:29:12 +000093} DialectObj;
94
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
97typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *fields; /* field list for current record */
105 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200106 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000107 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 Py_ssize_t field_len; /* length of current field */
109 int numeric_field; /* treat field as numeric */
110 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000111} ReaderObj;
112
Neal Norwitz227b5332006-03-22 09:28:35 +0000113static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Dong-hee Na1b55b652020-02-17 19:09:15 +0900115#define ReaderObject_Check(v) Py_IS_TYPE(v, &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
117typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000118 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000119
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200120 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000121
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000122 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000123
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200124 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000125 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 Py_ssize_t rec_len; /* length of record */
127 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000128} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
Neal Norwitz227b5332006-03-22 09:28:35 +0000130static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
/*
 * DIALECT class
 */
135
136static PyObject *
137get_dialect_from_registry(PyObject * name_obj)
138{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000140
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200141 dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 if (dialect_obj == NULL) {
143 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200144 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 }
146 else
147 Py_INCREF(dialect_obj);
148 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
152get_string(PyObject *str)
153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 Py_XINCREF(str);
155 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000156}
157
Skip Montanarob4a04172003-03-20 23:29:12 +0000158static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200159get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200162 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000163 }
164 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200165 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200169Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000172}
173
Skip Montanarob4a04172003-03-20 23:29:12 +0000174static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200175Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000178}
179
180static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200181Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
Andrew McNamara1196cf12005-01-07 04:42:45 +0000186static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200187Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200193Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000196}
197
198static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300199_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 if (src == NULL)
202 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200203 else {
204 int b = PyObject_IsTrue(src);
205 if (b < 0)
206 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300207 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200208 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000210}
211
Andrew McNamara1196cf12005-01-07 04:42:45 +0000212static int
213_set_int(const char *name, int *target, PyObject *src, int dflt)
214{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000215 if (src == NULL)
216 *target = dflt;
217 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200218 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000219 if (!PyLong_CheckExact(src)) {
220 PyErr_Format(PyExc_TypeError,
221 "\"%s\" must be an integer", name);
222 return -1;
223 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200224 value = _PyLong_AsInt(src);
225 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000226 return -1;
227 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200228 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100245 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300251 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300280 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000281 }
282 }
283 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284}
285
286static int
287dialect_check_quoting(int quoting)
288{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200289 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200292 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000297}
Skip Montanarob4a04172003-03-20 23:29:12 +0000298
299#define D_OFF(x) offsetof(DialectObj, x)
300
301static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300302 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
303 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
304 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000305 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000306};
307
308static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 { "delimiter", (getter)Dialect_get_delimiter},
310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000315};
316
317static void
318Dialect_dealloc(DialectObj *self)
319{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000322}
323
/*
 * Keyword names accepted by dialect_new(), in the order that matches its
 * PyArg_ParseTupleAndKeywords() output arguments.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
336
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000337static PyObject *
338dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 &quotechar,
360 &quoting,
361 &skipinitialspace,
362 &strict))
363 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100366 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200375 delimiter == NULL &&
376 doublequote == NULL &&
377 escapechar == NULL &&
378 lineterminator == NULL &&
379 quotechar == NULL &&
380 quoting == NULL &&
381 skipinitialspace == NULL &&
382 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000383 return dialect;
384 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000385
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000402#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000415
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000417#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
427 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000428
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200433 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300434 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 ret = (PyObject *)self;
450 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000451err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000463}
464
465
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000467"CSV dialect\n"
468"\n"
469"The Dialect type records CSV parsing and generation options.\n");
470
471static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200478 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200481 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000512};
513
Andrew McNamara91b97462005-01-11 01:07:23 +0000514/*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518static PyObject *
519_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520{
Victor Stinner6412f492016-08-23 00:21:34 +0200521 PyObject *type = (PyObject *)&Dialect_Type;
522 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100523 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200524 }
525 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100526 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200527 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000528}
529
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000533static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000534parse_save_field(ReaderObj *self)
535{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000537
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200538 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
539 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 if (field == NULL)
541 return -1;
542 self->field_len = 0;
543 if (self->numeric_field) {
544 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000545
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 self->numeric_field = 0;
547 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000548 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200549 if (tmp == NULL)
550 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 field = tmp;
552 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100553 if (PyList_Append(self->fields, field) < 0) {
554 Py_DECREF(field);
555 return -1;
556 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 Py_DECREF(field);
558 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559}
560
561static int
562parse_grow_buff(ReaderObj *self)
563{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500564 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
565
566 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
567 Py_UCS4 *field_new = self->field;
568 PyMem_Resize(field_new, Py_UCS4, field_size_new);
569 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 PyErr_NoMemory();
571 return 0;
572 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500573 self->field = field_new;
574 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000575 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000576}
577
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000578static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200579parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000580{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200581 if (self->field_len >= _csvstate_global->field_limit) {
582 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
583 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000584 return -1;
585 }
586 if (self->field_len == self->field_size && !parse_grow_buff(self))
587 return -1;
588 self->field[self->field_len++] = c;
589 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590}
591
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000592static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200593parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000594{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000596
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 switch (self->state) {
598 case START_RECORD:
599 /* start of record */
600 if (c == '\0')
601 /* empty line - return [] */
602 break;
603 else if (c == '\n' || c == '\r') {
604 self->state = EAT_CRNL;
605 break;
606 }
607 /* normal character - handle as START_FIELD */
608 self->state = START_FIELD;
609 /* fallthru */
610 case START_FIELD:
611 /* expecting field */
612 if (c == '\n' || c == '\r' || c == '\0') {
613 /* save empty field - return [fields] */
614 if (parse_save_field(self) < 0)
615 return -1;
616 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
617 }
618 else if (c == dialect->quotechar &&
619 dialect->quoting != QUOTE_NONE) {
620 /* start quoted field */
621 self->state = IN_QUOTED_FIELD;
622 }
623 else if (c == dialect->escapechar) {
624 /* possible escaped character */
625 self->state = ESCAPED_CHAR;
626 }
627 else if (c == ' ' && dialect->skipinitialspace)
628 /* ignore space at start of field */
629 ;
630 else if (c == dialect->delimiter) {
631 /* save empty field */
632 if (parse_save_field(self) < 0)
633 return -1;
634 }
635 else {
636 /* begin new unquoted field */
637 if (dialect->quoting == QUOTE_NONNUMERIC)
638 self->numeric_field = 1;
639 if (parse_add_char(self, c) < 0)
640 return -1;
641 self->state = IN_FIELD;
642 }
643 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000645 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400646 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = AFTER_ESCAPED_CRNL;
650 break;
651 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000652 if (c == '\0')
653 c = '\n';
654 if (parse_add_char(self, c) < 0)
655 return -1;
656 self->state = IN_FIELD;
657 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000658
R David Murrayc7c42ef2013-03-19 22:41:47 -0400659 case AFTER_ESCAPED_CRNL:
660 if (c == '\0')
661 break;
662 /*fallthru*/
663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 case IN_FIELD:
665 /* in unquoted field */
666 if (c == '\n' || c == '\r' || c == '\0') {
667 /* end of line - return [fields] */
668 if (parse_save_field(self) < 0)
669 return -1;
670 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
671 }
672 else if (c == dialect->escapechar) {
673 /* possible escaped character */
674 self->state = ESCAPED_CHAR;
675 }
676 else if (c == dialect->delimiter) {
677 /* save field - wait for new field */
678 if (parse_save_field(self) < 0)
679 return -1;
680 self->state = START_FIELD;
681 }
682 else {
683 /* normal character - save in field */
684 if (parse_add_char(self, c) < 0)
685 return -1;
686 }
687 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000688
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 case IN_QUOTED_FIELD:
690 /* in quoted field */
691 if (c == '\0')
692 ;
693 else if (c == dialect->escapechar) {
694 /* Possible escape character */
695 self->state = ESCAPE_IN_QUOTED_FIELD;
696 }
697 else if (c == dialect->quotechar &&
698 dialect->quoting != QUOTE_NONE) {
699 if (dialect->doublequote) {
700 /* doublequote; " represented by "" */
701 self->state = QUOTE_IN_QUOTED_FIELD;
702 }
703 else {
704 /* end of quote part of field */
705 self->state = IN_FIELD;
706 }
707 }
708 else {
709 /* normal character - save in field */
710 if (parse_add_char(self, c) < 0)
711 return -1;
712 }
713 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 case ESCAPE_IN_QUOTED_FIELD:
716 if (c == '\0')
717 c = '\n';
718 if (parse_add_char(self, c) < 0)
719 return -1;
720 self->state = IN_QUOTED_FIELD;
721 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000722
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300724 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 if (dialect->quoting != QUOTE_NONE &&
726 c == dialect->quotechar) {
727 /* save "" as " */
728 if (parse_add_char(self, c) < 0)
729 return -1;
730 self->state = IN_QUOTED_FIELD;
731 }
732 else if (c == dialect->delimiter) {
733 /* save field - wait for new field */
734 if (parse_save_field(self) < 0)
735 return -1;
736 self->state = START_FIELD;
737 }
738 else if (c == '\n' || c == '\r' || c == '\0') {
739 /* end of line - return [fields] */
740 if (parse_save_field(self) < 0)
741 return -1;
742 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
743 }
744 else if (!dialect->strict) {
745 if (parse_add_char(self, c) < 0)
746 return -1;
747 self->state = IN_FIELD;
748 }
749 else {
750 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200751 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 dialect->delimiter,
753 dialect->quotechar);
754 return -1;
755 }
756 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000757
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 case EAT_CRNL:
759 if (c == '\n' || c == '\r')
760 ;
761 else if (c == '\0')
762 self->state = START_RECORD;
763 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200764 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000765 return -1;
766 }
767 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000768
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 }
770 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000771}
772
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000773static int
774parse_reset(ReaderObj *self)
775{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300776 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 if (self->fields == NULL)
778 return -1;
779 self->field_len = 0;
780 self->state = START_RECORD;
781 self->numeric_field = 0;
782 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000783}
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
785static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000786Reader_iternext(ReaderObj *self)
787{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200789 Py_UCS4 c;
790 Py_ssize_t pos, linelen;
791 unsigned int kind;
792 void *data;
793 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 if (parse_reset(self) < 0)
796 return NULL;
797 do {
798 lineobj = PyIter_Next(self->input_iter);
799 if (lineobj == NULL) {
800 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700801 if (!PyErr_Occurred() && (self->field_len != 0 ||
802 self->state == IN_QUOTED_FIELD)) {
803 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700804 PyErr_SetString(_csvstate_global->error_obj,
805 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700806 else if (parse_save_field(self) >= 0)
807 break;
808 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 return NULL;
810 }
811 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200812 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 "iterator should return strings, "
814 "not %.200s "
815 "(did you open the file in text mode?)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100816 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 );
818 Py_DECREF(lineobj);
819 return NULL;
820 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100821 if (PyUnicode_READY(lineobj) == -1) {
822 Py_DECREF(lineobj);
823 return NULL;
824 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200826 kind = PyUnicode_KIND(lineobj);
827 data = PyUnicode_DATA(lineobj);
828 pos = 0;
829 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200831 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000833 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200834 PyErr_Format(_csvstate_global->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700835 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000836 goto err;
837 }
838 if (parse_process_char(self, c) < 0) {
839 Py_DECREF(lineobj);
840 goto err;
841 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200842 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 }
844 Py_DECREF(lineobj);
845 if (parse_process_char(self, 0) < 0)
846 goto err;
847 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000848
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 fields = self->fields;
850 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000851err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000853}
854
855static void
856Reader_dealloc(ReaderObj *self)
857{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 PyObject_GC_UnTrack(self);
859 Py_XDECREF(self->dialect);
860 Py_XDECREF(self->input_iter);
861 Py_XDECREF(self->fields);
862 if (self->field != NULL)
863 PyMem_Free(self->field);
864 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000865}
866
867static int
868Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
869{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 Py_VISIT(self->dialect);
871 Py_VISIT(self->input_iter);
872 Py_VISIT(self->fields);
873 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000874}
875
876static int
877Reader_clear(ReaderObj *self)
878{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 Py_CLEAR(self->dialect);
880 Py_CLEAR(self->input_iter);
881 Py_CLEAR(self->fields);
882 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000883}
884
885PyDoc_STRVAR(Reader_Type_doc,
886"CSV reader\n"
887"\n"
888"Reader objects are responsible for reading and parsing tabular data\n"
889"in CSV format.\n"
890);
891
892static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000894};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000895#define R_OFF(x) offsetof(ReaderObj, x)
896
897static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
899 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
900 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000901};
902
Skip Montanarob4a04172003-03-20 23:29:12 +0000903
904static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 PyVarObject_HEAD_INIT(NULL, 0)
906 "_csv.reader", /*tp_name*/
907 sizeof(ReaderObj), /*tp_basicsize*/
908 0, /*tp_itemsize*/
909 /* methods */
910 (destructor)Reader_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200911 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 (getattrfunc)0, /*tp_getattr*/
913 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200914 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 (reprfunc)0, /*tp_repr*/
916 0, /*tp_as_number*/
917 0, /*tp_as_sequence*/
918 0, /*tp_as_mapping*/
919 (hashfunc)0, /*tp_hash*/
920 (ternaryfunc)0, /*tp_call*/
921 (reprfunc)0, /*tp_str*/
922 0, /*tp_getattro*/
923 0, /*tp_setattro*/
924 0, /*tp_as_buffer*/
925 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
926 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
927 Reader_Type_doc, /*tp_doc*/
928 (traverseproc)Reader_traverse, /*tp_traverse*/
929 (inquiry)Reader_clear, /*tp_clear*/
930 0, /*tp_richcompare*/
931 0, /*tp_weaklistoffset*/
932 PyObject_SelfIter, /*tp_iter*/
933 (getiterfunc)Reader_iternext, /*tp_iternext*/
934 Reader_methods, /*tp_methods*/
935 Reader_memberlist, /*tp_members*/
936 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000937
938};
939
940static PyObject *
941csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
942{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 PyObject * iterator, * dialect = NULL;
944 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000945
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 if (!self)
947 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000948
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000949 self->dialect = NULL;
950 self->fields = NULL;
951 self->input_iter = NULL;
952 self->field = NULL;
953 self->field_size = 0;
954 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000955
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 if (parse_reset(self) < 0) {
957 Py_DECREF(self);
958 return NULL;
959 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000960
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
962 Py_DECREF(self);
963 return NULL;
964 }
965 self->input_iter = PyObject_GetIter(iterator);
966 if (self->input_iter == NULL) {
967 PyErr_SetString(PyExc_TypeError,
968 "argument 1 must be an iterator");
969 Py_DECREF(self);
970 return NULL;
971 }
972 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
973 if (self->dialect == NULL) {
974 Py_DECREF(self);
975 return NULL;
976 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 PyObject_GC_Track(self);
979 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000980}
981
982/*
983 * WRITER
984 */
985/* ---------------------------------------------------------------- */
986static void
987join_reset(WriterObj *self)
988{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000989 self->rec_len = 0;
990 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000991}
992
993#define MEM_INCR 32768
994
995/* Calculate new record length or append field to record. Return new
996 * record length.
997 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000998static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200999join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001000 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001001 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001002{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 DialectObj *dialect = self->dialect;
1004 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001005 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001006
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001007#define INCLEN \
1008 do {\
1009 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1010 goto overflow; \
1011 } \
1012 rec_len++; \
1013 } while(0)
1014
1015#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 do {\
1017 if (copy_phase) \
1018 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001019 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 /* If this is not the first field we need a field separator */
1025 if (self->num_fields > 0)
1026 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 /* Handle preceding quote */
1029 if (copy_phase && *quoted)
1030 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 /* Copy/count field data */
1033 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001034 for (i = 0; field_data && (i < field_len); i++) {
1035 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001037
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (c == dialect->delimiter ||
1039 c == dialect->escapechar ||
1040 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001041 PyUnicode_FindChar(
1042 dialect->lineterminator, c, 0,
1043 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 if (dialect->quoting == QUOTE_NONE)
1045 want_escape = 1;
1046 else {
1047 if (c == dialect->quotechar) {
1048 if (dialect->doublequote)
1049 ADDCH(dialect->quotechar);
1050 else
1051 want_escape = 1;
1052 }
1053 if (!want_escape)
1054 *quoted = 1;
1055 }
1056 if (want_escape) {
1057 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001058 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 "need to escape, but no escapechar set");
1060 return -1;
1061 }
1062 ADDCH(dialect->escapechar);
1063 }
1064 }
1065 /* Copy field character into record buffer.
1066 */
1067 ADDCH(c);
1068 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001069
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 if (*quoted) {
1071 if (copy_phase)
1072 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001073 else {
1074 INCLEN; /* starting quote */
1075 INCLEN; /* ending quote */
1076 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001077 }
1078 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001079
1080 overflow:
1081 PyErr_NoMemory();
1082 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001083#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001084#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001085}
1086
1087static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001088join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001089{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001090 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001093 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1094 Py_UCS4 *rec_new = self->rec;
1095 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1096 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001097 PyErr_NoMemory();
1098 return 0;
1099 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001100 self->rec = rec_new;
1101 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 }
1103 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001104}
1105
1106static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001107join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001108{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 unsigned int field_kind = -1;
1110 void *field_data = NULL;
1111 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001112 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001113
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001114 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001115 if (PyUnicode_READY(field) == -1)
1116 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001117 field_kind = PyUnicode_KIND(field);
1118 field_data = PyUnicode_DATA(field);
1119 field_len = PyUnicode_GET_LENGTH(field);
1120 }
1121 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001122 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 if (rec_len < 0)
1124 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 /* grow record buffer if necessary */
1127 if (!join_check_rec_size(self, rec_len))
1128 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001130 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001131 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001133
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135}
1136
1137static int
1138join_append_lineterminator(WriterObj *self)
1139{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001140 Py_ssize_t terminator_len, i;
1141 unsigned int term_kind;
1142 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001144 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 if (terminator_len == -1)
1146 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001147
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 /* grow record buffer if necessary */
1149 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1150 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001151
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001152 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1153 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1154 for (i = 0; i < terminator_len; i++)
1155 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001158 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001159}
1160
1161PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001162"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001163"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001164"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001165"elements will be converted to string.");
1166
1167static PyObject *
1168csv_writerow(WriterObj *self, PyObject *seq)
1169{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001171 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001173 iter = PyObject_GetIter(seq);
1174 if (iter == NULL)
1175 return PyErr_Format(_csvstate_global->error_obj,
1176 "iterable expected, not %.200s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001177 Py_TYPE(seq)->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 /* Join all fields in internal buffer.
1180 */
1181 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001182 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 int append_ok;
1184 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 switch (dialect->quoting) {
1187 case QUOTE_NONNUMERIC:
1188 quoted = !PyNumber_Check(field);
1189 break;
1190 case QUOTE_ALL:
1191 quoted = 1;
1192 break;
1193 default:
1194 quoted = 0;
1195 break;
1196 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001197
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001199 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 Py_DECREF(field);
1201 }
1202 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001203 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 Py_DECREF(field);
1205 }
1206 else {
1207 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 str = PyObject_Str(field);
1210 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001211 if (str == NULL) {
1212 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001214 }
1215 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 Py_DECREF(str);
1217 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001218 if (!append_ok) {
1219 Py_DECREF(iter);
1220 return NULL;
1221 }
1222 }
1223 Py_DECREF(iter);
1224 if (PyErr_Occurred())
1225 return NULL;
1226
Licht Takeuchi20019002017-12-12 18:57:06 +09001227 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001228 if (dialect->quoting == QUOTE_NONE) {
1229 PyErr_Format(_csvstate_global->error_obj,
1230 "single empty field record must be quoted");
1231 return NULL;
1232 }
1233 self->num_fields--;
1234 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 return NULL;
1236 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001237
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 /* Add line terminator.
1239 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001240 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001241 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001242 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001243
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001244 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1245 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001246 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001247 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001248 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001249 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001250 Py_DECREF(line);
1251 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001252}
1253
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001254PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001255"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001256"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001257"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001258"elements will be converted to string.");
1259
Skip Montanarob4a04172003-03-20 23:29:12 +00001260static PyObject *
1261csv_writerows(WriterObj *self, PyObject *seqseq)
1262{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001264
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001265 row_iter = PyObject_GetIter(seqseq);
1266 if (row_iter == NULL) {
1267 PyErr_SetString(PyExc_TypeError,
1268 "writerows() argument must be iterable");
1269 return NULL;
1270 }
1271 while ((row_obj = PyIter_Next(row_iter))) {
1272 result = csv_writerow(self, row_obj);
1273 Py_DECREF(row_obj);
1274 if (!result) {
1275 Py_DECREF(row_iter);
1276 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001277 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 else
1279 Py_DECREF(result);
1280 }
1281 Py_DECREF(row_iter);
1282 if (PyErr_Occurred())
1283 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001284 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001285}
1286
1287static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1289 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1290 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001291};
1292
1293#define W_OFF(x) offsetof(WriterObj, x)
1294
1295static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1297 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001298};
1299
1300static void
1301Writer_dealloc(WriterObj *self)
1302{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 PyObject_GC_UnTrack(self);
1304 Py_XDECREF(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001305 Py_XDECREF(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 if (self->rec != NULL)
1307 PyMem_Free(self->rec);
1308 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001309}
1310
1311static int
1312Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001315 Py_VISIT(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001317}
1318
1319static int
1320Writer_clear(WriterObj *self)
1321{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001323 Py_CLEAR(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001325}
1326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001328"CSV writer\n"
1329"\n"
1330"Writer objects are responsible for generating tabular data\n"
1331"in CSV format from sequence input.\n"
1332);
1333
1334static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 PyVarObject_HEAD_INIT(NULL, 0)
1336 "_csv.writer", /*tp_name*/
1337 sizeof(WriterObj), /*tp_basicsize*/
1338 0, /*tp_itemsize*/
1339 /* methods */
1340 (destructor)Writer_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001341 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 (getattrfunc)0, /*tp_getattr*/
1343 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001344 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001345 (reprfunc)0, /*tp_repr*/
1346 0, /*tp_as_number*/
1347 0, /*tp_as_sequence*/
1348 0, /*tp_as_mapping*/
1349 (hashfunc)0, /*tp_hash*/
1350 (ternaryfunc)0, /*tp_call*/
1351 (reprfunc)0, /*tp_str*/
1352 0, /*tp_getattro*/
1353 0, /*tp_setattro*/
1354 0, /*tp_as_buffer*/
1355 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1356 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1357 Writer_Type_doc,
1358 (traverseproc)Writer_traverse, /*tp_traverse*/
1359 (inquiry)Writer_clear, /*tp_clear*/
1360 0, /*tp_richcompare*/
1361 0, /*tp_weaklistoffset*/
1362 (getiterfunc)0, /*tp_iter*/
1363 (getiterfunc)0, /*tp_iternext*/
1364 Writer_methods, /*tp_methods*/
1365 Writer_memberlist, /*tp_members*/
1366 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001367};
1368
1369static PyObject *
1370csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 PyObject * output_file, * dialect = NULL;
1373 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001374 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 if (!self)
1377 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001380 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 self->rec = NULL;
1383 self->rec_size = 0;
1384 self->rec_len = 0;
1385 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1388 Py_DECREF(self);
1389 return NULL;
1390 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001391 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1392 Py_DECREF(self);
1393 return NULL;
1394 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001395 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 PyErr_SetString(PyExc_TypeError,
1397 "argument 1 must have a \"write\" method");
1398 Py_DECREF(self);
1399 return NULL;
1400 }
1401 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1402 if (self->dialect == NULL) {
1403 Py_DECREF(self);
1404 return NULL;
1405 }
1406 PyObject_GC_Track(self);
1407 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001408}
1409
1410/*
1411 * DIALECT REGISTRY
1412 */
1413static PyObject *
1414csv_list_dialects(PyObject *module, PyObject *args)
1415{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001416 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001417}
1418
1419static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001420csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001421{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 PyObject *name_obj, *dialect_obj = NULL;
1423 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001424
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1426 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001427 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001429 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 return NULL;
1431 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001432 if (PyUnicode_READY(name_obj) == -1)
1433 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 dialect = _call_dialect(dialect_obj, kwargs);
1435 if (dialect == NULL)
1436 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001437 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 Py_DECREF(dialect);
1439 return NULL;
1440 }
1441 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001442 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001443}
1444
1445static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001446csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001447{
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001448 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1449 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1450 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1451 }
1452 return NULL;
1453 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001454 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001455}
1456
1457static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001458csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001459{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001460 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001461}
1462
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001463static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001464csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001467 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001468
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1470 return NULL;
1471 if (new_limit != NULL) {
1472 if (!PyLong_CheckExact(new_limit)) {
1473 PyErr_Format(PyExc_TypeError,
1474 "limit must be an integer");
1475 return NULL;
1476 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001477 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1478 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1479 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001480 return NULL;
1481 }
1482 }
1483 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001484}
1485
Skip Montanarob4a04172003-03-20 23:29:12 +00001486/*
1487 * MODULE
1488 */
1489
1490PyDoc_STRVAR(csv_module_doc,
1491"CSV parsing and writing.\n"
1492"\n"
1493"This module provides classes that assist in the reading and writing\n"
1494"of Comma Separated Value (CSV) files, and implements the interface\n"
1495"described by PEP 305. Although many CSV files are simple to parse,\n"
1496"the format is not formally defined by a stable specification and\n"
1497"is subtle enough that parsing lines of a CSV file with something\n"
1498"like line.split(\",\") is bound to fail. The module supports three\n"
1499"basic APIs: reading, writing, and registration of dialects.\n"
1500"\n"
1501"\n"
1502"DIALECT REGISTRATION:\n"
1503"\n"
1504"Readers and writers support a dialect argument, which is a convenient\n"
1505"handle on a group of settings. When the dialect argument is a string,\n"
1506"it identifies one of the dialects previously registered with the module.\n"
1507"If it is a class or instance, the attributes of the argument are used as\n"
1508"the settings for the reader or writer:\n"
1509"\n"
1510" class excel:\n"
1511" delimiter = ','\n"
1512" quotechar = '\"'\n"
1513" escapechar = None\n"
1514" doublequote = True\n"
1515" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001516" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001517" quoting = QUOTE_MINIMAL\n"
1518"\n"
1519"SETTINGS:\n"
1520"\n"
oldkaa0735f2018-02-02 16:52:55 +08001521" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001522" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001523" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001524" field separator. It defaults to ','.\n"
1525" * skipinitialspace - specifies how to interpret whitespace which\n"
1526" immediately follows a delimiter. It defaults to False, which\n"
1527" means that whitespace immediately following a delimiter is part\n"
1528" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001529" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001530" terminate rows.\n"
1531" * quoting - controls when quotes should be generated by the writer.\n"
1532" It can take on any of the following module constants:\n"
1533"\n"
1534" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1535" field contains either the quotechar or the delimiter\n"
1536" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1537" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001538" fields which do not parse as integers or floating point\n"
1539" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001540" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001541" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001542" the delimiter when quoting is set to QUOTE_NONE.\n"
1543" * doublequote - controls the handling of quotes inside fields. When\n"
1544" True, two consecutive quotes are interpreted as one during read,\n"
1545" and when writing, each quote character embedded in the data is\n"
1546" written as two quotes\n");
1547
1548PyDoc_STRVAR(csv_reader_doc,
1549" csv_reader = reader(iterable [, dialect='excel']\n"
1550" [optional keyword args])\n"
1551" for row in csv_reader:\n"
1552" process(row)\n"
1553"\n"
1554"The \"iterable\" argument can be any object that returns a line\n"
1555"of input for each iteration, such as a file object or a list. The\n"
1556"optional \"dialect\" parameter is discussed below. The function\n"
1557"also accepts optional keyword arguments which override settings\n"
1558"provided by the dialect.\n"
1559"\n"
1560"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001561"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001562
1563PyDoc_STRVAR(csv_writer_doc,
1564" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1565" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001566" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001567" csv_writer.writerow(row)\n"
1568"\n"
1569" [or]\n"
1570"\n"
1571" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1572" [optional keyword args])\n"
1573" csv_writer.writerows(rows)\n"
1574"\n"
1575"The \"fileobj\" argument can be any object that supports the file API.\n");
1576
1577PyDoc_STRVAR(csv_list_dialects_doc,
1578"Return a list of all know dialect names.\n"
1579" names = csv.list_dialects()");
1580
1581PyDoc_STRVAR(csv_get_dialect_doc,
1582"Return the dialect instance associated with name.\n"
1583" dialect = csv.get_dialect(name)");
1584
1585PyDoc_STRVAR(csv_register_dialect_doc,
1586"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001587" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001588
1589PyDoc_STRVAR(csv_unregister_dialect_doc,
1590"Delete the name/dialect mapping associated with a string name.\n"
1591" csv.unregister_dialect(name)");
1592
Andrew McNamara31d88962005-01-12 03:45:10 +00001593PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001594"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001595" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001596"\n"
1597"Returns old limit. If limit is not given, no new limit is set and\n"
1598"the old limit is returned");
1599
Skip Montanarob4a04172003-03-20 23:29:12 +00001600static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001601 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001603 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1605 { "list_dialects", (PyCFunction)csv_list_dialects,
1606 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001607 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1609 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1610 METH_O, csv_unregister_dialect_doc},
1611 { "get_dialect", (PyCFunction)csv_get_dialect,
1612 METH_O, csv_get_dialect_doc},
1613 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1614 METH_VARARGS, csv_field_size_limit_doc},
1615 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001616};
1617
Martin v. Löwis1a214512008-06-11 05:26:20 +00001618static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001619 PyModuleDef_HEAD_INIT,
1620 "_csv",
1621 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001622 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001623 csv_methods,
1624 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001625 _csv_traverse,
1626 _csv_clear,
1627 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001628};
1629
Skip Montanarob4a04172003-03-20 23:29:12 +00001630PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001631PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001632{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001633 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001634 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001635
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001636 if (PyType_Ready(&Reader_Type) < 0)
1637 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 if (PyType_Ready(&Writer_Type) < 0)
1640 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 /* Create the module and add the functions */
1643 module = PyModule_Create(&_csvmodule);
1644 if (module == NULL)
1645 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 /* Add version to the module. */
1648 if (PyModule_AddStringConstant(module, "__version__",
1649 MODULE_VERSION) == -1)
1650 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001651
Antoine Pitroue7672d32012-05-16 11:33:08 +02001652 /* Set the field limit */
Hai Shif707d942020-03-16 21:15:01 +08001653 get_csv_state(module)->field_limit = 128 * 1024;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001654 /* Do I still need to add this var to the Module Dict? */
1655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 /* Add _dialects dictionary */
Hai Shif707d942020-03-16 21:15:01 +08001657 get_csv_state(module)->dialects = PyDict_New();
1658 if (get_csv_state(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001660 Py_INCREF(get_csv_state(module)->dialects);
1661 if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 /* Add quote styles into dictionary */
1665 for (style = quote_styles; style->name; style++) {
1666 if (PyModule_AddIntConstant(module, style->name,
1667 style->style) == -1)
1668 return NULL;
1669 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001670
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001671 if (PyModule_AddType(module, &Dialect_Type)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001672 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001673 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001674
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001675 /* Add the CSV exception object to the module. */
Hai Shif707d942020-03-16 21:15:01 +08001676 get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1677 if (get_csv_state(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001678 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001679 Py_INCREF(get_csv_state(module)->error_obj);
1680 PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001681 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001682}