blob: 594f6c14727262773e65e42c5e1181e014287edd [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
21 long field_limit; /* max parsed field size */
22} _csvstate;
23
Hai Shif707d942020-03-16 21:15:01 +080024static inline _csvstate*
25get_csv_state(PyObject *module)
26{
27 void *state = PyModule_GetState(module);
28 assert(state != NULL);
29 return (_csvstate *)state;
30}
Antoine Pitroue7672d32012-05-16 11:33:08 +020031
32static int
33_csv_clear(PyObject *m)
34{
Hai Shif707d942020-03-16 21:15:01 +080035 Py_CLEAR(get_csv_state(m)->error_obj);
36 Py_CLEAR(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020037 return 0;
38}
39
40static int
41_csv_traverse(PyObject *m, visitproc visit, void *arg)
42{
Hai Shif707d942020-03-16 21:15:01 +080043 Py_VISIT(get_csv_state(m)->error_obj);
44 Py_VISIT(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020045 return 0;
46}
47
48static void
49_csv_free(void *m)
50{
51 _csv_clear((PyObject *)m);
52}
53
54static struct PyModuleDef _csvmodule;
55
56#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000057
/* States of the reader's character-by-character parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
63
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of the known quoting styles; the last entry has a NULL name and
   acts as the terminator. */
static const StyleDesc quote_styles[] = {
    { .style = QUOTE_MINIMAL,    .name = "QUOTE_MINIMAL" },
    { .style = QUOTE_ALL,        .name = "QUOTE_ALL" },
    { .style = QUOTE_NONNUMERIC, .name = "QUOTE_NONNUMERIC" },
    { .style = QUOTE_NONE,       .name = "QUOTE_NONE" },
    { .name = NULL }
};
80
81typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000083
Serhiy Storchaka323748a2018-07-26 13:21:09 +030084 char doublequote; /* is " represented by ""? */
85 char skipinitialspace; /* ignore spaces following delimiter? */
86 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030088 Py_UCS4 delimiter; /* field separator */
89 Py_UCS4 quotechar; /* quote character */
90 Py_UCS4 escapechar; /* escape character */
91 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Skip Montanarob4a04172003-03-20 23:29:12 +000093} DialectObj;
94
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
97typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *fields; /* field list for current record */
105 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200106 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000107 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 Py_ssize_t field_len; /* length of current field */
109 int numeric_field; /* treat field as numeric */
110 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000111} ReaderObj;
112
Neal Norwitz227b5332006-03-22 09:28:35 +0000113static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Skip Montanarob4a04172003-03-20 23:29:12 +0000115typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000117
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200118 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000121
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200122 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000123 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 Py_ssize_t rec_len; /* length of record */
125 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000126} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
Neal Norwitz227b5332006-03-22 09:28:35 +0000128static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
/*
 * DIALECT class
 */
133
134static PyObject *
135get_dialect_from_registry(PyObject * name_obj)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000138
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200139 dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 if (dialect_obj == NULL) {
141 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200142 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 }
144 else
145 Py_INCREF(dialect_obj);
146 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000147}
148
Skip Montanarob4a04172003-03-20 23:29:12 +0000149static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200150get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000151{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200153 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 }
155 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200156 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000157}
158
Skip Montanarob4a04172003-03-20 23:29:12 +0000159static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200160Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000161{
Dong-hee Na0383be42020-06-10 00:33:43 +0900162 Py_XINCREF(self->lineterminator);
163 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000164}
165
Skip Montanarob4a04172003-03-20 23:29:12 +0000166static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200167Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000170}
171
172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300191_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 if (src == NULL)
194 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200195 else {
196 int b = PyObject_IsTrue(src);
197 if (b < 0)
198 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300199 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200200 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000202}
203
Andrew McNamara1196cf12005-01-07 04:42:45 +0000204static int
205_set_int(const char *name, int *target, PyObject *src, int dflt)
206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 if (src == NULL)
208 *target = dflt;
209 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200210 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 if (!PyLong_CheckExact(src)) {
212 PyErr_Format(PyExc_TypeError,
213 "\"%s\" must be an integer", name);
214 return -1;
215 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 value = _PyLong_AsInt(src);
217 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000218 return -1;
219 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200220 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 }
222 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000223}
224
225static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200226_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000227{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 if (src == NULL)
229 *target = dflt;
230 else {
231 *target = '\0';
232 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000233 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200234 if (!PyUnicode_Check(src)) {
235 PyErr_Format(PyExc_TypeError,
236 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100237 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200238 return -1;
239 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100240 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200241 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300243 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 name);
245 return -1;
246 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100247 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 }
251 }
252 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000253}
254
255static int
256_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
257{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 if (src == NULL)
259 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
260 else {
261 if (src == Py_None)
262 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100263 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 PyErr_Format(PyExc_TypeError,
265 "\"%s\" must be a string", name);
266 return -1;
267 }
268 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 if (PyUnicode_READY(src) == -1)
270 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300272 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 }
274 }
275 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000276}
277
278static int
279dialect_check_quoting(int quoting)
280{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200281 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200284 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 return 0;
286 }
287 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
288 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000289}
Skip Montanarob4a04172003-03-20 23:29:12 +0000290
291#define D_OFF(x) offsetof(DialectObj, x)
292
293static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300294 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
295 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
296 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000298};
299
300static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000301 { "delimiter", (getter)Dialect_get_delimiter},
302 { "escapechar", (getter)Dialect_get_escapechar},
303 { "lineterminator", (getter)Dialect_get_lineterminator},
304 { "quotechar", (getter)Dialect_get_quotechar},
305 { "quoting", (getter)Dialect_get_quoting},
306 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static void
310Dialect_dealloc(DialectObj *self)
311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_XDECREF(self->lineterminator);
313 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000314}
315
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000316static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000317 "dialect",
318 "delimiter",
319 "doublequote",
320 "escapechar",
321 "lineterminator",
322 "quotechar",
323 "quoting",
324 "skipinitialspace",
325 "strict",
326 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000327};
328
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000329static PyObject *
330dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000331{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 DialectObj *self;
333 PyObject *ret = NULL;
334 PyObject *dialect = NULL;
335 PyObject *delimiter = NULL;
336 PyObject *doublequote = NULL;
337 PyObject *escapechar = NULL;
338 PyObject *lineterminator = NULL;
339 PyObject *quotechar = NULL;
340 PyObject *quoting = NULL;
341 PyObject *skipinitialspace = NULL;
342 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
345 "|OOOOOOOOO", dialect_kws,
346 &dialect,
347 &delimiter,
348 &doublequote,
349 &escapechar,
350 &lineterminator,
351 &quotechar,
352 &quoting,
353 &skipinitialspace,
354 &strict))
355 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100358 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 dialect = get_dialect_from_registry(dialect);
360 if (dialect == NULL)
361 return NULL;
362 }
363 else
364 Py_INCREF(dialect);
365 /* Can we reuse this instance? */
366 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200367 delimiter == NULL &&
368 doublequote == NULL &&
369 escapechar == NULL &&
370 lineterminator == NULL &&
371 quotechar == NULL &&
372 quoting == NULL &&
373 skipinitialspace == NULL &&
374 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 return dialect;
376 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 self = (DialectObj *)type->tp_alloc(type, 0);
379 if (self == NULL) {
380 Py_XDECREF(dialect);
381 return NULL;
382 }
383 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 Py_XINCREF(delimiter);
386 Py_XINCREF(doublequote);
387 Py_XINCREF(escapechar);
388 Py_XINCREF(lineterminator);
389 Py_XINCREF(quotechar);
390 Py_XINCREF(quoting);
391 Py_XINCREF(skipinitialspace);
392 Py_XINCREF(strict);
393 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000394#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 if (v == NULL) \
396 v = PyObject_GetAttrString(dialect, n)
397 DIALECT_GETATTR(delimiter, "delimiter");
398 DIALECT_GETATTR(doublequote, "doublequote");
399 DIALECT_GETATTR(escapechar, "escapechar");
400 DIALECT_GETATTR(lineterminator, "lineterminator");
401 DIALECT_GETATTR(quotechar, "quotechar");
402 DIALECT_GETATTR(quoting, "quoting");
403 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
404 DIALECT_GETATTR(strict, "strict");
405 PyErr_Clear();
406 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000409#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 if (meth(name, target, src, dflt)) \
411 goto err
412 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300413 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
415 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
416 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
417 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300418 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
419 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 /* validate options */
422 if (dialect_check_quoting(self->quoting))
423 goto err;
424 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200425 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300426 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 goto err;
428 }
429 if (quotechar == Py_None && quoting == NULL)
430 self->quoting = QUOTE_NONE;
431 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
432 PyErr_SetString(PyExc_TypeError,
433 "quotechar must be set if quoting enabled");
434 goto err;
435 }
436 if (self->lineterminator == 0) {
437 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
438 goto err;
439 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 ret = (PyObject *)self;
442 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000443err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 Py_XDECREF(self);
445 Py_XDECREF(dialect);
446 Py_XDECREF(delimiter);
447 Py_XDECREF(doublequote);
448 Py_XDECREF(escapechar);
449 Py_XDECREF(lineterminator);
450 Py_XDECREF(quotechar);
451 Py_XDECREF(quoting);
452 Py_XDECREF(skipinitialspace);
453 Py_XDECREF(strict);
454 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000455}
456
457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000459"CSV dialect\n"
460"\n"
461"The Dialect type records CSV parsing and generation options.\n");
462
463static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 PyVarObject_HEAD_INIT(NULL, 0)
465 "_csv.Dialect", /* tp_name */
466 sizeof(DialectObj), /* tp_basicsize */
467 0, /* tp_itemsize */
468 /* methods */
469 (destructor)Dialect_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200470 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 (getattrfunc)0, /* tp_getattr */
472 (setattrfunc)0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200473 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 (reprfunc)0, /* tp_repr */
475 0, /* tp_as_number */
476 0, /* tp_as_sequence */
477 0, /* tp_as_mapping */
478 (hashfunc)0, /* tp_hash */
479 (ternaryfunc)0, /* tp_call */
480 (reprfunc)0, /* tp_str */
481 0, /* tp_getattro */
482 0, /* tp_setattro */
483 0, /* tp_as_buffer */
484 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
485 Dialect_Type_doc, /* tp_doc */
486 0, /* tp_traverse */
487 0, /* tp_clear */
488 0, /* tp_richcompare */
489 0, /* tp_weaklistoffset */
490 0, /* tp_iter */
491 0, /* tp_iternext */
492 0, /* tp_methods */
493 Dialect_memberlist, /* tp_members */
494 Dialect_getsetlist, /* tp_getset */
495 0, /* tp_base */
496 0, /* tp_dict */
497 0, /* tp_descr_get */
498 0, /* tp_descr_set */
499 0, /* tp_dictoffset */
500 0, /* tp_init */
501 0, /* tp_alloc */
502 dialect_new, /* tp_new */
503 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000504};
505
Andrew McNamara91b97462005-01-11 01:07:23 +0000506/*
507 * Return an instance of the dialect type, given a Python instance or kwarg
508 * description of the dialect
509 */
510static PyObject *
511_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
512{
Victor Stinner6412f492016-08-23 00:21:34 +0200513 PyObject *type = (PyObject *)&Dialect_Type;
514 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100515 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200516 }
517 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100518 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200519 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000520}
521
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000525static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000526parse_save_field(ReaderObj *self)
527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000529
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200530 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
531 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (field == NULL)
533 return -1;
534 self->field_len = 0;
535 if (self->numeric_field) {
536 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 self->numeric_field = 0;
539 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200541 if (tmp == NULL)
542 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 field = tmp;
544 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100545 if (PyList_Append(self->fields, field) < 0) {
546 Py_DECREF(field);
547 return -1;
548 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 Py_DECREF(field);
550 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000551}
552
553static int
554parse_grow_buff(ReaderObj *self)
555{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500556 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
557
558 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
559 Py_UCS4 *field_new = self->field;
560 PyMem_Resize(field_new, Py_UCS4, field_size_new);
561 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 PyErr_NoMemory();
563 return 0;
564 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500565 self->field = field_new;
566 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000568}
569
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000570static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000572{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200573 if (self->field_len >= _csvstate_global->field_limit) {
574 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
575 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 return -1;
577 }
578 if (self->field_len == self->field_size && !parse_grow_buff(self))
579 return -1;
580 self->field[self->field_len++] = c;
581 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000582}
583
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000584static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200585parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000586{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 switch (self->state) {
590 case START_RECORD:
591 /* start of record */
592 if (c == '\0')
593 /* empty line - return [] */
594 break;
595 else if (c == '\n' || c == '\r') {
596 self->state = EAT_CRNL;
597 break;
598 }
599 /* normal character - handle as START_FIELD */
600 self->state = START_FIELD;
601 /* fallthru */
602 case START_FIELD:
603 /* expecting field */
604 if (c == '\n' || c == '\r' || c == '\0') {
605 /* save empty field - return [fields] */
606 if (parse_save_field(self) < 0)
607 return -1;
608 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
609 }
610 else if (c == dialect->quotechar &&
611 dialect->quoting != QUOTE_NONE) {
612 /* start quoted field */
613 self->state = IN_QUOTED_FIELD;
614 }
615 else if (c == dialect->escapechar) {
616 /* possible escaped character */
617 self->state = ESCAPED_CHAR;
618 }
619 else if (c == ' ' && dialect->skipinitialspace)
620 /* ignore space at start of field */
621 ;
622 else if (c == dialect->delimiter) {
623 /* save empty field */
624 if (parse_save_field(self) < 0)
625 return -1;
626 }
627 else {
628 /* begin new unquoted field */
629 if (dialect->quoting == QUOTE_NONNUMERIC)
630 self->numeric_field = 1;
631 if (parse_add_char(self, c) < 0)
632 return -1;
633 self->state = IN_FIELD;
634 }
635 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400638 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400639 if (parse_add_char(self, c) < 0)
640 return -1;
641 self->state = AFTER_ESCAPED_CRNL;
642 break;
643 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 if (c == '\0')
645 c = '\n';
646 if (parse_add_char(self, c) < 0)
647 return -1;
648 self->state = IN_FIELD;
649 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000650
R David Murrayc7c42ef2013-03-19 22:41:47 -0400651 case AFTER_ESCAPED_CRNL:
652 if (c == '\0')
653 break;
654 /*fallthru*/
655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 case IN_FIELD:
657 /* in unquoted field */
658 if (c == '\n' || c == '\r' || c == '\0') {
659 /* end of line - return [fields] */
660 if (parse_save_field(self) < 0)
661 return -1;
662 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
663 }
664 else if (c == dialect->escapechar) {
665 /* possible escaped character */
666 self->state = ESCAPED_CHAR;
667 }
668 else if (c == dialect->delimiter) {
669 /* save field - wait for new field */
670 if (parse_save_field(self) < 0)
671 return -1;
672 self->state = START_FIELD;
673 }
674 else {
675 /* normal character - save in field */
676 if (parse_add_char(self, c) < 0)
677 return -1;
678 }
679 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 case IN_QUOTED_FIELD:
682 /* in quoted field */
683 if (c == '\0')
684 ;
685 else if (c == dialect->escapechar) {
686 /* Possible escape character */
687 self->state = ESCAPE_IN_QUOTED_FIELD;
688 }
689 else if (c == dialect->quotechar &&
690 dialect->quoting != QUOTE_NONE) {
691 if (dialect->doublequote) {
692 /* doublequote; " represented by "" */
693 self->state = QUOTE_IN_QUOTED_FIELD;
694 }
695 else {
696 /* end of quote part of field */
697 self->state = IN_FIELD;
698 }
699 }
700 else {
701 /* normal character - save in field */
702 if (parse_add_char(self, c) < 0)
703 return -1;
704 }
705 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 case ESCAPE_IN_QUOTED_FIELD:
708 if (c == '\0')
709 c = '\n';
710 if (parse_add_char(self, c) < 0)
711 return -1;
712 self->state = IN_QUOTED_FIELD;
713 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300716 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 if (dialect->quoting != QUOTE_NONE &&
718 c == dialect->quotechar) {
719 /* save "" as " */
720 if (parse_add_char(self, c) < 0)
721 return -1;
722 self->state = IN_QUOTED_FIELD;
723 }
724 else if (c == dialect->delimiter) {
725 /* save field - wait for new field */
726 if (parse_save_field(self) < 0)
727 return -1;
728 self->state = START_FIELD;
729 }
730 else if (c == '\n' || c == '\r' || c == '\0') {
731 /* end of line - return [fields] */
732 if (parse_save_field(self) < 0)
733 return -1;
734 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
735 }
736 else if (!dialect->strict) {
737 if (parse_add_char(self, c) < 0)
738 return -1;
739 self->state = IN_FIELD;
740 }
741 else {
742 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200743 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 dialect->delimiter,
745 dialect->quotechar);
746 return -1;
747 }
748 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 case EAT_CRNL:
751 if (c == '\n' || c == '\r')
752 ;
753 else if (c == '\0')
754 self->state = START_RECORD;
755 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200756 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 return -1;
758 }
759 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000761 }
762 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000763}
764
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000765static int
766parse_reset(ReaderObj *self)
767{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300768 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200781 Py_UCS4 c;
782 Py_ssize_t pos, linelen;
783 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300784 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200785 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 if (parse_reset(self) < 0)
788 return NULL;
789 do {
790 lineobj = PyIter_Next(self->input_iter);
791 if (lineobj == NULL) {
792 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700793 if (!PyErr_Occurred() && (self->field_len != 0 ||
794 self->state == IN_QUOTED_FIELD)) {
795 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700796 PyErr_SetString(_csvstate_global->error_obj,
797 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700798 else if (parse_save_field(self) >= 0)
799 break;
800 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 return NULL;
802 }
803 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200804 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 "iterator should return strings, "
806 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300807 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100808 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 );
810 Py_DECREF(lineobj);
811 return NULL;
812 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100813 if (PyUnicode_READY(lineobj) == -1) {
814 Py_DECREF(lineobj);
815 return NULL;
816 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200818 kind = PyUnicode_KIND(lineobj);
819 data = PyUnicode_DATA(lineobj);
820 pos = 0;
821 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000822 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200823 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000824 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000825 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200826 PyErr_Format(_csvstate_global->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700827 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 goto err;
829 }
830 if (parse_process_char(self, c) < 0) {
831 Py_DECREF(lineobj);
832 goto err;
833 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200834 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 }
836 Py_DECREF(lineobj);
837 if (parse_process_char(self, 0) < 0)
838 goto err;
839 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 fields = self->fields;
842 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000843err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000845}
846
847static void
848Reader_dealloc(ReaderObj *self)
849{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 PyObject_GC_UnTrack(self);
851 Py_XDECREF(self->dialect);
852 Py_XDECREF(self->input_iter);
853 Py_XDECREF(self->fields);
854 if (self->field != NULL)
855 PyMem_Free(self->field);
856 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000857}
858
859static int
860Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 Py_VISIT(self->dialect);
863 Py_VISIT(self->input_iter);
864 Py_VISIT(self->fields);
865 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000866}
867
868static int
869Reader_clear(ReaderObj *self)
870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 Py_CLEAR(self->dialect);
872 Py_CLEAR(self->input_iter);
873 Py_CLEAR(self->fields);
874 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000875}
876
877PyDoc_STRVAR(Reader_Type_doc,
878"CSV reader\n"
879"\n"
880"Reader objects are responsible for reading and parsing tabular data\n"
881"in CSV format.\n"
882);
883
884static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000886};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000887#define R_OFF(x) offsetof(ReaderObj, x)
888
889static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
891 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
892 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000893};
894
Skip Montanarob4a04172003-03-20 23:29:12 +0000895
896static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 PyVarObject_HEAD_INIT(NULL, 0)
898 "_csv.reader", /*tp_name*/
899 sizeof(ReaderObj), /*tp_basicsize*/
900 0, /*tp_itemsize*/
901 /* methods */
902 (destructor)Reader_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200903 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 (getattrfunc)0, /*tp_getattr*/
905 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200906 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 (reprfunc)0, /*tp_repr*/
908 0, /*tp_as_number*/
909 0, /*tp_as_sequence*/
910 0, /*tp_as_mapping*/
911 (hashfunc)0, /*tp_hash*/
912 (ternaryfunc)0, /*tp_call*/
913 (reprfunc)0, /*tp_str*/
914 0, /*tp_getattro*/
915 0, /*tp_setattro*/
916 0, /*tp_as_buffer*/
917 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
918 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
919 Reader_Type_doc, /*tp_doc*/
920 (traverseproc)Reader_traverse, /*tp_traverse*/
921 (inquiry)Reader_clear, /*tp_clear*/
922 0, /*tp_richcompare*/
923 0, /*tp_weaklistoffset*/
924 PyObject_SelfIter, /*tp_iter*/
925 (getiterfunc)Reader_iternext, /*tp_iternext*/
926 Reader_methods, /*tp_methods*/
927 Reader_memberlist, /*tp_members*/
928 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000929
930};
931
932static PyObject *
933csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
934{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 PyObject * iterator, * dialect = NULL;
936 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 if (!self)
939 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 self->dialect = NULL;
942 self->fields = NULL;
943 self->input_iter = NULL;
944 self->field = NULL;
945 self->field_size = 0;
946 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 if (parse_reset(self) < 0) {
949 Py_DECREF(self);
950 return NULL;
951 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
954 Py_DECREF(self);
955 return NULL;
956 }
957 self->input_iter = PyObject_GetIter(iterator);
958 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 Py_DECREF(self);
960 return NULL;
961 }
962 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
963 if (self->dialect == NULL) {
964 Py_DECREF(self);
965 return NULL;
966 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000967
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000968 PyObject_GC_Track(self);
969 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000970}
971
972/*
973 * WRITER
974 */
975/* ---------------------------------------------------------------- */
976static void
977join_reset(WriterObj *self)
978{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000979 self->rec_len = 0;
980 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000981}
982
983#define MEM_INCR 32768
984
985/* Calculate new record length or append field to record. Return new
986 * record length.
987 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000988static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300989join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +0300990 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200991 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000992{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 DialectObj *dialect = self->dialect;
994 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000995 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000996
Benjamin Peterson6e01d902016-08-13 17:17:06 -0700997#define INCLEN \
998 do {\
999 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1000 goto overflow; \
1001 } \
1002 rec_len++; \
1003 } while(0)
1004
1005#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001006 do {\
1007 if (copy_phase) \
1008 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001009 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 /* If this is not the first field we need a field separator */
1015 if (self->num_fields > 0)
1016 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 /* Handle preceding quote */
1019 if (copy_phase && *quoted)
1020 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 /* Copy/count field data */
1023 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001024 for (i = 0; field_data && (i < field_len); i++) {
1025 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001027
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 if (c == dialect->delimiter ||
1029 c == dialect->escapechar ||
1030 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001031 PyUnicode_FindChar(
1032 dialect->lineterminator, c, 0,
1033 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 if (dialect->quoting == QUOTE_NONE)
1035 want_escape = 1;
1036 else {
1037 if (c == dialect->quotechar) {
1038 if (dialect->doublequote)
1039 ADDCH(dialect->quotechar);
1040 else
1041 want_escape = 1;
1042 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001043 else if (c == dialect->escapechar) {
1044 want_escape = 1;
1045 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 if (!want_escape)
1047 *quoted = 1;
1048 }
1049 if (want_escape) {
1050 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001051 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 "need to escape, but no escapechar set");
1053 return -1;
1054 }
1055 ADDCH(dialect->escapechar);
1056 }
1057 }
1058 /* Copy field character into record buffer.
1059 */
1060 ADDCH(c);
1061 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 if (*quoted) {
1064 if (copy_phase)
1065 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001066 else {
1067 INCLEN; /* starting quote */
1068 INCLEN; /* ending quote */
1069 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 }
1071 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001072
1073 overflow:
1074 PyErr_NoMemory();
1075 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001076#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001077#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001078}
1079
1080static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001081join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001082{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001083 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001085 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001086 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1087 Py_UCS4 *rec_new = self->rec;
1088 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1089 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 PyErr_NoMemory();
1091 return 0;
1092 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001093 self->rec = rec_new;
1094 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 }
1096 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001097}
1098
1099static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001100join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001101{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001102 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001103 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001104 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001105 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001106
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001107 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001108 if (PyUnicode_READY(field) == -1)
1109 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001110 field_kind = PyUnicode_KIND(field);
1111 field_data = PyUnicode_DATA(field);
1112 field_len = PyUnicode_GET_LENGTH(field);
1113 }
1114 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001115 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 if (rec_len < 0)
1117 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001119 /* grow record buffer if necessary */
1120 if (!join_check_rec_size(self, rec_len))
1121 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001122
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001123 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001124 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001126
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128}
1129
1130static int
1131join_append_lineterminator(WriterObj *self)
1132{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001133 Py_ssize_t terminator_len, i;
1134 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001135 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001137 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 if (terminator_len == -1)
1139 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1146 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1147 for (i = 0; i < terminator_len; i++)
1148 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001151 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152}
1153
1154PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001155"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001156"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001157"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001158"elements will be converted to string.");
1159
1160static PyObject *
1161csv_writerow(WriterObj *self, PyObject *seq)
1162{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001164 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001166 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001167 if (iter == NULL) {
1168 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
1169 PyErr_Format(_csvstate_global->error_obj,
1170 "iterable expected, not %.200s",
1171 Py_TYPE(seq)->tp_name);
1172 }
1173 return NULL;
1174 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001175
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 /* Join all fields in internal buffer.
1177 */
1178 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001179 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 int append_ok;
1181 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001183 switch (dialect->quoting) {
1184 case QUOTE_NONNUMERIC:
1185 quoted = !PyNumber_Check(field);
1186 break;
1187 case QUOTE_ALL:
1188 quoted = 1;
1189 break;
1190 default:
1191 quoted = 0;
1192 break;
1193 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001196 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 Py_DECREF(field);
1198 }
1199 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001200 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 Py_DECREF(field);
1202 }
1203 else {
1204 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001205
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001206 str = PyObject_Str(field);
1207 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001208 if (str == NULL) {
1209 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001211 }
1212 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 Py_DECREF(str);
1214 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001215 if (!append_ok) {
1216 Py_DECREF(iter);
1217 return NULL;
1218 }
1219 }
1220 Py_DECREF(iter);
1221 if (PyErr_Occurred())
1222 return NULL;
1223
Licht Takeuchi20019002017-12-12 18:57:06 +09001224 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001225 if (dialect->quoting == QUOTE_NONE) {
1226 PyErr_Format(_csvstate_global->error_obj,
1227 "single empty field record must be quoted");
1228 return NULL;
1229 }
1230 self->num_fields--;
1231 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 return NULL;
1233 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 /* Add line terminator.
1236 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001237 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001238 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001239 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001240
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001241 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1242 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001243 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001244 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001245 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001246 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001247 Py_DECREF(line);
1248 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001249}
1250
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001251PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001252"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001253"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001254"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001255"elements will be converted to string.");
1256
Skip Montanarob4a04172003-03-20 23:29:12 +00001257static PyObject *
1258csv_writerows(WriterObj *self, PyObject *seqseq)
1259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001261
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 row_iter = PyObject_GetIter(seqseq);
1263 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 return NULL;
1265 }
1266 while ((row_obj = PyIter_Next(row_iter))) {
1267 result = csv_writerow(self, row_obj);
1268 Py_DECREF(row_obj);
1269 if (!result) {
1270 Py_DECREF(row_iter);
1271 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001272 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 else
1274 Py_DECREF(result);
1275 }
1276 Py_DECREF(row_iter);
1277 if (PyErr_Occurred())
1278 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001279 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001280}
1281
1282static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001283 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1284 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1285 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001286};
1287
1288#define W_OFF(x) offsetof(WriterObj, x)
1289
1290static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1292 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001293};
1294
1295static void
1296Writer_dealloc(WriterObj *self)
1297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 PyObject_GC_UnTrack(self);
1299 Py_XDECREF(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001300 Py_XDECREF(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 if (self->rec != NULL)
1302 PyMem_Free(self->rec);
1303 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001304}
1305
1306static int
1307Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1308{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001310 Py_VISIT(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001312}
1313
1314static int
1315Writer_clear(WriterObj *self)
1316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001318 Py_CLEAR(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001320}
1321
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001323"CSV writer\n"
1324"\n"
1325"Writer objects are responsible for generating tabular data\n"
1326"in CSV format from sequence input.\n"
1327);
1328
1329static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001330 PyVarObject_HEAD_INIT(NULL, 0)
1331 "_csv.writer", /*tp_name*/
1332 sizeof(WriterObj), /*tp_basicsize*/
1333 0, /*tp_itemsize*/
1334 /* methods */
1335 (destructor)Writer_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001336 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 (getattrfunc)0, /*tp_getattr*/
1338 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001339 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 (reprfunc)0, /*tp_repr*/
1341 0, /*tp_as_number*/
1342 0, /*tp_as_sequence*/
1343 0, /*tp_as_mapping*/
1344 (hashfunc)0, /*tp_hash*/
1345 (ternaryfunc)0, /*tp_call*/
1346 (reprfunc)0, /*tp_str*/
1347 0, /*tp_getattro*/
1348 0, /*tp_setattro*/
1349 0, /*tp_as_buffer*/
1350 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1351 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1352 Writer_Type_doc,
1353 (traverseproc)Writer_traverse, /*tp_traverse*/
1354 (inquiry)Writer_clear, /*tp_clear*/
1355 0, /*tp_richcompare*/
1356 0, /*tp_weaklistoffset*/
1357 (getiterfunc)0, /*tp_iter*/
1358 (getiterfunc)0, /*tp_iternext*/
1359 Writer_methods, /*tp_methods*/
1360 Writer_memberlist, /*tp_members*/
1361 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001362};
1363
1364static PyObject *
1365csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1366{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367 PyObject * output_file, * dialect = NULL;
1368 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001369 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 if (!self)
1372 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001375 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 self->rec = NULL;
1378 self->rec_size = 0;
1379 self->rec_len = 0;
1380 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1383 Py_DECREF(self);
1384 return NULL;
1385 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001386 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1387 Py_DECREF(self);
1388 return NULL;
1389 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001390 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 PyErr_SetString(PyExc_TypeError,
1392 "argument 1 must have a \"write\" method");
1393 Py_DECREF(self);
1394 return NULL;
1395 }
1396 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1397 if (self->dialect == NULL) {
1398 Py_DECREF(self);
1399 return NULL;
1400 }
1401 PyObject_GC_Track(self);
1402 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001403}
1404
1405/*
1406 * DIALECT REGISTRY
1407 */
1408static PyObject *
1409csv_list_dialects(PyObject *module, PyObject *args)
1410{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001411 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001412}
1413
1414static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001415csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001416{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 PyObject *name_obj, *dialect_obj = NULL;
1418 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1421 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001422 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001424 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 return NULL;
1426 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001427 if (PyUnicode_READY(name_obj) == -1)
1428 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001429 dialect = _call_dialect(dialect_obj, kwargs);
1430 if (dialect == NULL)
1431 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001432 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 Py_DECREF(dialect);
1434 return NULL;
1435 }
1436 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001437 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001438}
1439
1440static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001441csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001442{
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001443 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1444 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1445 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1446 }
1447 return NULL;
1448 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001449 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001450}
1451
1452static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001453csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001454{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001456}
1457
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001458static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001459csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001460{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001462 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1465 return NULL;
1466 if (new_limit != NULL) {
1467 if (!PyLong_CheckExact(new_limit)) {
1468 PyErr_Format(PyExc_TypeError,
1469 "limit must be an integer");
1470 return NULL;
1471 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001472 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1473 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1474 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 return NULL;
1476 }
1477 }
1478 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001479}
1480
Skip Montanarob4a04172003-03-20 23:29:12 +00001481/*
1482 * MODULE
1483 */
1484
1485PyDoc_STRVAR(csv_module_doc,
1486"CSV parsing and writing.\n"
1487"\n"
1488"This module provides classes that assist in the reading and writing\n"
1489"of Comma Separated Value (CSV) files, and implements the interface\n"
1490"described by PEP 305. Although many CSV files are simple to parse,\n"
1491"the format is not formally defined by a stable specification and\n"
1492"is subtle enough that parsing lines of a CSV file with something\n"
1493"like line.split(\",\") is bound to fail. The module supports three\n"
1494"basic APIs: reading, writing, and registration of dialects.\n"
1495"\n"
1496"\n"
1497"DIALECT REGISTRATION:\n"
1498"\n"
1499"Readers and writers support a dialect argument, which is a convenient\n"
1500"handle on a group of settings. When the dialect argument is a string,\n"
1501"it identifies one of the dialects previously registered with the module.\n"
1502"If it is a class or instance, the attributes of the argument are used as\n"
1503"the settings for the reader or writer:\n"
1504"\n"
1505" class excel:\n"
1506" delimiter = ','\n"
1507" quotechar = '\"'\n"
1508" escapechar = None\n"
1509" doublequote = True\n"
1510" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001511" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001512" quoting = QUOTE_MINIMAL\n"
1513"\n"
1514"SETTINGS:\n"
1515"\n"
oldkaa0735f2018-02-02 16:52:55 +08001516" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001517" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001518" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001519" field separator. It defaults to ','.\n"
1520" * skipinitialspace - specifies how to interpret whitespace which\n"
1521" immediately follows a delimiter. It defaults to False, which\n"
1522" means that whitespace immediately following a delimiter is part\n"
1523" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001524" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001525" terminate rows.\n"
1526" * quoting - controls when quotes should be generated by the writer.\n"
1527" It can take on any of the following module constants:\n"
1528"\n"
1529" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1530" field contains either the quotechar or the delimiter\n"
1531" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1532" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001533" fields which do not parse as integers or floating point\n"
1534" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001535" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001536" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001537" the delimiter when quoting is set to QUOTE_NONE.\n"
1538" * doublequote - controls the handling of quotes inside fields. When\n"
1539" True, two consecutive quotes are interpreted as one during read,\n"
1540" and when writing, each quote character embedded in the data is\n"
1541" written as two quotes\n");
1542
1543PyDoc_STRVAR(csv_reader_doc,
1544" csv_reader = reader(iterable [, dialect='excel']\n"
1545" [optional keyword args])\n"
1546" for row in csv_reader:\n"
1547" process(row)\n"
1548"\n"
1549"The \"iterable\" argument can be any object that returns a line\n"
1550"of input for each iteration, such as a file object or a list. The\n"
1551"optional \"dialect\" parameter is discussed below. The function\n"
1552"also accepts optional keyword arguments which override settings\n"
1553"provided by the dialect.\n"
1554"\n"
1555"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001556"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001557
1558PyDoc_STRVAR(csv_writer_doc,
1559" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1560" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001561" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001562" csv_writer.writerow(row)\n"
1563"\n"
1564" [or]\n"
1565"\n"
1566" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1567" [optional keyword args])\n"
1568" csv_writer.writerows(rows)\n"
1569"\n"
1570"The \"fileobj\" argument can be any object that supports the file API.\n");
1571
1572PyDoc_STRVAR(csv_list_dialects_doc,
1573"Return a list of all know dialect names.\n"
1574" names = csv.list_dialects()");
1575
1576PyDoc_STRVAR(csv_get_dialect_doc,
1577"Return the dialect instance associated with name.\n"
1578" dialect = csv.get_dialect(name)");
1579
1580PyDoc_STRVAR(csv_register_dialect_doc,
1581"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001582" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001583
1584PyDoc_STRVAR(csv_unregister_dialect_doc,
1585"Delete the name/dialect mapping associated with a string name.\n"
1586" csv.unregister_dialect(name)");
1587
Andrew McNamara31d88962005-01-12 03:45:10 +00001588PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001589"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001590" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001591"\n"
1592"Returns old limit. If limit is not given, no new limit is set and\n"
1593"the old limit is returned");
1594
Skip Montanarob4a04172003-03-20 23:29:12 +00001595static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001596 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001597 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001598 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001599 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1600 { "list_dialects", (PyCFunction)csv_list_dialects,
1601 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001602 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001603 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1604 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1605 METH_O, csv_unregister_dialect_doc},
1606 { "get_dialect", (PyCFunction)csv_get_dialect,
1607 METH_O, csv_get_dialect_doc},
1608 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1609 METH_VARARGS, csv_field_size_limit_doc},
1610 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001611};
1612
Martin v. Löwis1a214512008-06-11 05:26:20 +00001613static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 PyModuleDef_HEAD_INIT,
1615 "_csv",
1616 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001617 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 csv_methods,
1619 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001620 _csv_traverse,
1621 _csv_clear,
1622 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001623};
1624
Skip Montanarob4a04172003-03-20 23:29:12 +00001625PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001626PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001627{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001629 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 if (PyType_Ready(&Reader_Type) < 0)
1632 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 if (PyType_Ready(&Writer_Type) < 0)
1635 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 /* Create the module and add the functions */
1638 module = PyModule_Create(&_csvmodule);
1639 if (module == NULL)
1640 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 /* Add version to the module. */
1643 if (PyModule_AddStringConstant(module, "__version__",
1644 MODULE_VERSION) == -1)
1645 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001646
Antoine Pitroue7672d32012-05-16 11:33:08 +02001647 /* Set the field limit */
Hai Shif707d942020-03-16 21:15:01 +08001648 get_csv_state(module)->field_limit = 128 * 1024;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001649 /* Do I still need to add this var to the Module Dict? */
1650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 /* Add _dialects dictionary */
Hai Shif707d942020-03-16 21:15:01 +08001652 get_csv_state(module)->dialects = PyDict_New();
1653 if (get_csv_state(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001655 Py_INCREF(get_csv_state(module)->dialects);
1656 if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 /* Add quote styles into dictionary */
1660 for (style = quote_styles; style->name; style++) {
1661 if (PyModule_AddIntConstant(module, style->name,
1662 style->style) == -1)
1663 return NULL;
1664 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001665
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001666 if (PyModule_AddType(module, &Dialect_Type)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001668 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001669
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 /* Add the CSV exception object to the module. */
Hai Shif707d942020-03-16 21:15:01 +08001671 get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1672 if (get_csv_state(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001674 Py_INCREF(get_csv_state(module)->error_obj);
1675 PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001677}