blob: 2d4247740eb29e18b1006bade1be2a8b15a44736 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module. Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
21 long field_limit; /* max parsed field size */
22} _csvstate;
23
Hai Shif707d942020-03-16 21:15:01 +080024static inline _csvstate*
25get_csv_state(PyObject *module)
26{
27 void *state = PyModule_GetState(module);
28 assert(state != NULL);
29 return (_csvstate *)state;
30}
Antoine Pitroue7672d32012-05-16 11:33:08 +020031
32static int
33_csv_clear(PyObject *m)
34{
Hai Shif707d942020-03-16 21:15:01 +080035 Py_CLEAR(get_csv_state(m)->error_obj);
36 Py_CLEAR(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020037 return 0;
38}
39
40static int
41_csv_traverse(PyObject *m, visitproc visit, void *arg)
42{
Hai Shif707d942020-03-16 21:15:01 +080043 Py_VISIT(get_csv_state(m)->error_obj);
44 Py_VISIT(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020045 return 0;
46}
47
48static void
49_csv_free(void *m)
50{
51 _csv_clear((PyObject *)m);
52}
53
54static struct PyModuleDef _csvmodule;
55
56#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000057
/* States of the reader's character-level state machine. */
typedef enum {
    START_RECORD,               /* start of record */
    START_FIELD,                /* expecting field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* in unquoted field */
    IN_QUOTED_FIELD,            /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* seen a quote in a quoted field */
    EAT_CRNL,                   /* consuming end-of-line characters */
    AFTER_ESCAPED_CRNL          /* just stored an escaped CR or LF */
} ParserState;
63
/* Quoting styles; the numeric values follow declaration order. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
67
68typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020070 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000071} StyleDesc;
72
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020073static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000074 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
75 { QUOTE_ALL, "QUOTE_ALL" },
76 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
77 { QUOTE_NONE, "QUOTE_NONE" },
78 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000079};
80
81typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000083
Serhiy Storchaka323748a2018-07-26 13:21:09 +030084 char doublequote; /* is " represented by ""? */
85 char skipinitialspace; /* ignore spaces following delimiter? */
86 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030088 Py_UCS4 delimiter; /* field separator */
89 Py_UCS4 quotechar; /* quote character */
90 Py_UCS4 escapechar; /* escape character */
91 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Skip Montanarob4a04172003-03-20 23:29:12 +000093} DialectObj;
94
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
97typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *fields; /* field list for current record */
105 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200106 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000107 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 Py_ssize_t field_len; /* length of current field */
109 int numeric_field; /* treat field as numeric */
110 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000111} ReaderObj;
112
Neal Norwitz227b5332006-03-22 09:28:35 +0000113static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Skip Montanarob4a04172003-03-20 23:29:12 +0000115typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000117
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200118 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000121
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200122 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000123 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 Py_ssize_t rec_len; /* length of record */
125 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000126} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
Neal Norwitz227b5332006-03-22 09:28:35 +0000128static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130/*
131 * DIALECT class
132 */
133
134static PyObject *
135get_dialect_from_registry(PyObject * name_obj)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000138
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200139 dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 if (dialect_obj == NULL) {
141 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200142 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 }
144 else
145 Py_INCREF(dialect_obj);
146 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000147}
148
Skip Montanarob4a04172003-03-20 23:29:12 +0000149static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200150get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000151{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200153 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 }
155 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200156 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000157}
158
Skip Montanarob4a04172003-03-20 23:29:12 +0000159static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200160Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000161{
Dong-hee Na0383be42020-06-10 00:33:43 +0900162 Py_XINCREF(self->lineterminator);
163 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000164}
165
Skip Montanarob4a04172003-03-20 23:29:12 +0000166static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200167Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000170}
171
172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300191_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 if (src == NULL)
194 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200195 else {
196 int b = PyObject_IsTrue(src);
197 if (b < 0)
198 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300199 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200200 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000201 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000202}
203
Andrew McNamara1196cf12005-01-07 04:42:45 +0000204static int
205_set_int(const char *name, int *target, PyObject *src, int dflt)
206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 if (src == NULL)
208 *target = dflt;
209 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200210 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000211 if (!PyLong_CheckExact(src)) {
212 PyErr_Format(PyExc_TypeError,
213 "\"%s\" must be an integer", name);
214 return -1;
215 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 value = _PyLong_AsInt(src);
217 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000218 return -1;
219 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200220 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000221 }
222 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000223}
224
225static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200226_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000227{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 if (src == NULL)
229 *target = dflt;
230 else {
231 *target = '\0';
232 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000233 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200234 if (!PyUnicode_Check(src)) {
235 PyErr_Format(PyExc_TypeError,
236 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100237 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200238 return -1;
239 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100240 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200241 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000242 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300243 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 name);
245 return -1;
246 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100247 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 }
251 }
252 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000253}
254
255static int
256_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
257{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 if (src == NULL)
259 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
260 else {
261 if (src == Py_None)
262 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100263 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 PyErr_Format(PyExc_TypeError,
265 "\"%s\" must be a string", name);
266 return -1;
267 }
268 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 if (PyUnicode_READY(src) == -1)
270 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000271 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300272 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 }
274 }
275 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000276}
277
278static int
279dialect_check_quoting(int quoting)
280{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200281 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000283 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200284 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 return 0;
286 }
287 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
288 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000289}
Skip Montanarob4a04172003-03-20 23:29:12 +0000290
291#define D_OFF(x) offsetof(DialectObj, x)
292
293static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300294 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
295 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
296 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000298};
299
300static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000301 { "delimiter", (getter)Dialect_get_delimiter},
302 { "escapechar", (getter)Dialect_get_escapechar},
303 { "lineterminator", (getter)Dialect_get_lineterminator},
304 { "quotechar", (getter)Dialect_get_quotechar},
305 { "quoting", (getter)Dialect_get_quoting},
306 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static void
310Dialect_dealloc(DialectObj *self)
311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000312 Py_XDECREF(self->lineterminator);
313 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000314}
315
/* Keyword names accepted by dialect_new(), in the order of its "O" format
 * units; the list is NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
328
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000329static PyObject *
330dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000331{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000332 DialectObj *self;
333 PyObject *ret = NULL;
334 PyObject *dialect = NULL;
335 PyObject *delimiter = NULL;
336 PyObject *doublequote = NULL;
337 PyObject *escapechar = NULL;
338 PyObject *lineterminator = NULL;
339 PyObject *quotechar = NULL;
340 PyObject *quoting = NULL;
341 PyObject *skipinitialspace = NULL;
342 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000343
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000344 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
345 "|OOOOOOOOO", dialect_kws,
346 &dialect,
347 &delimiter,
348 &doublequote,
349 &escapechar,
350 &lineterminator,
351 &quotechar,
352 &quoting,
353 &skipinitialspace,
354 &strict))
355 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000356
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100358 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 dialect = get_dialect_from_registry(dialect);
360 if (dialect == NULL)
361 return NULL;
362 }
363 else
364 Py_INCREF(dialect);
365 /* Can we reuse this instance? */
366 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200367 delimiter == NULL &&
368 doublequote == NULL &&
369 escapechar == NULL &&
370 lineterminator == NULL &&
371 quotechar == NULL &&
372 quoting == NULL &&
373 skipinitialspace == NULL &&
374 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000375 return dialect;
376 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000377
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000378 self = (DialectObj *)type->tp_alloc(type, 0);
379 if (self == NULL) {
380 Py_XDECREF(dialect);
381 return NULL;
382 }
383 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000385 Py_XINCREF(delimiter);
386 Py_XINCREF(doublequote);
387 Py_XINCREF(escapechar);
388 Py_XINCREF(lineterminator);
389 Py_XINCREF(quotechar);
390 Py_XINCREF(quoting);
391 Py_XINCREF(skipinitialspace);
392 Py_XINCREF(strict);
393 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000394#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 if (v == NULL) \
396 v = PyObject_GetAttrString(dialect, n)
397 DIALECT_GETATTR(delimiter, "delimiter");
398 DIALECT_GETATTR(doublequote, "doublequote");
399 DIALECT_GETATTR(escapechar, "escapechar");
400 DIALECT_GETATTR(lineterminator, "lineterminator");
401 DIALECT_GETATTR(quotechar, "quotechar");
402 DIALECT_GETATTR(quoting, "quoting");
403 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
404 DIALECT_GETATTR(strict, "strict");
405 PyErr_Clear();
406 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000409#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 if (meth(name, target, src, dflt)) \
411 goto err
412 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300413 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
415 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
416 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
417 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300418 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
419 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000420
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000421 /* validate options */
422 if (dialect_check_quoting(self->quoting))
423 goto err;
424 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200425 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300426 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 goto err;
428 }
429 if (quotechar == Py_None && quoting == NULL)
430 self->quoting = QUOTE_NONE;
431 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
432 PyErr_SetString(PyExc_TypeError,
433 "quotechar must be set if quoting enabled");
434 goto err;
435 }
436 if (self->lineterminator == 0) {
437 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
438 goto err;
439 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 ret = (PyObject *)self;
442 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000443err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 Py_XDECREF(self);
445 Py_XDECREF(dialect);
446 Py_XDECREF(delimiter);
447 Py_XDECREF(doublequote);
448 Py_XDECREF(escapechar);
449 Py_XDECREF(lineterminator);
450 Py_XDECREF(quotechar);
451 Py_XDECREF(quoting);
452 Py_XDECREF(skipinitialspace);
453 Py_XDECREF(strict);
454 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000455}
456
457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000458PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000459"CSV dialect\n"
460"\n"
461"The Dialect type records CSV parsing and generation options.\n");
462
463static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464 PyVarObject_HEAD_INIT(NULL, 0)
465 "_csv.Dialect", /* tp_name */
466 sizeof(DialectObj), /* tp_basicsize */
467 0, /* tp_itemsize */
468 /* methods */
469 (destructor)Dialect_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200470 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 (getattrfunc)0, /* tp_getattr */
472 (setattrfunc)0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200473 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 (reprfunc)0, /* tp_repr */
475 0, /* tp_as_number */
476 0, /* tp_as_sequence */
477 0, /* tp_as_mapping */
478 (hashfunc)0, /* tp_hash */
479 (ternaryfunc)0, /* tp_call */
480 (reprfunc)0, /* tp_str */
481 0, /* tp_getattro */
482 0, /* tp_setattro */
483 0, /* tp_as_buffer */
484 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
485 Dialect_Type_doc, /* tp_doc */
486 0, /* tp_traverse */
487 0, /* tp_clear */
488 0, /* tp_richcompare */
489 0, /* tp_weaklistoffset */
490 0, /* tp_iter */
491 0, /* tp_iternext */
492 0, /* tp_methods */
493 Dialect_memberlist, /* tp_members */
494 Dialect_getsetlist, /* tp_getset */
495 0, /* tp_base */
496 0, /* tp_dict */
497 0, /* tp_descr_get */
498 0, /* tp_descr_set */
499 0, /* tp_dictoffset */
500 0, /* tp_init */
501 0, /* tp_alloc */
502 dialect_new, /* tp_new */
503 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000504};
505
Andrew McNamara91b97462005-01-11 01:07:23 +0000506/*
507 * Return an instance of the dialect type, given a Python instance or kwarg
508 * description of the dialect
509 */
510static PyObject *
511_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
512{
Victor Stinner6412f492016-08-23 00:21:34 +0200513 PyObject *type = (PyObject *)&Dialect_Type;
514 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100515 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200516 }
517 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100518 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200519 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000520}
521
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000522/*
523 * READER
524 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000525static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000526parse_save_field(ReaderObj *self)
527{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000528 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000529
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200530 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
531 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 if (field == NULL)
533 return -1;
534 self->field_len = 0;
535 if (self->numeric_field) {
536 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000537
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 self->numeric_field = 0;
539 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200541 if (tmp == NULL)
542 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 field = tmp;
544 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100545 if (PyList_Append(self->fields, field) < 0) {
546 Py_DECREF(field);
547 return -1;
548 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 Py_DECREF(field);
550 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000551}
552
553static int
554parse_grow_buff(ReaderObj *self)
555{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500556 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
557
558 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
559 Py_UCS4 *field_new = self->field;
560 PyMem_Resize(field_new, Py_UCS4, field_size_new);
561 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 PyErr_NoMemory();
563 return 0;
564 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500565 self->field = field_new;
566 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000568}
569
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000570static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000572{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200573 if (self->field_len >= _csvstate_global->field_limit) {
574 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
575 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 return -1;
577 }
578 if (self->field_len == self->field_size && !parse_grow_buff(self))
579 return -1;
580 self->field[self->field_len++] = c;
581 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000582}
583
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000584static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200585parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000586{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000588
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 switch (self->state) {
590 case START_RECORD:
591 /* start of record */
592 if (c == '\0')
593 /* empty line - return [] */
594 break;
595 else if (c == '\n' || c == '\r') {
596 self->state = EAT_CRNL;
597 break;
598 }
599 /* normal character - handle as START_FIELD */
600 self->state = START_FIELD;
601 /* fallthru */
602 case START_FIELD:
603 /* expecting field */
604 if (c == '\n' || c == '\r' || c == '\0') {
605 /* save empty field - return [fields] */
606 if (parse_save_field(self) < 0)
607 return -1;
608 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
609 }
610 else if (c == dialect->quotechar &&
611 dialect->quoting != QUOTE_NONE) {
612 /* start quoted field */
613 self->state = IN_QUOTED_FIELD;
614 }
615 else if (c == dialect->escapechar) {
616 /* possible escaped character */
617 self->state = ESCAPED_CHAR;
618 }
619 else if (c == ' ' && dialect->skipinitialspace)
620 /* ignore space at start of field */
621 ;
622 else if (c == dialect->delimiter) {
623 /* save empty field */
624 if (parse_save_field(self) < 0)
625 return -1;
626 }
627 else {
628 /* begin new unquoted field */
629 if (dialect->quoting == QUOTE_NONNUMERIC)
630 self->numeric_field = 1;
631 if (parse_add_char(self, c) < 0)
632 return -1;
633 self->state = IN_FIELD;
634 }
635 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000637 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400638 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400639 if (parse_add_char(self, c) < 0)
640 return -1;
641 self->state = AFTER_ESCAPED_CRNL;
642 break;
643 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000644 if (c == '\0')
645 c = '\n';
646 if (parse_add_char(self, c) < 0)
647 return -1;
648 self->state = IN_FIELD;
649 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000650
R David Murrayc7c42ef2013-03-19 22:41:47 -0400651 case AFTER_ESCAPED_CRNL:
652 if (c == '\0')
653 break;
654 /*fallthru*/
655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000656 case IN_FIELD:
657 /* in unquoted field */
658 if (c == '\n' || c == '\r' || c == '\0') {
659 /* end of line - return [fields] */
660 if (parse_save_field(self) < 0)
661 return -1;
662 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
663 }
664 else if (c == dialect->escapechar) {
665 /* possible escaped character */
666 self->state = ESCAPED_CHAR;
667 }
668 else if (c == dialect->delimiter) {
669 /* save field - wait for new field */
670 if (parse_save_field(self) < 0)
671 return -1;
672 self->state = START_FIELD;
673 }
674 else {
675 /* normal character - save in field */
676 if (parse_add_char(self, c) < 0)
677 return -1;
678 }
679 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000680
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000681 case IN_QUOTED_FIELD:
682 /* in quoted field */
683 if (c == '\0')
684 ;
685 else if (c == dialect->escapechar) {
686 /* Possible escape character */
687 self->state = ESCAPE_IN_QUOTED_FIELD;
688 }
689 else if (c == dialect->quotechar &&
690 dialect->quoting != QUOTE_NONE) {
691 if (dialect->doublequote) {
692 /* doublequote; " represented by "" */
693 self->state = QUOTE_IN_QUOTED_FIELD;
694 }
695 else {
696 /* end of quote part of field */
697 self->state = IN_FIELD;
698 }
699 }
700 else {
701 /* normal character - save in field */
702 if (parse_add_char(self, c) < 0)
703 return -1;
704 }
705 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000706
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 case ESCAPE_IN_QUOTED_FIELD:
708 if (c == '\0')
709 c = '\n';
710 if (parse_add_char(self, c) < 0)
711 return -1;
712 self->state = IN_QUOTED_FIELD;
713 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000714
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000715 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300716 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 if (dialect->quoting != QUOTE_NONE &&
718 c == dialect->quotechar) {
719 /* save "" as " */
720 if (parse_add_char(self, c) < 0)
721 return -1;
722 self->state = IN_QUOTED_FIELD;
723 }
724 else if (c == dialect->delimiter) {
725 /* save field - wait for new field */
726 if (parse_save_field(self) < 0)
727 return -1;
728 self->state = START_FIELD;
729 }
730 else if (c == '\n' || c == '\r' || c == '\0') {
731 /* end of line - return [fields] */
732 if (parse_save_field(self) < 0)
733 return -1;
734 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
735 }
736 else if (!dialect->strict) {
737 if (parse_add_char(self, c) < 0)
738 return -1;
739 self->state = IN_FIELD;
740 }
741 else {
742 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200743 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 dialect->delimiter,
745 dialect->quotechar);
746 return -1;
747 }
748 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000749
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 case EAT_CRNL:
751 if (c == '\n' || c == '\r')
752 ;
753 else if (c == '\0')
754 self->state = START_RECORD;
755 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200756 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 return -1;
758 }
759 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000760
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000761 }
762 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000763}
764
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000765static int
766parse_reset(ReaderObj *self)
767{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300768 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000780 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200781 Py_UCS4 c;
782 Py_ssize_t pos, linelen;
783 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300784 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200785 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000786
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 if (parse_reset(self) < 0)
788 return NULL;
789 do {
790 lineobj = PyIter_Next(self->input_iter);
791 if (lineobj == NULL) {
792 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700793 if (!PyErr_Occurred() && (self->field_len != 0 ||
794 self->state == IN_QUOTED_FIELD)) {
795 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700796 PyErr_SetString(_csvstate_global->error_obj,
797 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700798 else if (parse_save_field(self) >= 0)
799 break;
800 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000801 return NULL;
802 }
803 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200804 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 "iterator should return strings, "
806 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300807 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100808 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000809 );
810 Py_DECREF(lineobj);
811 return NULL;
812 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100813 if (PyUnicode_READY(lineobj) == -1) {
814 Py_DECREF(lineobj);
815 return NULL;
816 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000817 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200818 kind = PyUnicode_KIND(lineobj);
819 data = PyUnicode_DATA(lineobj);
820 pos = 0;
821 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000822 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200823 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000824 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000825 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200826 PyErr_Format(_csvstate_global->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700827 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 goto err;
829 }
830 if (parse_process_char(self, c) < 0) {
831 Py_DECREF(lineobj);
832 goto err;
833 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200834 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 }
836 Py_DECREF(lineobj);
837 if (parse_process_char(self, 0) < 0)
838 goto err;
839 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000840
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 fields = self->fields;
842 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000843err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000844 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000845}
846
847static void
848Reader_dealloc(ReaderObj *self)
849{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 PyObject_GC_UnTrack(self);
851 Py_XDECREF(self->dialect);
852 Py_XDECREF(self->input_iter);
853 Py_XDECREF(self->fields);
854 if (self->field != NULL)
855 PyMem_Free(self->field);
856 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000857}
858
859static int
860Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 Py_VISIT(self->dialect);
863 Py_VISIT(self->input_iter);
864 Py_VISIT(self->fields);
865 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000866}
867
868static int
869Reader_clear(ReaderObj *self)
870{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 Py_CLEAR(self->dialect);
872 Py_CLEAR(self->input_iter);
873 Py_CLEAR(self->fields);
874 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000875}
876
877PyDoc_STRVAR(Reader_Type_doc,
878"CSV reader\n"
879"\n"
880"Reader objects are responsible for reading and parsing tabular data\n"
881"in CSV format.\n"
882);
883
884static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000886};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000887#define R_OFF(x) offsetof(ReaderObj, x)
888
889static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
891 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
892 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000893};
894
Skip Montanarob4a04172003-03-20 23:29:12 +0000895
896static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 PyVarObject_HEAD_INIT(NULL, 0)
898 "_csv.reader", /*tp_name*/
899 sizeof(ReaderObj), /*tp_basicsize*/
900 0, /*tp_itemsize*/
901 /* methods */
902 (destructor)Reader_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200903 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 (getattrfunc)0, /*tp_getattr*/
905 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200906 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 (reprfunc)0, /*tp_repr*/
908 0, /*tp_as_number*/
909 0, /*tp_as_sequence*/
910 0, /*tp_as_mapping*/
911 (hashfunc)0, /*tp_hash*/
912 (ternaryfunc)0, /*tp_call*/
913 (reprfunc)0, /*tp_str*/
914 0, /*tp_getattro*/
915 0, /*tp_setattro*/
916 0, /*tp_as_buffer*/
917 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
918 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
919 Reader_Type_doc, /*tp_doc*/
920 (traverseproc)Reader_traverse, /*tp_traverse*/
921 (inquiry)Reader_clear, /*tp_clear*/
922 0, /*tp_richcompare*/
923 0, /*tp_weaklistoffset*/
924 PyObject_SelfIter, /*tp_iter*/
925 (getiterfunc)Reader_iternext, /*tp_iternext*/
926 Reader_methods, /*tp_methods*/
927 Reader_memberlist, /*tp_members*/
928 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000929
930};
931
932static PyObject *
933csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
934{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 PyObject * iterator, * dialect = NULL;
936 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000937
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000938 if (!self)
939 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000940
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 self->dialect = NULL;
942 self->fields = NULL;
943 self->input_iter = NULL;
944 self->field = NULL;
945 self->field_size = 0;
946 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 if (parse_reset(self) < 0) {
949 Py_DECREF(self);
950 return NULL;
951 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000952
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000953 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
954 Py_DECREF(self);
955 return NULL;
956 }
957 self->input_iter = PyObject_GetIter(iterator);
958 if (self->input_iter == NULL) {
959 PyErr_SetString(PyExc_TypeError,
960 "argument 1 must be an iterator");
961 Py_DECREF(self);
962 return NULL;
963 }
964 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
965 if (self->dialect == NULL) {
966 Py_DECREF(self);
967 return NULL;
968 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000969
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 PyObject_GC_Track(self);
971 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000972}
973
974/*
975 * WRITER
976 */
977/* ---------------------------------------------------------------- */
978static void
979join_reset(WriterObj *self)
980{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 self->rec_len = 0;
982 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000983}
984
985#define MEM_INCR 32768
986
987/* Calculate new record length or append field to record. Return new
988 * record length.
989 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000990static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300991join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +0300992 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200993 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000994{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000995 DialectObj *dialect = self->dialect;
996 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000997 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000998
Benjamin Peterson6e01d902016-08-13 17:17:06 -0700999#define INCLEN \
1000 do {\
1001 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1002 goto overflow; \
1003 } \
1004 rec_len++; \
1005 } while(0)
1006
1007#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 do {\
1009 if (copy_phase) \
1010 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001011 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 /* If this is not the first field we need a field separator */
1017 if (self->num_fields > 0)
1018 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 /* Handle preceding quote */
1021 if (copy_phase && *quoted)
1022 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 /* Copy/count field data */
1025 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001026 for (i = 0; field_data && (i < field_len); i++) {
1027 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001028 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 if (c == dialect->delimiter ||
1031 c == dialect->escapechar ||
1032 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001033 PyUnicode_FindChar(
1034 dialect->lineterminator, c, 0,
1035 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 if (dialect->quoting == QUOTE_NONE)
1037 want_escape = 1;
1038 else {
1039 if (c == dialect->quotechar) {
1040 if (dialect->doublequote)
1041 ADDCH(dialect->quotechar);
1042 else
1043 want_escape = 1;
1044 }
1045 if (!want_escape)
1046 *quoted = 1;
1047 }
1048 if (want_escape) {
1049 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001050 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 "need to escape, but no escapechar set");
1052 return -1;
1053 }
1054 ADDCH(dialect->escapechar);
1055 }
1056 }
1057 /* Copy field character into record buffer.
1058 */
1059 ADDCH(c);
1060 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 if (*quoted) {
1063 if (copy_phase)
1064 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001065 else {
1066 INCLEN; /* starting quote */
1067 INCLEN; /* ending quote */
1068 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 }
1070 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001071
1072 overflow:
1073 PyErr_NoMemory();
1074 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001075#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001076#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001077}
1078
1079static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001080join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001081{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001082 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001085 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1086 Py_UCS4 *rec_new = self->rec;
1087 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1088 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001089 PyErr_NoMemory();
1090 return 0;
1091 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001092 self->rec = rec_new;
1093 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 }
1095 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001096}
1097
1098static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001099join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001100{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001101 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001102 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001104 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001105
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001106 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001107 if (PyUnicode_READY(field) == -1)
1108 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 field_kind = PyUnicode_KIND(field);
1110 field_data = PyUnicode_DATA(field);
1111 field_len = PyUnicode_GET_LENGTH(field);
1112 }
1113 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001114 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 if (rec_len < 0)
1116 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001117
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 /* grow record buffer if necessary */
1119 if (!join_check_rec_size(self, rec_len))
1120 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001121
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001122 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001123 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127}
1128
1129static int
1130join_append_lineterminator(WriterObj *self)
1131{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001132 Py_ssize_t terminator_len, i;
1133 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001134 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001136 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 if (terminator_len == -1)
1138 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001139
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001140 /* grow record buffer if necessary */
1141 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1142 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001144 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1145 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1146 for (i = 0; i < terminator_len; i++)
1147 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001148 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001149
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001151}
1152
1153PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001154"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001155"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001156"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001157"elements will be converted to string.");
1158
1159static PyObject *
1160csv_writerow(WriterObj *self, PyObject *seq)
1161{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001162 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001163 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001164
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001165 iter = PyObject_GetIter(seq);
1166 if (iter == NULL)
1167 return PyErr_Format(_csvstate_global->error_obj,
1168 "iterable expected, not %.200s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001169 Py_TYPE(seq)->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 /* Join all fields in internal buffer.
1172 */
1173 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001174 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 int append_ok;
1176 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001177
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 switch (dialect->quoting) {
1179 case QUOTE_NONNUMERIC:
1180 quoted = !PyNumber_Check(field);
1181 break;
1182 case QUOTE_ALL:
1183 quoted = 1;
1184 break;
1185 default:
1186 quoted = 0;
1187 break;
1188 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001189
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001191 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 Py_DECREF(field);
1193 }
1194 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001195 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 Py_DECREF(field);
1197 }
1198 else {
1199 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 str = PyObject_Str(field);
1202 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001203 if (str == NULL) {
1204 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001206 }
1207 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001208 Py_DECREF(str);
1209 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001210 if (!append_ok) {
1211 Py_DECREF(iter);
1212 return NULL;
1213 }
1214 }
1215 Py_DECREF(iter);
1216 if (PyErr_Occurred())
1217 return NULL;
1218
Licht Takeuchi20019002017-12-12 18:57:06 +09001219 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001220 if (dialect->quoting == QUOTE_NONE) {
1221 PyErr_Format(_csvstate_global->error_obj,
1222 "single empty field record must be quoted");
1223 return NULL;
1224 }
1225 self->num_fields--;
1226 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 return NULL;
1228 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001229
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 /* Add line terminator.
1231 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001232 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001233 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001234 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001235
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001236 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1237 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001238 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001239 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001240 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001241 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001242 Py_DECREF(line);
1243 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001244}
1245
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001246PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001247"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001248"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001249"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001250"elements will be converted to string.");
1251
Skip Montanarob4a04172003-03-20 23:29:12 +00001252static PyObject *
1253csv_writerows(WriterObj *self, PyObject *seqseq)
1254{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 row_iter = PyObject_GetIter(seqseq);
1258 if (row_iter == NULL) {
1259 PyErr_SetString(PyExc_TypeError,
1260 "writerows() argument must be iterable");
1261 return NULL;
1262 }
1263 while ((row_obj = PyIter_Next(row_iter))) {
1264 result = csv_writerow(self, row_obj);
1265 Py_DECREF(row_obj);
1266 if (!result) {
1267 Py_DECREF(row_iter);
1268 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001269 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 else
1271 Py_DECREF(result);
1272 }
1273 Py_DECREF(row_iter);
1274 if (PyErr_Occurred())
1275 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001276 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001277}
1278
1279static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1281 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1282 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001283};
1284
1285#define W_OFF(x) offsetof(WriterObj, x)
1286
1287static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1289 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001290};
1291
1292static void
1293Writer_dealloc(WriterObj *self)
1294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 PyObject_GC_UnTrack(self);
1296 Py_XDECREF(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001297 Py_XDECREF(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 if (self->rec != NULL)
1299 PyMem_Free(self->rec);
1300 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001301}
1302
1303static int
1304Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1305{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001307 Py_VISIT(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001309}
1310
1311static int
1312Writer_clear(WriterObj *self)
1313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001315 Py_CLEAR(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001317}
1318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001320"CSV writer\n"
1321"\n"
1322"Writer objects are responsible for generating tabular data\n"
1323"in CSV format from sequence input.\n"
1324);
1325
1326static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyVarObject_HEAD_INIT(NULL, 0)
1328 "_csv.writer", /*tp_name*/
1329 sizeof(WriterObj), /*tp_basicsize*/
1330 0, /*tp_itemsize*/
1331 /* methods */
1332 (destructor)Writer_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001333 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 (getattrfunc)0, /*tp_getattr*/
1335 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001336 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001337 (reprfunc)0, /*tp_repr*/
1338 0, /*tp_as_number*/
1339 0, /*tp_as_sequence*/
1340 0, /*tp_as_mapping*/
1341 (hashfunc)0, /*tp_hash*/
1342 (ternaryfunc)0, /*tp_call*/
1343 (reprfunc)0, /*tp_str*/
1344 0, /*tp_getattro*/
1345 0, /*tp_setattro*/
1346 0, /*tp_as_buffer*/
1347 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1348 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1349 Writer_Type_doc,
1350 (traverseproc)Writer_traverse, /*tp_traverse*/
1351 (inquiry)Writer_clear, /*tp_clear*/
1352 0, /*tp_richcompare*/
1353 0, /*tp_weaklistoffset*/
1354 (getiterfunc)0, /*tp_iter*/
1355 (getiterfunc)0, /*tp_iternext*/
1356 Writer_methods, /*tp_methods*/
1357 Writer_memberlist, /*tp_members*/
1358 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001359};
1360
1361static PyObject *
1362csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 PyObject * output_file, * dialect = NULL;
1365 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001366 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 if (!self)
1369 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001372 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 self->rec = NULL;
1375 self->rec_size = 0;
1376 self->rec_len = 0;
1377 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1380 Py_DECREF(self);
1381 return NULL;
1382 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001383 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1384 Py_DECREF(self);
1385 return NULL;
1386 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001387 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 PyErr_SetString(PyExc_TypeError,
1389 "argument 1 must have a \"write\" method");
1390 Py_DECREF(self);
1391 return NULL;
1392 }
1393 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1394 if (self->dialect == NULL) {
1395 Py_DECREF(self);
1396 return NULL;
1397 }
1398 PyObject_GC_Track(self);
1399 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001400}
1401
1402/*
1403 * DIALECT REGISTRY
1404 */
1405static PyObject *
1406csv_list_dialects(PyObject *module, PyObject *args)
1407{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001408 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001409}
1410
1411static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001412csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001413{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 PyObject *name_obj, *dialect_obj = NULL;
1415 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1418 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001419 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001421 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 return NULL;
1423 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001424 if (PyUnicode_READY(name_obj) == -1)
1425 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 dialect = _call_dialect(dialect_obj, kwargs);
1427 if (dialect == NULL)
1428 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001429 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 Py_DECREF(dialect);
1431 return NULL;
1432 }
1433 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001434 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001435}
1436
1437static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001438csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001439{
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001440 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1441 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1442 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1443 }
1444 return NULL;
1445 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001446 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001447}
1448
1449static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001450csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001451{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001453}
1454
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001455static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001456csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001457{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001459 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001460
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1462 return NULL;
1463 if (new_limit != NULL) {
1464 if (!PyLong_CheckExact(new_limit)) {
1465 PyErr_Format(PyExc_TypeError,
1466 "limit must be an integer");
1467 return NULL;
1468 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001469 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1470 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1471 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 return NULL;
1473 }
1474 }
1475 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001476}
1477
Skip Montanarob4a04172003-03-20 23:29:12 +00001478/*
1479 * MODULE
1480 */
1481
1482PyDoc_STRVAR(csv_module_doc,
1483"CSV parsing and writing.\n"
1484"\n"
1485"This module provides classes that assist in the reading and writing\n"
1486"of Comma Separated Value (CSV) files, and implements the interface\n"
1487"described by PEP 305. Although many CSV files are simple to parse,\n"
1488"the format is not formally defined by a stable specification and\n"
1489"is subtle enough that parsing lines of a CSV file with something\n"
1490"like line.split(\",\") is bound to fail. The module supports three\n"
1491"basic APIs: reading, writing, and registration of dialects.\n"
1492"\n"
1493"\n"
1494"DIALECT REGISTRATION:\n"
1495"\n"
1496"Readers and writers support a dialect argument, which is a convenient\n"
1497"handle on a group of settings. When the dialect argument is a string,\n"
1498"it identifies one of the dialects previously registered with the module.\n"
1499"If it is a class or instance, the attributes of the argument are used as\n"
1500"the settings for the reader or writer:\n"
1501"\n"
1502" class excel:\n"
1503" delimiter = ','\n"
1504" quotechar = '\"'\n"
1505" escapechar = None\n"
1506" doublequote = True\n"
1507" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001508" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001509" quoting = QUOTE_MINIMAL\n"
1510"\n"
1511"SETTINGS:\n"
1512"\n"
oldkaa0735f2018-02-02 16:52:55 +08001513" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001514" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001515" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001516" field separator. It defaults to ','.\n"
1517" * skipinitialspace - specifies how to interpret whitespace which\n"
1518" immediately follows a delimiter. It defaults to False, which\n"
1519" means that whitespace immediately following a delimiter is part\n"
1520" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001521" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001522" terminate rows.\n"
1523" * quoting - controls when quotes should be generated by the writer.\n"
1524" It can take on any of the following module constants:\n"
1525"\n"
1526" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1527" field contains either the quotechar or the delimiter\n"
1528" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1529" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001530" fields which do not parse as integers or floating point\n"
1531" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001532" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001533" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001534" the delimiter when quoting is set to QUOTE_NONE.\n"
1535" * doublequote - controls the handling of quotes inside fields. When\n"
1536" True, two consecutive quotes are interpreted as one during read,\n"
1537" and when writing, each quote character embedded in the data is\n"
1538" written as two quotes\n");
1539
1540PyDoc_STRVAR(csv_reader_doc,
1541" csv_reader = reader(iterable [, dialect='excel']\n"
1542" [optional keyword args])\n"
1543" for row in csv_reader:\n"
1544" process(row)\n"
1545"\n"
1546"The \"iterable\" argument can be any object that returns a line\n"
1547"of input for each iteration, such as a file object or a list. The\n"
1548"optional \"dialect\" parameter is discussed below. The function\n"
1549"also accepts optional keyword arguments which override settings\n"
1550"provided by the dialect.\n"
1551"\n"
1552"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001553"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001554
1555PyDoc_STRVAR(csv_writer_doc,
1556" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1557" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001558" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001559" csv_writer.writerow(row)\n"
1560"\n"
1561" [or]\n"
1562"\n"
1563" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1564" [optional keyword args])\n"
1565" csv_writer.writerows(rows)\n"
1566"\n"
1567"The \"fileobj\" argument can be any object that supports the file API.\n");
1568
1569PyDoc_STRVAR(csv_list_dialects_doc,
1570"Return a list of all know dialect names.\n"
1571" names = csv.list_dialects()");
1572
1573PyDoc_STRVAR(csv_get_dialect_doc,
1574"Return the dialect instance associated with name.\n"
1575" dialect = csv.get_dialect(name)");
1576
1577PyDoc_STRVAR(csv_register_dialect_doc,
1578"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001579" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001580
1581PyDoc_STRVAR(csv_unregister_dialect_doc,
1582"Delete the name/dialect mapping associated with a string name.\n"
1583" csv.unregister_dialect(name)");
1584
Andrew McNamara31d88962005-01-12 03:45:10 +00001585PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001586"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001587" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001588"\n"
1589"Returns old limit. If limit is not given, no new limit is set and\n"
1590"the old limit is returned");
1591
Skip Montanarob4a04172003-03-20 23:29:12 +00001592static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001593 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001594 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001595 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001596 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1597 { "list_dialects", (PyCFunction)csv_list_dialects,
1598 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001599 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1601 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1602 METH_O, csv_unregister_dialect_doc},
1603 { "get_dialect", (PyCFunction)csv_get_dialect,
1604 METH_O, csv_get_dialect_doc},
1605 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1606 METH_VARARGS, csv_field_size_limit_doc},
1607 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001608};
1609
Martin v. Löwis1a214512008-06-11 05:26:20 +00001610static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001611 PyModuleDef_HEAD_INIT,
1612 "_csv",
1613 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001614 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001615 csv_methods,
1616 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001617 _csv_traverse,
1618 _csv_clear,
1619 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001620};
1621
Skip Montanarob4a04172003-03-20 23:29:12 +00001622PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001623PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001624{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001626 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001627
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 if (PyType_Ready(&Reader_Type) < 0)
1629 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 if (PyType_Ready(&Writer_Type) < 0)
1632 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 /* Create the module and add the functions */
1635 module = PyModule_Create(&_csvmodule);
1636 if (module == NULL)
1637 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 /* Add version to the module. */
1640 if (PyModule_AddStringConstant(module, "__version__",
1641 MODULE_VERSION) == -1)
1642 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001643
Antoine Pitroue7672d32012-05-16 11:33:08 +02001644 /* Set the field limit */
Hai Shif707d942020-03-16 21:15:01 +08001645 get_csv_state(module)->field_limit = 128 * 1024;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001646 /* Do I still need to add this var to the Module Dict? */
1647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 /* Add _dialects dictionary */
Hai Shif707d942020-03-16 21:15:01 +08001649 get_csv_state(module)->dialects = PyDict_New();
1650 if (get_csv_state(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001652 Py_INCREF(get_csv_state(module)->dialects);
1653 if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 /* Add quote styles into dictionary */
1657 for (style = quote_styles; style->name; style++) {
1658 if (PyModule_AddIntConstant(module, style->name,
1659 style->style) == -1)
1660 return NULL;
1661 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001662
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001663 if (PyModule_AddType(module, &Dialect_Type)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001665 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001666
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 /* Add the CSV exception object to the module. */
Hai Shif707d942020-03-16 21:15:01 +08001668 get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1669 if (get_csv_state(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001671 Py_INCREF(get_csv_state(module)->error_obj);
1672 PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001674}