blob: f33733aaf850d16bca82b03ffb8ec97385fbcea5 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
21 long field_limit; /* max parsed field size */
22} _csvstate;
23
Hai Shif707d942020-03-16 21:15:01 +080024static inline _csvstate*
25get_csv_state(PyObject *module)
26{
27 void *state = PyModule_GetState(module);
28 assert(state != NULL);
29 return (_csvstate *)state;
30}
Antoine Pitroue7672d32012-05-16 11:33:08 +020031
32static int
33_csv_clear(PyObject *m)
34{
Hai Shif707d942020-03-16 21:15:01 +080035 Py_CLEAR(get_csv_state(m)->error_obj);
36 Py_CLEAR(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020037 return 0;
38}
39
40static int
41_csv_traverse(PyObject *m, visitproc visit, void *arg)
42{
Hai Shif707d942020-03-16 21:15:01 +080043 Py_VISIT(get_csv_state(m)->error_obj);
44 Py_VISIT(get_csv_state(m)->dialects);
Antoine Pitroue7672d32012-05-16 11:33:08 +020045 return 0;
46}
47
48static void
49_csv_free(void *m)
50{
51 _csv_clear((PyObject *)m);
52}
53
54static struct PyModuleDef _csvmodule;
55
56#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000057
/* States of the reader's character-driven parser (see parse_process_char). */
typedef enum {
    START_RECORD,            /* start of record */
    START_FIELD,             /* expecting field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* in unquoted field */
    IN_QUOTED_FIELD,         /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* seen a quote in a quoted field */
    EAT_CRNL,                /* consuming line-ending characters */
    AFTER_ESCAPED_CRNL       /* just stored an escaped '\n' or '\r' */
} ParserState;
63
/* Quoting styles accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/*
 * Table of all known quoting styles.  The final entry has a NULL name and
 * acts as the terminator for loops that walk the table.
 */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
80
81typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000082 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000083
Serhiy Storchaka323748a2018-07-26 13:21:09 +030084 char doublequote; /* is " represented by ""? */
85 char skipinitialspace; /* ignore spaces following delimiter? */
86 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030088 Py_UCS4 delimiter; /* field separator */
89 Py_UCS4 quotechar; /* quote character */
90 Py_UCS4 escapechar; /* escape character */
91 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Skip Montanarob4a04172003-03-20 23:29:12 +000093} DialectObj;
94
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
97typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000100 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *fields; /* field list for current record */
105 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200106 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000107 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000108 Py_ssize_t field_len; /* length of current field */
109 int numeric_field; /* treat field as numeric */
110 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000111} ReaderObj;
112
Neal Norwitz227b5332006-03-22 09:28:35 +0000113static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Skip Montanarob4a04172003-03-20 23:29:12 +0000115typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000117
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200118 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000121
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200122 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000123 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000124 Py_ssize_t rec_len; /* length of record */
125 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000126} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
Neal Norwitz227b5332006-03-22 09:28:35 +0000128static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130/*
131 * DIALECT class
132 */
133
134static PyObject *
135get_dialect_from_registry(PyObject * name_obj)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000138
Serhiy Storchakaa24107b2019-02-25 17:59:46 +0200139 dialect_obj = PyDict_GetItemWithError(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000140 if (dialect_obj == NULL) {
141 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200142 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 }
144 else
145 Py_INCREF(dialect_obj);
146 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000147}
148
Skip Montanarob4a04172003-03-20 23:29:12 +0000149static PyObject *
150get_string(PyObject *str)
151{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 Py_XINCREF(str);
153 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000154}
155
Skip Montanarob4a04172003-03-20 23:29:12 +0000156static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200157get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000158{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000159 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200160 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 }
162 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200163 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000164}
165
Skip Montanarob4a04172003-03-20 23:29:12 +0000166static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200167Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000168{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000169 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200232_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 if (src == NULL)
235 *target = dflt;
236 else {
237 *target = '\0';
238 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100246 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200247 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300249 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 name);
251 return -1;
252 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100253 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200255 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 }
257 }
258 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000259}
260
261static int
262_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
263{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 if (src == NULL)
265 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
266 else {
267 if (src == Py_None)
268 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Format(PyExc_TypeError,
271 "\"%s\" must be a string", name);
272 return -1;
273 }
274 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100275 if (PyUnicode_READY(src) == -1)
276 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300278 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 }
280 }
281 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282}
283
284static int
285dialect_check_quoting(int quoting)
286{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200287 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200290 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 return 0;
292 }
293 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
294 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295}
Skip Montanarob4a04172003-03-20 23:29:12 +0000296
297#define D_OFF(x) offsetof(DialectObj, x)
298
299static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300300 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
301 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
302 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000304};
305
306static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 { "delimiter", (getter)Dialect_get_delimiter},
308 { "escapechar", (getter)Dialect_get_escapechar},
309 { "lineterminator", (getter)Dialect_get_lineterminator},
310 { "quotechar", (getter)Dialect_get_quotechar},
311 { "quoting", (getter)Dialect_get_quoting},
312 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000313};
314
315static void
316Dialect_dealloc(DialectObj *self)
317{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 Py_XDECREF(self->lineterminator);
319 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000320}
321
/*
 * Keyword names accepted by dialect_new(), in the order expected by its
 * PyArg_ParseTupleAndKeywords() call.  NULL-terminated.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
334
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000335static PyObject *
336dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000338 DialectObj *self;
339 PyObject *ret = NULL;
340 PyObject *dialect = NULL;
341 PyObject *delimiter = NULL;
342 PyObject *doublequote = NULL;
343 PyObject *escapechar = NULL;
344 PyObject *lineterminator = NULL;
345 PyObject *quotechar = NULL;
346 PyObject *quoting = NULL;
347 PyObject *skipinitialspace = NULL;
348 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000349
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000350 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
351 "|OOOOOOOOO", dialect_kws,
352 &dialect,
353 &delimiter,
354 &doublequote,
355 &escapechar,
356 &lineterminator,
357 &quotechar,
358 &quoting,
359 &skipinitialspace,
360 &strict))
361 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000363 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100364 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000365 dialect = get_dialect_from_registry(dialect);
366 if (dialect == NULL)
367 return NULL;
368 }
369 else
370 Py_INCREF(dialect);
371 /* Can we reuse this instance? */
372 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200373 delimiter == NULL &&
374 doublequote == NULL &&
375 escapechar == NULL &&
376 lineterminator == NULL &&
377 quotechar == NULL &&
378 quoting == NULL &&
379 skipinitialspace == NULL &&
380 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 return dialect;
382 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000383
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 self = (DialectObj *)type->tp_alloc(type, 0);
385 if (self == NULL) {
386 Py_XDECREF(dialect);
387 return NULL;
388 }
389 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000390
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000391 Py_XINCREF(delimiter);
392 Py_XINCREF(doublequote);
393 Py_XINCREF(escapechar);
394 Py_XINCREF(lineterminator);
395 Py_XINCREF(quotechar);
396 Py_XINCREF(quoting);
397 Py_XINCREF(skipinitialspace);
398 Py_XINCREF(strict);
399 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000400#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000401 if (v == NULL) \
402 v = PyObject_GetAttrString(dialect, n)
403 DIALECT_GETATTR(delimiter, "delimiter");
404 DIALECT_GETATTR(doublequote, "doublequote");
405 DIALECT_GETATTR(escapechar, "escapechar");
406 DIALECT_GETATTR(lineterminator, "lineterminator");
407 DIALECT_GETATTR(quotechar, "quotechar");
408 DIALECT_GETATTR(quoting, "quoting");
409 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
410 DIALECT_GETATTR(strict, "strict");
411 PyErr_Clear();
412 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 if (meth(name, target, src, dflt)) \
417 goto err
418 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300419 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
421 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
422 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
423 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300424 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
425 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000426
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000427 /* validate options */
428 if (dialect_check_quoting(self->quoting))
429 goto err;
430 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200431 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300432 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000433 goto err;
434 }
435 if (quotechar == Py_None && quoting == NULL)
436 self->quoting = QUOTE_NONE;
437 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
438 PyErr_SetString(PyExc_TypeError,
439 "quotechar must be set if quoting enabled");
440 goto err;
441 }
442 if (self->lineterminator == 0) {
443 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
444 goto err;
445 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 ret = (PyObject *)self;
448 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 Py_XDECREF(self);
451 Py_XDECREF(dialect);
452 Py_XDECREF(delimiter);
453 Py_XDECREF(doublequote);
454 Py_XDECREF(escapechar);
455 Py_XDECREF(lineterminator);
456 Py_XDECREF(quotechar);
457 Py_XDECREF(quoting);
458 Py_XDECREF(skipinitialspace);
459 Py_XDECREF(strict);
460 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000461}
462
463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000464PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000465"CSV dialect\n"
466"\n"
467"The Dialect type records CSV parsing and generation options.\n");
468
469static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 PyVarObject_HEAD_INIT(NULL, 0)
471 "_csv.Dialect", /* tp_name */
472 sizeof(DialectObj), /* tp_basicsize */
473 0, /* tp_itemsize */
474 /* methods */
475 (destructor)Dialect_dealloc, /* tp_dealloc */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200476 0, /* tp_vectorcall_offset */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000477 (getattrfunc)0, /* tp_getattr */
478 (setattrfunc)0, /* tp_setattr */
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200479 0, /* tp_as_async */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 (reprfunc)0, /* tp_repr */
481 0, /* tp_as_number */
482 0, /* tp_as_sequence */
483 0, /* tp_as_mapping */
484 (hashfunc)0, /* tp_hash */
485 (ternaryfunc)0, /* tp_call */
486 (reprfunc)0, /* tp_str */
487 0, /* tp_getattro */
488 0, /* tp_setattro */
489 0, /* tp_as_buffer */
490 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
491 Dialect_Type_doc, /* tp_doc */
492 0, /* tp_traverse */
493 0, /* tp_clear */
494 0, /* tp_richcompare */
495 0, /* tp_weaklistoffset */
496 0, /* tp_iter */
497 0, /* tp_iternext */
498 0, /* tp_methods */
499 Dialect_memberlist, /* tp_members */
500 Dialect_getsetlist, /* tp_getset */
501 0, /* tp_base */
502 0, /* tp_dict */
503 0, /* tp_descr_get */
504 0, /* tp_descr_set */
505 0, /* tp_dictoffset */
506 0, /* tp_init */
507 0, /* tp_alloc */
508 dialect_new, /* tp_new */
509 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000510};
511
Andrew McNamara91b97462005-01-11 01:07:23 +0000512/*
513 * Return an instance of the dialect type, given a Python instance or kwarg
514 * description of the dialect
515 */
516static PyObject *
517_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
518{
Victor Stinner6412f492016-08-23 00:21:34 +0200519 PyObject *type = (PyObject *)&Dialect_Type;
520 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100521 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200522 }
523 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100524 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200525 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000526}
527
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000528/*
529 * READER
530 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000531static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000532parse_save_field(ReaderObj *self)
533{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000535
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200536 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
537 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000538 if (field == NULL)
539 return -1;
540 self->field_len = 0;
541 if (self->numeric_field) {
542 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000543
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 self->numeric_field = 0;
545 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000546 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200547 if (tmp == NULL)
548 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 field = tmp;
550 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100551 if (PyList_Append(self->fields, field) < 0) {
552 Py_DECREF(field);
553 return -1;
554 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000555 Py_DECREF(field);
556 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000557}
558
559static int
560parse_grow_buff(ReaderObj *self)
561{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500562 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
563
564 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
565 Py_UCS4 *field_new = self->field;
566 PyMem_Resize(field_new, Py_UCS4, field_size_new);
567 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 PyErr_NoMemory();
569 return 0;
570 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500571 self->field = field_new;
572 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000574}
575
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000576static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200577parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000578{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200579 if (self->field_len >= _csvstate_global->field_limit) {
580 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
581 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 return -1;
583 }
584 if (self->field_len == self->field_size && !parse_grow_buff(self))
585 return -1;
586 self->field[self->field_len++] = c;
587 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000588}
589
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000590static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200591parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000592{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000593 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000594
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 switch (self->state) {
596 case START_RECORD:
597 /* start of record */
598 if (c == '\0')
599 /* empty line - return [] */
600 break;
601 else if (c == '\n' || c == '\r') {
602 self->state = EAT_CRNL;
603 break;
604 }
605 /* normal character - handle as START_FIELD */
606 self->state = START_FIELD;
607 /* fallthru */
608 case START_FIELD:
609 /* expecting field */
610 if (c == '\n' || c == '\r' || c == '\0') {
611 /* save empty field - return [fields] */
612 if (parse_save_field(self) < 0)
613 return -1;
614 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
615 }
616 else if (c == dialect->quotechar &&
617 dialect->quoting != QUOTE_NONE) {
618 /* start quoted field */
619 self->state = IN_QUOTED_FIELD;
620 }
621 else if (c == dialect->escapechar) {
622 /* possible escaped character */
623 self->state = ESCAPED_CHAR;
624 }
625 else if (c == ' ' && dialect->skipinitialspace)
626 /* ignore space at start of field */
627 ;
628 else if (c == dialect->delimiter) {
629 /* save empty field */
630 if (parse_save_field(self) < 0)
631 return -1;
632 }
633 else {
634 /* begin new unquoted field */
635 if (dialect->quoting == QUOTE_NONNUMERIC)
636 self->numeric_field = 1;
637 if (parse_add_char(self, c) < 0)
638 return -1;
639 self->state = IN_FIELD;
640 }
641 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000642
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000643 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400644 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400645 if (parse_add_char(self, c) < 0)
646 return -1;
647 self->state = AFTER_ESCAPED_CRNL;
648 break;
649 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000650 if (c == '\0')
651 c = '\n';
652 if (parse_add_char(self, c) < 0)
653 return -1;
654 self->state = IN_FIELD;
655 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000656
R David Murrayc7c42ef2013-03-19 22:41:47 -0400657 case AFTER_ESCAPED_CRNL:
658 if (c == '\0')
659 break;
660 /*fallthru*/
661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 case IN_FIELD:
663 /* in unquoted field */
664 if (c == '\n' || c == '\r' || c == '\0') {
665 /* end of line - return [fields] */
666 if (parse_save_field(self) < 0)
667 return -1;
668 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
669 }
670 else if (c == dialect->escapechar) {
671 /* possible escaped character */
672 self->state = ESCAPED_CHAR;
673 }
674 else if (c == dialect->delimiter) {
675 /* save field - wait for new field */
676 if (parse_save_field(self) < 0)
677 return -1;
678 self->state = START_FIELD;
679 }
680 else {
681 /* normal character - save in field */
682 if (parse_add_char(self, c) < 0)
683 return -1;
684 }
685 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000686
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000687 case IN_QUOTED_FIELD:
688 /* in quoted field */
689 if (c == '\0')
690 ;
691 else if (c == dialect->escapechar) {
692 /* Possible escape character */
693 self->state = ESCAPE_IN_QUOTED_FIELD;
694 }
695 else if (c == dialect->quotechar &&
696 dialect->quoting != QUOTE_NONE) {
697 if (dialect->doublequote) {
698 /* doublequote; " represented by "" */
699 self->state = QUOTE_IN_QUOTED_FIELD;
700 }
701 else {
702 /* end of quote part of field */
703 self->state = IN_FIELD;
704 }
705 }
706 else {
707 /* normal character - save in field */
708 if (parse_add_char(self, c) < 0)
709 return -1;
710 }
711 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000712
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000713 case ESCAPE_IN_QUOTED_FIELD:
714 if (c == '\0')
715 c = '\n';
716 if (parse_add_char(self, c) < 0)
717 return -1;
718 self->state = IN_QUOTED_FIELD;
719 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300722 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000723 if (dialect->quoting != QUOTE_NONE &&
724 c == dialect->quotechar) {
725 /* save "" as " */
726 if (parse_add_char(self, c) < 0)
727 return -1;
728 self->state = IN_QUOTED_FIELD;
729 }
730 else if (c == dialect->delimiter) {
731 /* save field - wait for new field */
732 if (parse_save_field(self) < 0)
733 return -1;
734 self->state = START_FIELD;
735 }
736 else if (c == '\n' || c == '\r' || c == '\0') {
737 /* end of line - return [fields] */
738 if (parse_save_field(self) < 0)
739 return -1;
740 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
741 }
742 else if (!dialect->strict) {
743 if (parse_add_char(self, c) < 0)
744 return -1;
745 self->state = IN_FIELD;
746 }
747 else {
748 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200749 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 dialect->delimiter,
751 dialect->quotechar);
752 return -1;
753 }
754 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 case EAT_CRNL:
757 if (c == '\n' || c == '\r')
758 ;
759 else if (c == '\0')
760 self->state = START_RECORD;
761 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200762 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 return -1;
764 }
765 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000766
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 }
768 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000769}
770
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000771static int
772parse_reset(ReaderObj *self)
773{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300774 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000775 if (self->fields == NULL)
776 return -1;
777 self->field_len = 0;
778 self->state = START_RECORD;
779 self->numeric_field = 0;
780 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781}
Skip Montanarob4a04172003-03-20 23:29:12 +0000782
783static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000784Reader_iternext(ReaderObj *self)
785{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200787 Py_UCS4 c;
788 Py_ssize_t pos, linelen;
789 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300790 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200791 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (parse_reset(self) < 0)
794 return NULL;
795 do {
796 lineobj = PyIter_Next(self->input_iter);
797 if (lineobj == NULL) {
798 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700799 if (!PyErr_Occurred() && (self->field_len != 0 ||
800 self->state == IN_QUOTED_FIELD)) {
801 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700802 PyErr_SetString(_csvstate_global->error_obj,
803 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700804 else if (parse_save_field(self) >= 0)
805 break;
806 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 return NULL;
808 }
809 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200810 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 "iterator should return strings, "
812 "not %.200s "
813 "(did you open the file in text mode?)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100814 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 );
816 Py_DECREF(lineobj);
817 return NULL;
818 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100819 if (PyUnicode_READY(lineobj) == -1) {
820 Py_DECREF(lineobj);
821 return NULL;
822 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200824 kind = PyUnicode_KIND(lineobj);
825 data = PyUnicode_DATA(lineobj);
826 pos = 0;
827 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200829 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000831 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200832 PyErr_Format(_csvstate_global->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700833 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 goto err;
835 }
836 if (parse_process_char(self, c) < 0) {
837 Py_DECREF(lineobj);
838 goto err;
839 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200840 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 }
842 Py_DECREF(lineobj);
843 if (parse_process_char(self, 0) < 0)
844 goto err;
845 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 fields = self->fields;
848 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000849err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000851}
852
853static void
854Reader_dealloc(ReaderObj *self)
855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 PyObject_GC_UnTrack(self);
857 Py_XDECREF(self->dialect);
858 Py_XDECREF(self->input_iter);
859 Py_XDECREF(self->fields);
860 if (self->field != NULL)
861 PyMem_Free(self->field);
862 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000863}
864
865static int
866Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 Py_VISIT(self->dialect);
869 Py_VISIT(self->input_iter);
870 Py_VISIT(self->fields);
871 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000872}
873
874static int
875Reader_clear(ReaderObj *self)
876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 Py_CLEAR(self->dialect);
878 Py_CLEAR(self->input_iter);
879 Py_CLEAR(self->fields);
880 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000881}
882
883PyDoc_STRVAR(Reader_Type_doc,
884"CSV reader\n"
885"\n"
886"Reader objects are responsible for reading and parsing tabular data\n"
887"in CSV format.\n"
888);
889
890static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000892};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000893#define R_OFF(x) offsetof(ReaderObj, x)
894
895static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
897 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
898 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000899};
900
Skip Montanarob4a04172003-03-20 23:29:12 +0000901
902static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 PyVarObject_HEAD_INIT(NULL, 0)
904 "_csv.reader", /*tp_name*/
905 sizeof(ReaderObj), /*tp_basicsize*/
906 0, /*tp_itemsize*/
907 /* methods */
908 (destructor)Reader_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200909 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 (getattrfunc)0, /*tp_getattr*/
911 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +0200912 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 (reprfunc)0, /*tp_repr*/
914 0, /*tp_as_number*/
915 0, /*tp_as_sequence*/
916 0, /*tp_as_mapping*/
917 (hashfunc)0, /*tp_hash*/
918 (ternaryfunc)0, /*tp_call*/
919 (reprfunc)0, /*tp_str*/
920 0, /*tp_getattro*/
921 0, /*tp_setattro*/
922 0, /*tp_as_buffer*/
923 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
924 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
925 Reader_Type_doc, /*tp_doc*/
926 (traverseproc)Reader_traverse, /*tp_traverse*/
927 (inquiry)Reader_clear, /*tp_clear*/
928 0, /*tp_richcompare*/
929 0, /*tp_weaklistoffset*/
930 PyObject_SelfIter, /*tp_iter*/
931 (getiterfunc)Reader_iternext, /*tp_iternext*/
932 Reader_methods, /*tp_methods*/
933 Reader_memberlist, /*tp_members*/
934 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000935
936};
937
938static PyObject *
939csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
940{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 PyObject * iterator, * dialect = NULL;
942 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 if (!self)
945 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 self->dialect = NULL;
948 self->fields = NULL;
949 self->input_iter = NULL;
950 self->field = NULL;
951 self->field_size = 0;
952 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 if (parse_reset(self) < 0) {
955 Py_DECREF(self);
956 return NULL;
957 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
960 Py_DECREF(self);
961 return NULL;
962 }
963 self->input_iter = PyObject_GetIter(iterator);
964 if (self->input_iter == NULL) {
965 PyErr_SetString(PyExc_TypeError,
966 "argument 1 must be an iterator");
967 Py_DECREF(self);
968 return NULL;
969 }
970 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
971 if (self->dialect == NULL) {
972 Py_DECREF(self);
973 return NULL;
974 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 PyObject_GC_Track(self);
977 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000978}
979
980/*
981 * WRITER
982 */
983/* ---------------------------------------------------------------- */
984static void
985join_reset(WriterObj *self)
986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 self->rec_len = 0;
988 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000989}
990
991#define MEM_INCR 32768
992
993/* Calculate new record length or append field to record. Return new
994 * record length.
995 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000996static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300997join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +0300998 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200999 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001000{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 DialectObj *dialect = self->dialect;
1002 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001003 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001004
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001005#define INCLEN \
1006 do {\
1007 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1008 goto overflow; \
1009 } \
1010 rec_len++; \
1011 } while(0)
1012
1013#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 do {\
1015 if (copy_phase) \
1016 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001017 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 /* If this is not the first field we need a field separator */
1023 if (self->num_fields > 0)
1024 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* Handle preceding quote */
1027 if (copy_phase && *quoted)
1028 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 /* Copy/count field data */
1031 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001032 for (i = 0; field_data && (i < field_len); i++) {
1033 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 if (c == dialect->delimiter ||
1037 c == dialect->escapechar ||
1038 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001039 PyUnicode_FindChar(
1040 dialect->lineterminator, c, 0,
1041 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (dialect->quoting == QUOTE_NONE)
1043 want_escape = 1;
1044 else {
1045 if (c == dialect->quotechar) {
1046 if (dialect->doublequote)
1047 ADDCH(dialect->quotechar);
1048 else
1049 want_escape = 1;
1050 }
1051 if (!want_escape)
1052 *quoted = 1;
1053 }
1054 if (want_escape) {
1055 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001056 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 "need to escape, but no escapechar set");
1058 return -1;
1059 }
1060 ADDCH(dialect->escapechar);
1061 }
1062 }
1063 /* Copy field character into record buffer.
1064 */
1065 ADDCH(c);
1066 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 if (*quoted) {
1069 if (copy_phase)
1070 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001071 else {
1072 INCLEN; /* starting quote */
1073 INCLEN; /* ending quote */
1074 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 }
1076 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001077
1078 overflow:
1079 PyErr_NoMemory();
1080 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001081#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001082#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001083}
1084
1085static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001086join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001087{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001088 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001089
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001091 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1092 Py_UCS4 *rec_new = self->rec;
1093 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1094 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 PyErr_NoMemory();
1096 return 0;
1097 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001098 self->rec = rec_new;
1099 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 }
1101 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001102}
1103
1104static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001105join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001106{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001107 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001108 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001110 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001111
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001112 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001113 if (PyUnicode_READY(field) == -1)
1114 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001115 field_kind = PyUnicode_KIND(field);
1116 field_data = PyUnicode_DATA(field);
1117 field_len = PyUnicode_GET_LENGTH(field);
1118 }
1119 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001120 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001121 if (rec_len < 0)
1122 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001123
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001124 /* grow record buffer if necessary */
1125 if (!join_check_rec_size(self, rec_len))
1126 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001128 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001129 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001131
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001133}
1134
1135static int
1136join_append_lineterminator(WriterObj *self)
1137{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001138 Py_ssize_t terminator_len, i;
1139 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001140 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001142 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 if (terminator_len == -1)
1144 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 /* grow record buffer if necessary */
1147 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1148 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001149
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001150 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1151 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1152 for (i = 0; i < terminator_len; i++)
1153 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001155
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157}
1158
1159PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001160"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001161"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001162"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001163"elements will be converted to string.");
1164
1165static PyObject *
1166csv_writerow(WriterObj *self, PyObject *seq)
1167{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001169 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001170
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001171 iter = PyObject_GetIter(seq);
1172 if (iter == NULL)
1173 return PyErr_Format(_csvstate_global->error_obj,
1174 "iterable expected, not %.200s",
Victor Stinnerdaa97562020-02-07 03:37:06 +01001175 Py_TYPE(seq)->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001176
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 /* Join all fields in internal buffer.
1178 */
1179 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001180 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 int append_ok;
1182 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001183
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001184 switch (dialect->quoting) {
1185 case QUOTE_NONNUMERIC:
1186 quoted = !PyNumber_Check(field);
1187 break;
1188 case QUOTE_ALL:
1189 quoted = 1;
1190 break;
1191 default:
1192 quoted = 0;
1193 break;
1194 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001195
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001197 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 Py_DECREF(field);
1199 }
1200 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001201 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001202 Py_DECREF(field);
1203 }
1204 else {
1205 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 str = PyObject_Str(field);
1208 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001209 if (str == NULL) {
1210 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001211 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001212 }
1213 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 Py_DECREF(str);
1215 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001216 if (!append_ok) {
1217 Py_DECREF(iter);
1218 return NULL;
1219 }
1220 }
1221 Py_DECREF(iter);
1222 if (PyErr_Occurred())
1223 return NULL;
1224
Licht Takeuchi20019002017-12-12 18:57:06 +09001225 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001226 if (dialect->quoting == QUOTE_NONE) {
1227 PyErr_Format(_csvstate_global->error_obj,
1228 "single empty field record must be quoted");
1229 return NULL;
1230 }
1231 self->num_fields--;
1232 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 return NULL;
1234 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001235
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 /* Add line terminator.
1237 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001238 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001239 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001240 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001241
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001242 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1243 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001244 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001245 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001246 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001247 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001248 Py_DECREF(line);
1249 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001250}
1251
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001252PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001253"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001254"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001255"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001256"elements will be converted to string.");
1257
Skip Montanarob4a04172003-03-20 23:29:12 +00001258static PyObject *
1259csv_writerows(WriterObj *self, PyObject *seqseq)
1260{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001261 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 row_iter = PyObject_GetIter(seqseq);
1264 if (row_iter == NULL) {
1265 PyErr_SetString(PyExc_TypeError,
1266 "writerows() argument must be iterable");
1267 return NULL;
1268 }
1269 while ((row_obj = PyIter_Next(row_iter))) {
1270 result = csv_writerow(self, row_obj);
1271 Py_DECREF(row_obj);
1272 if (!result) {
1273 Py_DECREF(row_iter);
1274 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001275 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 else
1277 Py_DECREF(result);
1278 }
1279 Py_DECREF(row_iter);
1280 if (PyErr_Occurred())
1281 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001282 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001283}
1284
1285static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1287 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1288 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001289};
1290
1291#define W_OFF(x) offsetof(WriterObj, x)
1292
1293static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001294 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1295 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001296};
1297
1298static void
1299Writer_dealloc(WriterObj *self)
1300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 PyObject_GC_UnTrack(self);
1302 Py_XDECREF(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001303 Py_XDECREF(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 if (self->rec != NULL)
1305 PyMem_Free(self->rec);
1306 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001307}
1308
1309static int
1310Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1311{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001313 Py_VISIT(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001315}
1316
1317static int
1318Writer_clear(WriterObj *self)
1319{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001321 Py_CLEAR(self->write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001322 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001323}
1324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001326"CSV writer\n"
1327"\n"
1328"Writer objects are responsible for generating tabular data\n"
1329"in CSV format from sequence input.\n"
1330);
1331
1332static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 PyVarObject_HEAD_INIT(NULL, 0)
1334 "_csv.writer", /*tp_name*/
1335 sizeof(WriterObj), /*tp_basicsize*/
1336 0, /*tp_itemsize*/
1337 /* methods */
1338 (destructor)Writer_dealloc, /*tp_dealloc*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001339 0, /*tp_vectorcall_offset*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 (getattrfunc)0, /*tp_getattr*/
1341 (setattrfunc)0, /*tp_setattr*/
Jeroen Demeyer530f5062019-05-31 04:13:39 +02001342 0, /*tp_as_async*/
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001343 (reprfunc)0, /*tp_repr*/
1344 0, /*tp_as_number*/
1345 0, /*tp_as_sequence*/
1346 0, /*tp_as_mapping*/
1347 (hashfunc)0, /*tp_hash*/
1348 (ternaryfunc)0, /*tp_call*/
1349 (reprfunc)0, /*tp_str*/
1350 0, /*tp_getattro*/
1351 0, /*tp_setattro*/
1352 0, /*tp_as_buffer*/
1353 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1354 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1355 Writer_Type_doc,
1356 (traverseproc)Writer_traverse, /*tp_traverse*/
1357 (inquiry)Writer_clear, /*tp_clear*/
1358 0, /*tp_richcompare*/
1359 0, /*tp_weaklistoffset*/
1360 (getiterfunc)0, /*tp_iter*/
1361 (getiterfunc)0, /*tp_iternext*/
1362 Writer_methods, /*tp_methods*/
1363 Writer_memberlist, /*tp_members*/
1364 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001365};
1366
1367static PyObject *
1368csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1369{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001370 PyObject * output_file, * dialect = NULL;
1371 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001372 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 if (!self)
1375 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001376
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001377 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001378 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001380 self->rec = NULL;
1381 self->rec_size = 0;
1382 self->rec_len = 0;
1383 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001384
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1386 Py_DECREF(self);
1387 return NULL;
1388 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001389 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1390 Py_DECREF(self);
1391 return NULL;
1392 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001393 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001394 PyErr_SetString(PyExc_TypeError,
1395 "argument 1 must have a \"write\" method");
1396 Py_DECREF(self);
1397 return NULL;
1398 }
1399 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1400 if (self->dialect == NULL) {
1401 Py_DECREF(self);
1402 return NULL;
1403 }
1404 PyObject_GC_Track(self);
1405 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001406}
1407
1408/*
1409 * DIALECT REGISTRY
1410 */
1411static PyObject *
1412csv_list_dialects(PyObject *module, PyObject *args)
1413{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001414 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001415}
1416
1417static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001418csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001419{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyObject *name_obj, *dialect_obj = NULL;
1421 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1424 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001425 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001426 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001427 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001428 return NULL;
1429 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001430 if (PyUnicode_READY(name_obj) == -1)
1431 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 dialect = _call_dialect(dialect_obj, kwargs);
1433 if (dialect == NULL)
1434 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001435 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 Py_DECREF(dialect);
1437 return NULL;
1438 }
1439 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001440 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001441}
1442
1443static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001444csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001445{
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001446 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0) {
1447 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
1448 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
1449 }
1450 return NULL;
1451 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001452 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001453}
1454
1455static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001456csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001457{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001459}
1460
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001461static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001462csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001463{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001465 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1468 return NULL;
1469 if (new_limit != NULL) {
1470 if (!PyLong_CheckExact(new_limit)) {
1471 PyErr_Format(PyExc_TypeError,
1472 "limit must be an integer");
1473 return NULL;
1474 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001475 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1476 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1477 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 return NULL;
1479 }
1480 }
1481 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001482}
1483
Skip Montanarob4a04172003-03-20 23:29:12 +00001484/*
1485 * MODULE
1486 */
1487
/* Module-level docstring; passed as the doc entry of the _csvmodule
   definition.  It summarises the dialect mechanism and each dialect
   setting. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1545
/* Docstring attached to the "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001560
/* Docstring attached to the "writer" entry of csv_methods; shows both the
   row-at-a-time (writerow) and bulk (writerows) usage. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1574
1575PyDoc_STRVAR(csv_list_dialects_doc,
1576"Return a list of all know dialect names.\n"
1577" names = csv.list_dialects()");
1578
/* Docstring attached to the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1582
/* Docstring attached to the "register_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001586
/* Docstring attached to the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1590
/* Docstring attached to the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1597
Skip Montanarob4a04172003-03-20 23:29:12 +00001598static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001599 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001601 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001602 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1603 { "list_dialects", (PyCFunction)csv_list_dialects,
1604 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001605 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001606 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1607 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1608 METH_O, csv_unregister_dialect_doc},
1609 { "get_dialect", (PyCFunction)csv_get_dialect,
1610 METH_O, csv_get_dialect_doc},
1611 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1612 METH_VARARGS, csv_field_size_limit_doc},
1613 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001614};
1615
Martin v. Löwis1a214512008-06-11 05:26:20 +00001616static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001617 PyModuleDef_HEAD_INIT,
1618 "_csv",
1619 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001620 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 csv_methods,
1622 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001623 _csv_traverse,
1624 _csv_clear,
1625 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001626};
1627
Skip Montanarob4a04172003-03-20 23:29:12 +00001628PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001629PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001630{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001632 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 if (PyType_Ready(&Reader_Type) < 0)
1635 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 if (PyType_Ready(&Writer_Type) < 0)
1638 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 /* Create the module and add the functions */
1641 module = PyModule_Create(&_csvmodule);
1642 if (module == NULL)
1643 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 /* Add version to the module. */
1646 if (PyModule_AddStringConstant(module, "__version__",
1647 MODULE_VERSION) == -1)
1648 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001649
Antoine Pitroue7672d32012-05-16 11:33:08 +02001650 /* Set the field limit */
Hai Shif707d942020-03-16 21:15:01 +08001651 get_csv_state(module)->field_limit = 128 * 1024;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001652 /* Do I still need to add this var to the Module Dict? */
1653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 /* Add _dialects dictionary */
Hai Shif707d942020-03-16 21:15:01 +08001655 get_csv_state(module)->dialects = PyDict_New();
1656 if (get_csv_state(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001658 Py_INCREF(get_csv_state(module)->dialects);
1659 if (PyModule_AddObject(module, "_dialects", get_csv_state(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 /* Add quote styles into dictionary */
1663 for (style = quote_styles; style->name; style++) {
1664 if (PyModule_AddIntConstant(module, style->name,
1665 style->style) == -1)
1666 return NULL;
1667 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001668
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001669 if (PyModule_AddType(module, &Dialect_Type)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 return NULL;
Dong-hee Na37fcbb62020-03-25 07:08:51 +09001671 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 /* Add the CSV exception object to the module. */
Hai Shif707d942020-03-16 21:15:01 +08001674 get_csv_state(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1675 if (get_csv_state(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 return NULL;
Hai Shif707d942020-03-16 21:15:01 +08001677 Py_INCREF(get_csv_state(module)->error_obj);
1678 PyModule_AddObject(module, "Error", get_csv_state(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001680}