blob: cade1ca9d47f06aed074a27e2e8cf2d99b3c66f4 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
Skip Montanarob4a04172003-03-20 23:29:12 +00009*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
Petr Viktorin6a02b382020-12-15 15:14:35 +010021 PyTypeObject *dialect_type;
22 PyTypeObject *reader_type;
23 PyTypeObject *writer_type;
Antoine Pitroue7672d32012-05-16 11:33:08 +020024 long field_limit; /* max parsed field size */
25} _csvstate;
26
Petr Viktorin6a02b382020-12-15 15:14:35 +010027static struct PyModuleDef _csvmodule;
28
Hai Shif707d942020-03-16 21:15:01 +080029static inline _csvstate*
30get_csv_state(PyObject *module)
31{
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35}
Antoine Pitroue7672d32012-05-16 11:33:08 +020036
37static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010038_csv_clear(PyObject *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020039{
Petr Viktorin6a02b382020-12-15 15:14:35 +010040 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020046 return 0;
47}
48
49static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010050_csv_traverse(PyObject *module, visitproc visit, void *arg)
Antoine Pitroue7672d32012-05-16 11:33:08 +020051{
Petr Viktorin6a02b382020-12-15 15:14:35 +010052 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020058 return 0;
59}
60
61static void
Petr Viktorin6a02b382020-12-15 15:14:35 +010062_csv_free(void *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020063{
Petr Viktorin6a02b382020-12-15 15:14:35 +010064 _csv_clear((PyObject *)module);
Antoine Pitroue7672d32012-05-16 11:33:08 +020065}
66
/* States of the reader's parsing state machine. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
72
/* Quoting styles; the numeric values are what the `quoting` dialect
 * attribute is validated against (see dialect_check_quoting()). */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
76
77typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020079 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000080} StyleDesc;
81
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020082static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
84 { QUOTE_ALL, "QUOTE_ALL" },
85 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
86 { QUOTE_NONE, "QUOTE_NONE" },
87 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000088};
89
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000092
Serhiy Storchaka323748a2018-07-26 13:21:09 +030093 char doublequote; /* is " represented by ""? */
94 char skipinitialspace; /* ignore spaces following delimiter? */
95 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030097 Py_UCS4 delimiter; /* field separator */
98 Py_UCS4 quotechar; /* quote character */
99 Py_UCS4 escapechar; /* escape character */
100 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} DialectObj;
103
Skip Montanarob4a04172003-03-20 23:29:12 +0000104typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject *fields; /* field list for current record */
112 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200113 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000114 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 Py_ssize_t field_len; /* length of current field */
116 int numeric_field; /* treat field as numeric */
117 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118} ReaderObj;
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200123 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200127 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000128 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_ssize_t rec_len; /* length of record */
130 int num_fields; /* number of fields in record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
Petr Viktorin6a02b382020-12-15 15:14:35 +0100132 PyObject *error_obj; /* cached error object */
133} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
135/*
136 * DIALECT class
137 */
138
139static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100140get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
Skip Montanarob4a04172003-03-20 23:29:12 +0000141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143
Petr Viktorin6a02b382020-12-15 15:14:35 +0100144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
Petr Viktorin6a02b382020-12-15 15:14:35 +0100147 PyErr_Format(module_state->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 }
149 else
150 Py_INCREF(dialect_obj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153}
154
Skip Montanarob4a04172003-03-20 23:29:12 +0000155static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200156get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200159 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200166Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000167{
Dong-hee Na0383be42020-06-10 00:33:43 +0900168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200232_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 if (src == NULL)
235 *target = dflt;
236 else {
237 *target = '\0';
238 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100246 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200247 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300249 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 name);
251 return -1;
252 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100253 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200255 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 }
257 }
258 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000259}
260
261static int
262_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
263{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 if (src == NULL)
265 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
266 else {
267 if (src == Py_None)
268 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Format(PyExc_TypeError,
271 "\"%s\" must be a string", name);
272 return -1;
273 }
274 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100275 if (PyUnicode_READY(src) == -1)
276 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300278 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 }
280 }
281 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282}
283
284static int
285dialect_check_quoting(int quoting)
286{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200287 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200290 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 return 0;
292 }
293 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
294 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295}
Skip Montanarob4a04172003-03-20 23:29:12 +0000296
297#define D_OFF(x) offsetof(DialectObj, x)
298
299static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300300 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
301 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
302 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000304};
305
306static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 { "delimiter", (getter)Dialect_get_delimiter},
308 { "escapechar", (getter)Dialect_get_escapechar},
309 { "lineterminator", (getter)Dialect_get_lineterminator},
310 { "quotechar", (getter)Dialect_get_quotechar},
311 { "quoting", (getter)Dialect_get_quoting},
312 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000313};
314
315static void
316Dialect_dealloc(DialectObj *self)
317{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100318 PyTypeObject *tp = Py_TYPE(self);
319 Py_CLEAR(self->lineterminator);
320 tp->tp_free((PyObject *)self);
321 Py_DECREF(tp);
322}
323
324static void
325Dialect_finalize(DialectObj *self)
326{
327 Py_CLEAR(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000328}
329
/* Keyword names accepted by dialect_new(), in the order of its
 * PyArg_ParseTupleAndKeywords() output arguments. NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL    /* sentinel */
};
342
Petr Viktorin6a02b382020-12-15 15:14:35 +0100343static _csvstate *
344_csv_state_from_type(PyTypeObject *type, const char *name)
345{
346 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
347 if (module == NULL) {
348 return NULL;
349 }
350 _csvstate *module_state = PyModule_GetState(module);
351 if (module_state == NULL) {
352 PyErr_Format(PyExc_SystemError,
353 "%s: No _csv module state found", name);
354 return NULL;
355 }
356 return module_state;
357}
358
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000359static PyObject *
360dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000361{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 DialectObj *self;
363 PyObject *ret = NULL;
364 PyObject *dialect = NULL;
365 PyObject *delimiter = NULL;
366 PyObject *doublequote = NULL;
367 PyObject *escapechar = NULL;
368 PyObject *lineterminator = NULL;
369 PyObject *quotechar = NULL;
370 PyObject *quoting = NULL;
371 PyObject *skipinitialspace = NULL;
372 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000374 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
375 "|OOOOOOOOO", dialect_kws,
376 &dialect,
377 &delimiter,
378 &doublequote,
379 &escapechar,
380 &lineterminator,
381 &quotechar,
382 &quoting,
383 &skipinitialspace,
384 &strict))
385 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000386
Petr Viktorin6a02b382020-12-15 15:14:35 +0100387 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
388 if (module_state == NULL) {
389 return NULL;
390 }
391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000392 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100393 if (PyUnicode_Check(dialect)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100394 dialect = get_dialect_from_registry(dialect, module_state);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 if (dialect == NULL)
396 return NULL;
397 }
398 else
399 Py_INCREF(dialect);
400 /* Can we reuse this instance? */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100401 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200402 delimiter == NULL &&
403 doublequote == NULL &&
404 escapechar == NULL &&
405 lineterminator == NULL &&
406 quotechar == NULL &&
407 quoting == NULL &&
408 skipinitialspace == NULL &&
409 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 return dialect;
411 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000412
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 self = (DialectObj *)type->tp_alloc(type, 0);
414 if (self == NULL) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100415 Py_CLEAR(dialect);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 return NULL;
417 }
418 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000419
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 Py_XINCREF(delimiter);
421 Py_XINCREF(doublequote);
422 Py_XINCREF(escapechar);
423 Py_XINCREF(lineterminator);
424 Py_XINCREF(quotechar);
425 Py_XINCREF(quoting);
426 Py_XINCREF(skipinitialspace);
427 Py_XINCREF(strict);
428 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000429#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 if (v == NULL) \
431 v = PyObject_GetAttrString(dialect, n)
432 DIALECT_GETATTR(delimiter, "delimiter");
433 DIALECT_GETATTR(doublequote, "doublequote");
434 DIALECT_GETATTR(escapechar, "escapechar");
435 DIALECT_GETATTR(lineterminator, "lineterminator");
436 DIALECT_GETATTR(quotechar, "quotechar");
437 DIALECT_GETATTR(quoting, "quoting");
438 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
439 DIALECT_GETATTR(strict, "strict");
440 PyErr_Clear();
441 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 if (meth(name, target, src, dflt)) \
446 goto err
447 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300448 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000449 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
450 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
451 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
452 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300453 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
454 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000455
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000456 /* validate options */
457 if (dialect_check_quoting(self->quoting))
458 goto err;
459 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200460 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300461 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000462 goto err;
463 }
464 if (quotechar == Py_None && quoting == NULL)
465 self->quoting = QUOTE_NONE;
466 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
467 PyErr_SetString(PyExc_TypeError,
468 "quotechar must be set if quoting enabled");
469 goto err;
470 }
471 if (self->lineterminator == 0) {
472 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
473 goto err;
474 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000475
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 ret = (PyObject *)self;
477 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000478err:
Petr Viktorin6a02b382020-12-15 15:14:35 +0100479 Py_CLEAR(self);
480 Py_CLEAR(dialect);
481 Py_CLEAR(delimiter);
482 Py_CLEAR(doublequote);
483 Py_CLEAR(escapechar);
484 Py_CLEAR(lineterminator);
485 Py_CLEAR(quotechar);
486 Py_CLEAR(quoting);
487 Py_CLEAR(skipinitialspace);
488 Py_CLEAR(strict);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000490}
491
/* Since dialect is now a heap type, it inherits the pickling methods for
 * protocols 0 and 1 from object; they therefore need to be overridden. */
494
495PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
496
497static PyObject *
498Dialect_reduce(PyObject *self, PyObject *args) {
499 PyErr_Format(PyExc_TypeError,
500 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
501 return NULL;
502}
503
504static struct PyMethodDef dialect_methods[] = {
505 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
506 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
507 {NULL, NULL}
508};
Skip Montanarob4a04172003-03-20 23:29:12 +0000509
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000510PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000511"CSV dialect\n"
512"\n"
513"The Dialect type records CSV parsing and generation options.\n");
514
Petr Viktorin6a02b382020-12-15 15:14:35 +0100515static PyType_Slot Dialect_Type_slots[] = {
516 {Py_tp_doc, (char*)Dialect_Type_doc},
517 {Py_tp_members, Dialect_memberlist},
518 {Py_tp_getset, Dialect_getsetlist},
519 {Py_tp_new, dialect_new},
520 {Py_tp_methods, dialect_methods},
521 {Py_tp_finalize, Dialect_finalize},
522 {Py_tp_dealloc, Dialect_dealloc},
523 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000524};
525
Petr Viktorin6a02b382020-12-15 15:14:35 +0100526PyType_Spec Dialect_Type_spec = {
527 .name = "_csv.Dialect",
528 .basicsize = sizeof(DialectObj),
529 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
530 .slots = Dialect_Type_slots,
531};
532
533
Andrew McNamara91b97462005-01-11 01:07:23 +0000534/*
535 * Return an instance of the dialect type, given a Python instance or kwarg
536 * description of the dialect
537 */
538static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100539_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
Andrew McNamara91b97462005-01-11 01:07:23 +0000540{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100541 PyObject *type = (PyObject *)module_state->dialect_type;
Victor Stinner6412f492016-08-23 00:21:34 +0200542 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100543 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200544 }
545 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100546 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200547 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000548}
549
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000550/*
551 * READER
552 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000553static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000554parse_save_field(ReaderObj *self)
555{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000556 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000557
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200558 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
559 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000560 if (field == NULL)
561 return -1;
562 self->field_len = 0;
563 if (self->numeric_field) {
564 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000565
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 self->numeric_field = 0;
567 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200569 if (tmp == NULL)
570 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 field = tmp;
572 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100573 if (PyList_Append(self->fields, field) < 0) {
574 Py_DECREF(field);
575 return -1;
576 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 Py_DECREF(field);
578 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000579}
580
581static int
582parse_grow_buff(ReaderObj *self)
583{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500584 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
585
586 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
587 Py_UCS4 *field_new = self->field;
588 PyMem_Resize(field_new, Py_UCS4, field_size_new);
589 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000590 PyErr_NoMemory();
591 return 0;
592 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500593 self->field = field_new;
594 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000595 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000596}
597
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000598static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100599parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000600{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100601 if (self->field_len >= module_state->field_limit) {
602 PyErr_Format(module_state->error_obj,
603 "field larger than field limit (%ld)",
604 module_state->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 return -1;
606 }
607 if (self->field_len == self->field_size && !parse_grow_buff(self))
608 return -1;
609 self->field[self->field_len++] = c;
610 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000611}
612
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000613static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100614parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000615{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 switch (self->state) {
619 case START_RECORD:
620 /* start of record */
621 if (c == '\0')
622 /* empty line - return [] */
623 break;
624 else if (c == '\n' || c == '\r') {
625 self->state = EAT_CRNL;
626 break;
627 }
628 /* normal character - handle as START_FIELD */
629 self->state = START_FIELD;
630 /* fallthru */
631 case START_FIELD:
632 /* expecting field */
633 if (c == '\n' || c == '\r' || c == '\0') {
634 /* save empty field - return [fields] */
635 if (parse_save_field(self) < 0)
636 return -1;
637 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
638 }
639 else if (c == dialect->quotechar &&
640 dialect->quoting != QUOTE_NONE) {
641 /* start quoted field */
642 self->state = IN_QUOTED_FIELD;
643 }
644 else if (c == dialect->escapechar) {
645 /* possible escaped character */
646 self->state = ESCAPED_CHAR;
647 }
648 else if (c == ' ' && dialect->skipinitialspace)
649 /* ignore space at start of field */
650 ;
651 else if (c == dialect->delimiter) {
652 /* save empty field */
653 if (parse_save_field(self) < 0)
654 return -1;
655 }
656 else {
657 /* begin new unquoted field */
658 if (dialect->quoting == QUOTE_NONNUMERIC)
659 self->numeric_field = 1;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100660 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 return -1;
662 self->state = IN_FIELD;
663 }
664 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400667 if (c == '\n' || c=='\r') {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100668 if (parse_add_char(self, module_state, c) < 0)
R David Murrayc7c42ef2013-03-19 22:41:47 -0400669 return -1;
670 self->state = AFTER_ESCAPED_CRNL;
671 break;
672 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 if (c == '\0')
674 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100675 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000676 return -1;
677 self->state = IN_FIELD;
678 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000679
R David Murrayc7c42ef2013-03-19 22:41:47 -0400680 case AFTER_ESCAPED_CRNL:
681 if (c == '\0')
682 break;
683 /*fallthru*/
684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 case IN_FIELD:
686 /* in unquoted field */
687 if (c == '\n' || c == '\r' || c == '\0') {
688 /* end of line - return [fields] */
689 if (parse_save_field(self) < 0)
690 return -1;
691 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
692 }
693 else if (c == dialect->escapechar) {
694 /* possible escaped character */
695 self->state = ESCAPED_CHAR;
696 }
697 else if (c == dialect->delimiter) {
698 /* save field - wait for new field */
699 if (parse_save_field(self) < 0)
700 return -1;
701 self->state = START_FIELD;
702 }
703 else {
704 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100705 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000706 return -1;
707 }
708 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000709
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000710 case IN_QUOTED_FIELD:
711 /* in quoted field */
712 if (c == '\0')
713 ;
714 else if (c == dialect->escapechar) {
715 /* Possible escape character */
716 self->state = ESCAPE_IN_QUOTED_FIELD;
717 }
718 else if (c == dialect->quotechar &&
719 dialect->quoting != QUOTE_NONE) {
720 if (dialect->doublequote) {
721 /* doublequote; " represented by "" */
722 self->state = QUOTE_IN_QUOTED_FIELD;
723 }
724 else {
725 /* end of quote part of field */
726 self->state = IN_FIELD;
727 }
728 }
729 else {
730 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100731 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000732 return -1;
733 }
734 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000735
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000736 case ESCAPE_IN_QUOTED_FIELD:
737 if (c == '\0')
738 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100739 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000740 return -1;
741 self->state = IN_QUOTED_FIELD;
742 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000743
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300745 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 if (dialect->quoting != QUOTE_NONE &&
747 c == dialect->quotechar) {
748 /* save "" as " */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100749 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000750 return -1;
751 self->state = IN_QUOTED_FIELD;
752 }
753 else if (c == dialect->delimiter) {
754 /* save field - wait for new field */
755 if (parse_save_field(self) < 0)
756 return -1;
757 self->state = START_FIELD;
758 }
759 else if (c == '\n' || c == '\r' || c == '\0') {
760 /* end of line - return [fields] */
761 if (parse_save_field(self) < 0)
762 return -1;
763 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
764 }
765 else if (!dialect->strict) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100766 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 return -1;
768 self->state = IN_FIELD;
769 }
770 else {
771 /* illegal */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100772 PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 dialect->delimiter,
774 dialect->quotechar);
775 return -1;
776 }
777 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000778
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 case EAT_CRNL:
780 if (c == '\n' || c == '\r')
781 ;
782 else if (c == '\0')
783 self->state = START_RECORD;
784 else {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100785 PyErr_Format(module_state->error_obj,
786 "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000787 return -1;
788 }
789 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 }
792 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000793}
794
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000795static int
796parse_reset(ReaderObj *self)
797{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300798 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 if (self->fields == NULL)
800 return -1;
801 self->field_len = 0;
802 self->state = START_RECORD;
803 self->numeric_field = 0;
804 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000805}
Skip Montanarob4a04172003-03-20 23:29:12 +0000806
807static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000808Reader_iternext(ReaderObj *self)
809{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200811 Py_UCS4 c;
812 Py_ssize_t pos, linelen;
813 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300814 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200815 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000816
Petr Viktorin6a02b382020-12-15 15:14:35 +0100817 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
818 "Reader.__next__");
819 if (module_state == NULL) {
820 return NULL;
821 }
822
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 if (parse_reset(self) < 0)
824 return NULL;
825 do {
826 lineobj = PyIter_Next(self->input_iter);
827 if (lineobj == NULL) {
828 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700829 if (!PyErr_Occurred() && (self->field_len != 0 ||
830 self->state == IN_QUOTED_FIELD)) {
831 if (self->dialect->strict)
Petr Viktorin6a02b382020-12-15 15:14:35 +0100832 PyErr_SetString(module_state->error_obj,
Senthil Kumaran49d13022012-09-25 02:37:20 -0700833 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700834 else if (parse_save_field(self) >= 0)
835 break;
836 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 return NULL;
838 }
839 if (!PyUnicode_Check(lineobj)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100840 PyErr_Format(module_state->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 "iterator should return strings, "
842 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300843 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100844 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 );
846 Py_DECREF(lineobj);
847 return NULL;
848 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100849 if (PyUnicode_READY(lineobj) == -1) {
850 Py_DECREF(lineobj);
851 return NULL;
852 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200854 kind = PyUnicode_KIND(lineobj);
855 data = PyUnicode_DATA(lineobj);
856 pos = 0;
857 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200859 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000861 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100862 PyErr_Format(module_state->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700863 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 goto err;
865 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100866 if (parse_process_char(self, module_state, c) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000867 Py_DECREF(lineobj);
868 goto err;
869 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200870 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 }
872 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100873 if (parse_process_char(self, module_state, 0) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 goto err;
875 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000876
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 fields = self->fields;
878 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000879err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000881}
882
883static void
884Reader_dealloc(ReaderObj *self)
885{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100886 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 PyObject_GC_UnTrack(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100888 Py_CLEAR(self->dialect);
889 Py_CLEAR(self->input_iter);
890 Py_CLEAR(self->fields);
891 if (self->field != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 PyMem_Free(self->field);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100893 self->field = NULL;
894 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100896 Py_DECREF(tp);
897}
898
899static void
900Reader_finalize(ReaderObj *self)
901{
902 Py_CLEAR(self->dialect);
903 Py_CLEAR(self->input_iter);
904 Py_CLEAR(self->fields);
905 if (self->field != NULL) {
906 PyMem_Free(self->field);
907 self->field = NULL;
908 }
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000909}
910
911static int
912Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
913{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 Py_VISIT(self->dialect);
915 Py_VISIT(self->input_iter);
916 Py_VISIT(self->fields);
917 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000918}
919
920static int
921Reader_clear(ReaderObj *self)
922{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 Py_CLEAR(self->dialect);
924 Py_CLEAR(self->input_iter);
925 Py_CLEAR(self->fields);
926 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000927}
928
929PyDoc_STRVAR(Reader_Type_doc,
930"CSV reader\n"
931"\n"
932"Reader objects are responsible for reading and parsing tabular data\n"
933"in CSV format.\n"
934);
935
936static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000938};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000939#define R_OFF(x) offsetof(ReaderObj, x)
940
941static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
943 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
944 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000945};
946
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
Petr Viktorin6a02b382020-12-15 15:14:35 +0100948static PyType_Slot Reader_Type_slots[] = {
949 {Py_tp_doc, (char*)Reader_Type_doc},
950 {Py_tp_traverse, Reader_traverse},
951 {Py_tp_clear, Reader_clear},
952 {Py_tp_iter, PyObject_SelfIter},
953 {Py_tp_iternext, Reader_iternext},
954 {Py_tp_methods, Reader_methods},
955 {Py_tp_members, Reader_memberlist},
956 {Py_tp_finalize, Reader_finalize},
957 {Py_tp_dealloc, Reader_dealloc},
958 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000959};
960
Petr Viktorin6a02b382020-12-15 15:14:35 +0100961PyType_Spec Reader_Type_spec = {
962 .name = "_csv.reader",
963 .basicsize = sizeof(ReaderObj),
964 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
965 .slots = Reader_Type_slots
966};
967
968
Skip Montanarob4a04172003-03-20 23:29:12 +0000969static PyObject *
970csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
971{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 PyObject * iterator, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100973 _csvstate *module_state = get_csv_state(module);
974 ReaderObj * self = PyObject_GC_New(
975 ReaderObj,
976 module_state->reader_type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 if (!self)
979 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 self->dialect = NULL;
982 self->fields = NULL;
983 self->input_iter = NULL;
984 self->field = NULL;
985 self->field_size = 0;
986 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000987
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000988 if (parse_reset(self) < 0) {
989 Py_DECREF(self);
990 return NULL;
991 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000992
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000993 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
994 Py_DECREF(self);
995 return NULL;
996 }
997 self->input_iter = PyObject_GetIter(iterator);
998 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000999 Py_DECREF(self);
1000 return NULL;
1001 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001002 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1003 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001004 if (self->dialect == NULL) {
1005 Py_DECREF(self);
1006 return NULL;
1007 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001008
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001009 PyObject_GC_Track(self);
1010 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001011}
1012
1013/*
1014 * WRITER
1015 */
1016/* ---------------------------------------------------------------- */
1017static void
1018join_reset(WriterObj *self)
1019{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 self->rec_len = 0;
1021 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001022}
1023
/* Granularity, in Py_UCS4 elements, used when growing the writer's
 * record buffer. */
#define MEM_INCR (32 * 1024)
1025
1026/* Calculate new record length or append field to record. Return new
1027 * record length.
1028 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001029static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001030join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001031 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001032 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001033{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 DialectObj *dialect = self->dialect;
1035 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001036 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001037
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001038#define INCLEN \
1039 do {\
1040 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1041 goto overflow; \
1042 } \
1043 rec_len++; \
1044 } while(0)
1045
1046#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 do {\
1048 if (copy_phase) \
1049 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001050 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001052
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001054
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001055 /* If this is not the first field we need a field separator */
1056 if (self->num_fields > 0)
1057 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001058
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 /* Handle preceding quote */
1060 if (copy_phase && *quoted)
1061 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001062
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001063 /* Copy/count field data */
1064 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001065 for (i = 0; field_data && (i < field_len); i++) {
1066 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 if (c == dialect->delimiter ||
1070 c == dialect->escapechar ||
1071 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001072 PyUnicode_FindChar(
1073 dialect->lineterminator, c, 0,
1074 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001075 if (dialect->quoting == QUOTE_NONE)
1076 want_escape = 1;
1077 else {
1078 if (c == dialect->quotechar) {
1079 if (dialect->doublequote)
1080 ADDCH(dialect->quotechar);
1081 else
1082 want_escape = 1;
1083 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001084 else if (c == dialect->escapechar) {
1085 want_escape = 1;
1086 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 if (!want_escape)
1088 *quoted = 1;
1089 }
1090 if (want_escape) {
1091 if (!dialect->escapechar) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001092 PyErr_Format(self->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001093 "need to escape, but no escapechar set");
1094 return -1;
1095 }
1096 ADDCH(dialect->escapechar);
1097 }
1098 }
1099 /* Copy field character into record buffer.
1100 */
1101 ADDCH(c);
1102 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 if (*quoted) {
1105 if (copy_phase)
1106 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001107 else {
1108 INCLEN; /* starting quote */
1109 INCLEN; /* ending quote */
1110 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001111 }
1112 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001113
1114 overflow:
1115 PyErr_NoMemory();
1116 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001117#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001118#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001119}
1120
1121static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001122join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001123{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001124 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001125
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001127 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1128 Py_UCS4 *rec_new = self->rec;
1129 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1130 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001131 PyErr_NoMemory();
1132 return 0;
1133 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001134 self->rec = rec_new;
1135 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001136 }
1137 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001138}
1139
1140static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001141join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001142{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001143 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001144 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001146 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001147
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001148 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001149 if (PyUnicode_READY(field) == -1)
1150 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001151 field_kind = PyUnicode_KIND(field);
1152 field_data = PyUnicode_DATA(field);
1153 field_len = PyUnicode_GET_LENGTH(field);
1154 }
1155 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001156 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 if (rec_len < 0)
1158 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 /* grow record buffer if necessary */
1161 if (!join_check_rec_size(self, rec_len))
1162 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001163
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001164 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001165 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001166 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001167
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169}
1170
1171static int
1172join_append_lineterminator(WriterObj *self)
1173{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001174 Py_ssize_t terminator_len, i;
1175 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001176 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001177
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001178 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 if (terminator_len == -1)
1180 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001181
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 /* grow record buffer if necessary */
1183 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1184 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001185
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001186 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1187 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1188 for (i = 0; i < terminator_len; i++)
1189 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001193}
1194
1195PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001196"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001197"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001198"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001199"elements will be converted to string.");
1200
1201static PyObject *
1202csv_writerow(WriterObj *self, PyObject *seq)
1203{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001205 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001206
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001207 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001208 if (iter == NULL) {
1209 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001210 PyErr_Format(self->error_obj,
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001211 "iterable expected, not %.200s",
1212 Py_TYPE(seq)->tp_name);
1213 }
1214 return NULL;
1215 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 /* Join all fields in internal buffer.
1218 */
1219 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001220 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 int append_ok;
1222 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 switch (dialect->quoting) {
1225 case QUOTE_NONNUMERIC:
1226 quoted = !PyNumber_Check(field);
1227 break;
1228 case QUOTE_ALL:
1229 quoted = 1;
1230 break;
1231 default:
1232 quoted = 0;
1233 break;
1234 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001235
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001236 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001237 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001238 Py_DECREF(field);
1239 }
1240 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001241 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001242 Py_DECREF(field);
1243 }
1244 else {
1245 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001246
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001247 str = PyObject_Str(field);
1248 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001249 if (str == NULL) {
1250 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001252 }
1253 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 Py_DECREF(str);
1255 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001256 if (!append_ok) {
1257 Py_DECREF(iter);
1258 return NULL;
1259 }
1260 }
1261 Py_DECREF(iter);
1262 if (PyErr_Occurred())
1263 return NULL;
1264
Licht Takeuchi20019002017-12-12 18:57:06 +09001265 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001266 if (dialect->quoting == QUOTE_NONE) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001267 PyErr_Format(self->error_obj,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001268 "single empty field record must be quoted");
1269 return NULL;
1270 }
1271 self->num_fields--;
1272 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 return NULL;
1274 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001275
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 /* Add line terminator.
1277 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001278 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001279 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001280 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001281
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001282 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1283 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001284 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001285 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001286 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001287 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001288 Py_DECREF(line);
1289 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001290}
1291
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001292PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001293"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001294"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001295"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001296"elements will be converted to string.");
1297
Skip Montanarob4a04172003-03-20 23:29:12 +00001298static PyObject *
1299csv_writerows(WriterObj *self, PyObject *seqseq)
1300{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001302
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 row_iter = PyObject_GetIter(seqseq);
1304 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001305 return NULL;
1306 }
1307 while ((row_obj = PyIter_Next(row_iter))) {
1308 result = csv_writerow(self, row_obj);
1309 Py_DECREF(row_obj);
1310 if (!result) {
1311 Py_DECREF(row_iter);
1312 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001313 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 else
1315 Py_DECREF(result);
1316 }
1317 Py_DECREF(row_iter);
1318 if (PyErr_Occurred())
1319 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001320 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001321}
1322
1323static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001324 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1325 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1326 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001327};
1328
1329#define W_OFF(x) offsetof(WriterObj, x)
1330
1331static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001332 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1333 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001334};
1335
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001336static int
1337Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1338{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001339 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001340 Py_VISIT(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001341 Py_VISIT(self->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001343}
1344
1345static int
1346Writer_clear(WriterObj *self)
1347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001349 Py_CLEAR(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001350 Py_CLEAR(self->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001352}
1353
Petr Viktorin6a02b382020-12-15 15:14:35 +01001354static void
1355Writer_finalize(WriterObj *self)
1356{
1357 Writer_clear(self);
1358 if (self->rec != NULL) {
1359 PyMem_Free(self->rec);
1360 }
1361}
1362
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001364"CSV writer\n"
1365"\n"
1366"Writer objects are responsible for generating tabular data\n"
1367"in CSV format from sequence input.\n"
1368);
1369
Petr Viktorin6a02b382020-12-15 15:14:35 +01001370static PyType_Slot Writer_Type_slots[] = {
1371 {Py_tp_finalize, Writer_finalize},
1372 {Py_tp_doc, (char*)Writer_Type_doc},
1373 {Py_tp_traverse, Writer_traverse},
1374 {Py_tp_clear, Writer_clear},
1375 {Py_tp_methods, Writer_methods},
1376 {Py_tp_members, Writer_memberlist},
1377 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +00001378};
1379
Petr Viktorin6a02b382020-12-15 15:14:35 +01001380PyType_Spec Writer_Type_spec = {
1381 .name = "_csv.writer",
1382 .basicsize = sizeof(WriterObj),
1383 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1384 .slots = Writer_Type_slots,
1385};
1386
1387
Skip Montanarob4a04172003-03-20 23:29:12 +00001388static PyObject *
1389csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1390{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001391 PyObject * output_file, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001392 _csvstate *module_state = get_csv_state(module);
1393 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001394 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 if (!self)
1397 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001399 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001400 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001401
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001402 self->rec = NULL;
1403 self->rec_size = 0;
1404 self->rec_len = 0;
1405 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001406
Petr Viktorin6a02b382020-12-15 15:14:35 +01001407 self->error_obj = Py_NewRef(module_state->error_obj);
1408
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001409 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1410 Py_DECREF(self);
1411 return NULL;
1412 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001413 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1414 Py_DECREF(self);
1415 return NULL;
1416 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001417 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 PyErr_SetString(PyExc_TypeError,
1419 "argument 1 must have a \"write\" method");
1420 Py_DECREF(self);
1421 return NULL;
1422 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001423 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1424 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 if (self->dialect == NULL) {
1426 Py_DECREF(self);
1427 return NULL;
1428 }
1429 PyObject_GC_Track(self);
1430 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001431}
1432
1433/*
1434 * DIALECT REGISTRY
1435 */
1436static PyObject *
1437csv_list_dialects(PyObject *module, PyObject *args)
1438{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001439 return PyDict_Keys(get_csv_state(module)->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001440}
1441
1442static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001443csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001444{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 PyObject *name_obj, *dialect_obj = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001446 _csvstate *module_state = get_csv_state(module);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001448
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1450 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001451 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001453 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 return NULL;
1455 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001456 if (PyUnicode_READY(name_obj) == -1)
1457 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001458 dialect = _call_dialect(module_state, dialect_obj, kwargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 if (dialect == NULL)
1460 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001461 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 Py_DECREF(dialect);
1463 return NULL;
1464 }
1465 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001466 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001467}
1468
1469static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001470csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001471{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001472 _csvstate *module_state = get_csv_state(module);
1473 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001474 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001475 PyErr_Format(module_state->error_obj, "unknown dialect");
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001476 }
1477 return NULL;
1478 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001479 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001480}
1481
1482static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001483csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001484{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001485 return get_dialect_from_registry(name_obj, get_csv_state(module));
Skip Montanarob4a04172003-03-20 23:29:12 +00001486}
1487
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001488static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001489csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001490{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 PyObject *new_limit = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001492 _csvstate *module_state = get_csv_state(module);
1493 long old_limit = module_state->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001494
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001495 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1496 return NULL;
1497 if (new_limit != NULL) {
1498 if (!PyLong_CheckExact(new_limit)) {
1499 PyErr_Format(PyExc_TypeError,
1500 "limit must be an integer");
1501 return NULL;
1502 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001503 module_state->field_limit = PyLong_AsLong(new_limit);
1504 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1505 module_state->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001506 return NULL;
1507 }
1508 }
1509 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001510}
1511
/* Slot table for the "_csv.Error" type spec: it contains only the
   terminating {0, NULL} entry, so no slots are filled in here. */
static PyType_Slot error_slots[] = {
    {0, NULL},
};
1515
1516PyType_Spec error_spec = {
1517 .name = "_csv.Error",
1518 .flags = Py_TPFLAGS_DEFAULT,
1519 .slots = error_slots,
1520};
1521
Skip Montanarob4a04172003-03-20 23:29:12 +00001522/*
1523 * MODULE
1524 */
1525
1526PyDoc_STRVAR(csv_module_doc,
1527"CSV parsing and writing.\n"
1528"\n"
1529"This module provides classes that assist in the reading and writing\n"
1530"of Comma Separated Value (CSV) files, and implements the interface\n"
1531"described by PEP 305. Although many CSV files are simple to parse,\n"
1532"the format is not formally defined by a stable specification and\n"
1533"is subtle enough that parsing lines of a CSV file with something\n"
1534"like line.split(\",\") is bound to fail. The module supports three\n"
1535"basic APIs: reading, writing, and registration of dialects.\n"
1536"\n"
1537"\n"
1538"DIALECT REGISTRATION:\n"
1539"\n"
1540"Readers and writers support a dialect argument, which is a convenient\n"
1541"handle on a group of settings. When the dialect argument is a string,\n"
1542"it identifies one of the dialects previously registered with the module.\n"
1543"If it is a class or instance, the attributes of the argument are used as\n"
1544"the settings for the reader or writer:\n"
1545"\n"
1546" class excel:\n"
1547" delimiter = ','\n"
1548" quotechar = '\"'\n"
1549" escapechar = None\n"
1550" doublequote = True\n"
1551" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001552" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001553" quoting = QUOTE_MINIMAL\n"
1554"\n"
1555"SETTINGS:\n"
1556"\n"
oldkaa0735f2018-02-02 16:52:55 +08001557" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001558" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001559" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001560" field separator. It defaults to ','.\n"
1561" * skipinitialspace - specifies how to interpret whitespace which\n"
1562" immediately follows a delimiter. It defaults to False, which\n"
1563" means that whitespace immediately following a delimiter is part\n"
1564" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001565" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001566" terminate rows.\n"
1567" * quoting - controls when quotes should be generated by the writer.\n"
1568" It can take on any of the following module constants:\n"
1569"\n"
1570" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1571" field contains either the quotechar or the delimiter\n"
1572" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1573" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001574" fields which do not parse as integers or floating point\n"
1575" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001576" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001577" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001578" the delimiter when quoting is set to QUOTE_NONE.\n"
1579" * doublequote - controls the handling of quotes inside fields. When\n"
1580" True, two consecutive quotes are interpreted as one during read,\n"
1581" and when writing, each quote character embedded in the data is\n"
1582" written as two quotes\n");
1583
1584PyDoc_STRVAR(csv_reader_doc,
1585" csv_reader = reader(iterable [, dialect='excel']\n"
1586" [optional keyword args])\n"
1587" for row in csv_reader:\n"
1588" process(row)\n"
1589"\n"
1590"The \"iterable\" argument can be any object that returns a line\n"
1591"of input for each iteration, such as a file object or a list. The\n"
1592"optional \"dialect\" parameter is discussed below. The function\n"
1593"also accepts optional keyword arguments which override settings\n"
1594"provided by the dialect.\n"
1595"\n"
1596"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001597"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001598
1599PyDoc_STRVAR(csv_writer_doc,
1600" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1601" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001602" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001603" csv_writer.writerow(row)\n"
1604"\n"
1605" [or]\n"
1606"\n"
1607" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1608" [optional keyword args])\n"
1609" csv_writer.writerows(rows)\n"
1610"\n"
1611"The \"fileobj\" argument can be any object that supports the file API.\n");
1612
1613PyDoc_STRVAR(csv_list_dialects_doc,
1614"Return a list of all know dialect names.\n"
1615" names = csv.list_dialects()");
1616
/* Docstring for the module-level "get_dialect" function
   (see csv_methods). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1620
1621PyDoc_STRVAR(csv_register_dialect_doc,
1622"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001623" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001624
/* Docstring for the module-level "unregister_dialect" function
   (see csv_methods). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1628
/* Docstring for the module-level "field_size_limit" function
   (see csv_methods). */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1635
/* Module-level function table, referenced from the _csvmodule definition.
   reader, writer and register_dialect are declared as taking positional
   and keyword arguments; list_dialects takes no arguments;
   unregister_dialect and get_dialect take a single object;
   field_size_limit takes positional arguments only.
   The { NULL, NULL } entry terminates the table. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)(void(*)(void))csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)(void(*)(void))csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }
};
1653
Petr Viktorin6a02b382020-12-15 15:14:35 +01001654static int
1655csv_exec(PyObject *module) {
1656 const StyleDesc *style;
1657 PyObject *temp;
1658 _csvstate *module_state = get_csv_state(module);
1659
1660 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1661 module_state->dialect_type = (PyTypeObject *)temp;
1662 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1663 return -1;
1664 }
1665
1666 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1667 module_state->reader_type = (PyTypeObject *)temp;
1668 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1669 return -1;
1670 }
1671
1672 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1673 module_state->writer_type = (PyTypeObject *)temp;
1674 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1675 return -1;
1676 }
1677
1678 /* Add version to the module. */
1679 if (PyModule_AddStringConstant(module, "__version__",
1680 MODULE_VERSION) == -1) {
1681 return -1;
1682 }
1683
1684 /* Set the field limit */
1685 module_state->field_limit = 128 * 1024;
1686
1687 /* Add _dialects dictionary */
1688 module_state->dialects = PyDict_New();
1689 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1690 return -1;
1691 }
1692
1693 /* Add quote styles into dictionary */
1694 for (style = quote_styles; style->name; style++) {
1695 if (PyModule_AddIntConstant(module, style->name,
1696 style->style) == -1)
1697 return -1;
1698 }
1699
1700 /* Add the CSV exception object to the module. */
1701 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1702 if (bases == NULL) {
1703 return -1;
1704 }
1705 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1706 bases);
1707 Py_DECREF(bases);
1708 if (module_state->error_obj == NULL) {
1709 return -1;
1710 }
1711 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1712 return -1;
1713 }
1714
1715 return 0;
1716}
1717
/* Module slots, referenced from the _csvmodule definition: csv_exec is
   registered as the Py_mod_exec handler.  The {0, NULL} entry terminates
   the table. */
static PyModuleDef_Slot csv_slots[] = {
    {Py_mod_exec, csv_exec},
    {0, NULL}
};
1722
Martin v. Löwis1a214512008-06-11 05:26:20 +00001723static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001724 PyModuleDef_HEAD_INIT,
1725 "_csv",
1726 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001727 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001728 csv_methods,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001729 csv_slots,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001730 _csv_traverse,
1731 _csv_clear,
1732 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001733};
1734
Skip Montanarob4a04172003-03-20 23:29:12 +00001735PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001736PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001737{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001738 return PyModuleDef_Init(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001739}