blob: 3109fd16bc744bf8bdb81557f521d2aa67974280 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
Skip Montanarob4a04172003-03-20 23:29:12 +00009*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
Petr Viktorin6a02b382020-12-15 15:14:35 +010021 PyTypeObject *dialect_type;
22 PyTypeObject *reader_type;
23 PyTypeObject *writer_type;
Antoine Pitroue7672d32012-05-16 11:33:08 +020024 long field_limit; /* max parsed field size */
25} _csvstate;
26
Petr Viktorin6a02b382020-12-15 15:14:35 +010027static struct PyModuleDef _csvmodule;
28
Hai Shif707d942020-03-16 21:15:01 +080029static inline _csvstate*
30get_csv_state(PyObject *module)
31{
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35}
Antoine Pitroue7672d32012-05-16 11:33:08 +020036
37static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010038_csv_clear(PyObject *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020039{
Petr Viktorin6a02b382020-12-15 15:14:35 +010040 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020046 return 0;
47}
48
49static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010050_csv_traverse(PyObject *module, visitproc visit, void *arg)
Antoine Pitroue7672d32012-05-16 11:33:08 +020051{
Petr Viktorin6a02b382020-12-15 15:14:35 +010052 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020058 return 0;
59}
60
61static void
Petr Viktorin6a02b382020-12-15 15:14:35 +010062_csv_free(void *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020063{
Petr Viktorin6a02b382020-12-15 15:14:35 +010064 _csv_clear((PyObject *)module);
Antoine Pitroue7672d32012-05-16 11:33:08 +020065}
66
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,           /* at the start of a record */
    START_FIELD,            /* expecting a field */
    ESCAPED_CHAR,           /* escapechar seen outside a quoted field */
    IN_FIELD,               /* inside an unquoted field */
    IN_QUOTED_FIELD,        /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD, /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,  /* quotechar seen inside a quoted field */
    EAT_CRNL,               /* entered after a '\n' or '\r' line ending */
    AFTER_ESCAPED_CRNL      /* an escaped '\n' or '\r' was just stored */
} ParserState;
72
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting policies; terminated by an entry whose
 * name is NULL. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
89
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000092
Serhiy Storchaka323748a2018-07-26 13:21:09 +030093 char doublequote; /* is " represented by ""? */
94 char skipinitialspace; /* ignore spaces following delimiter? */
95 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030097 Py_UCS4 delimiter; /* field separator */
98 Py_UCS4 quotechar; /* quote character */
99 Py_UCS4 escapechar; /* escape character */
100 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} DialectObj;
103
Skip Montanarob4a04172003-03-20 23:29:12 +0000104typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject *fields; /* field list for current record */
112 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200113 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000114 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 Py_ssize_t field_len; /* length of current field */
116 int numeric_field; /* treat field as numeric */
117 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118} ReaderObj;
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200123 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200127 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000128 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_ssize_t rec_len; /* length of record */
130 int num_fields; /* number of fields in record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
Petr Viktorin6a02b382020-12-15 15:14:35 +0100132 PyObject *error_obj; /* cached error object */
133} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
135/*
136 * DIALECT class
137 */
138
139static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100140get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
Skip Montanarob4a04172003-03-20 23:29:12 +0000141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143
Petr Viktorin6a02b382020-12-15 15:14:35 +0100144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
Petr Viktorin6a02b382020-12-15 15:14:35 +0100147 PyErr_Format(module_state->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 }
149 else
150 Py_INCREF(dialect_obj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153}
154
Skip Montanarob4a04172003-03-20 23:29:12 +0000155static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200156get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200159 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200166Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000167{
Dong-hee Na0383be42020-06-10 00:33:43 +0900168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200232_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 if (src == NULL)
235 *target = dflt;
236 else {
237 *target = '\0';
238 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100246 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200247 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300249 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 name);
251 return -1;
252 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100253 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200255 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 }
257 }
258 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000259}
260
261static int
262_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
263{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 if (src == NULL)
265 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
266 else {
267 if (src == Py_None)
268 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Format(PyExc_TypeError,
271 "\"%s\" must be a string", name);
272 return -1;
273 }
274 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100275 if (PyUnicode_READY(src) == -1)
276 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300278 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 }
280 }
281 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282}
283
284static int
285dialect_check_quoting(int quoting)
286{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200287 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200290 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 return 0;
292 }
293 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
294 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295}
Skip Montanarob4a04172003-03-20 23:29:12 +0000296
297#define D_OFF(x) offsetof(DialectObj, x)
298
299static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300300 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
301 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
302 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000304};
305
306static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 { "delimiter", (getter)Dialect_get_delimiter},
308 { "escapechar", (getter)Dialect_get_escapechar},
309 { "lineterminator", (getter)Dialect_get_lineterminator},
310 { "quotechar", (getter)Dialect_get_quotechar},
311 { "quoting", (getter)Dialect_get_quoting},
312 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000313};
314
315static void
316Dialect_dealloc(DialectObj *self)
317{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100318 PyTypeObject *tp = Py_TYPE(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700319 PyObject_GC_UnTrack(self);
320 tp->tp_clear((PyObject *)self);
321 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100322 Py_DECREF(tp);
323}
324
/* Keyword names accepted by dialect_new(), in the order in which the
 * argument parser fills the corresponding variables.  NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
337
Petr Viktorin6a02b382020-12-15 15:14:35 +0100338static _csvstate *
339_csv_state_from_type(PyTypeObject *type, const char *name)
340{
341 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
342 if (module == NULL) {
343 return NULL;
344 }
345 _csvstate *module_state = PyModule_GetState(module);
346 if (module_state == NULL) {
347 PyErr_Format(PyExc_SystemError,
348 "%s: No _csv module state found", name);
349 return NULL;
350 }
351 return module_state;
352}
353
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000354static PyObject *
355dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 DialectObj *self;
358 PyObject *ret = NULL;
359 PyObject *dialect = NULL;
360 PyObject *delimiter = NULL;
361 PyObject *doublequote = NULL;
362 PyObject *escapechar = NULL;
363 PyObject *lineterminator = NULL;
364 PyObject *quotechar = NULL;
365 PyObject *quoting = NULL;
366 PyObject *skipinitialspace = NULL;
367 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
370 "|OOOOOOOOO", dialect_kws,
371 &dialect,
372 &delimiter,
373 &doublequote,
374 &escapechar,
375 &lineterminator,
376 &quotechar,
377 &quoting,
378 &skipinitialspace,
379 &strict))
380 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000381
Petr Viktorin6a02b382020-12-15 15:14:35 +0100382 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
383 if (module_state == NULL) {
384 return NULL;
385 }
386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100388 if (PyUnicode_Check(dialect)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100389 dialect = get_dialect_from_registry(dialect, module_state);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 if (dialect == NULL)
391 return NULL;
392 }
393 else
394 Py_INCREF(dialect);
395 /* Can we reuse this instance? */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100396 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200397 delimiter == NULL &&
398 doublequote == NULL &&
399 escapechar == NULL &&
400 lineterminator == NULL &&
401 quotechar == NULL &&
402 quoting == NULL &&
403 skipinitialspace == NULL &&
404 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 return dialect;
406 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 self = (DialectObj *)type->tp_alloc(type, 0);
409 if (self == NULL) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100410 Py_CLEAR(dialect);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 return NULL;
412 }
413 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 Py_XINCREF(delimiter);
416 Py_XINCREF(doublequote);
417 Py_XINCREF(escapechar);
418 Py_XINCREF(lineterminator);
419 Py_XINCREF(quotechar);
420 Py_XINCREF(quoting);
421 Py_XINCREF(skipinitialspace);
422 Py_XINCREF(strict);
423 if (dialect != NULL) {
Miss Islington (bot)bb260c22021-07-13 16:18:28 -0700424#define DIALECT_GETATTR(v, n) \
425 do { \
426 if (v == NULL) { \
427 v = PyObject_GetAttrString(dialect, n); \
428 if (v == NULL) \
429 PyErr_Clear(); \
430 } \
431 } while (0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000432 DIALECT_GETATTR(delimiter, "delimiter");
433 DIALECT_GETATTR(doublequote, "doublequote");
434 DIALECT_GETATTR(escapechar, "escapechar");
435 DIALECT_GETATTR(lineterminator, "lineterminator");
436 DIALECT_GETATTR(quotechar, "quotechar");
437 DIALECT_GETATTR(quoting, "quoting");
438 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
439 DIALECT_GETATTR(strict, "strict");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000441
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000442 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000443#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 if (meth(name, target, src, dflt)) \
445 goto err
446 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300447 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000448 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
449 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
450 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
451 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300452 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
453 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000454
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000455 /* validate options */
456 if (dialect_check_quoting(self->quoting))
457 goto err;
458 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200459 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300460 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000461 goto err;
462 }
463 if (quotechar == Py_None && quoting == NULL)
464 self->quoting = QUOTE_NONE;
465 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
466 PyErr_SetString(PyExc_TypeError,
467 "quotechar must be set if quoting enabled");
468 goto err;
469 }
470 if (self->lineterminator == 0) {
471 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
472 goto err;
473 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000475 ret = (PyObject *)self;
476 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000477err:
Petr Viktorin6a02b382020-12-15 15:14:35 +0100478 Py_CLEAR(self);
479 Py_CLEAR(dialect);
480 Py_CLEAR(delimiter);
481 Py_CLEAR(doublequote);
482 Py_CLEAR(escapechar);
483 Py_CLEAR(lineterminator);
484 Py_CLEAR(quotechar);
485 Py_CLEAR(quoting);
486 Py_CLEAR(skipinitialspace);
487 Py_CLEAR(strict);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000488 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000489}
490
Petr Viktorin6a02b382020-12-15 15:14:35 +0100491/* Since dialect is now a heap type, it inherits pickling method for
492 * protocol 0 and 1 from object, therefore it needs to be overriden */
493
494PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
495
496static PyObject *
497Dialect_reduce(PyObject *self, PyObject *args) {
498 PyErr_Format(PyExc_TypeError,
499 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
500 return NULL;
501}
502
503static struct PyMethodDef dialect_methods[] = {
504 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
505 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
506 {NULL, NULL}
507};
Skip Montanarob4a04172003-03-20 23:29:12 +0000508
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000509PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000510"CSV dialect\n"
511"\n"
512"The Dialect type records CSV parsing and generation options.\n");
513
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700514static int
515Dialect_clear(DialectObj *self)
516{
517 Py_CLEAR(self->lineterminator);
518 return 0;
519}
520
521static int
522Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
523{
524 Py_VISIT(self->lineterminator);
525 Py_VISIT(Py_TYPE(self));
526 return 0;
527}
528
Petr Viktorin6a02b382020-12-15 15:14:35 +0100529static PyType_Slot Dialect_Type_slots[] = {
530 {Py_tp_doc, (char*)Dialect_Type_doc},
531 {Py_tp_members, Dialect_memberlist},
532 {Py_tp_getset, Dialect_getsetlist},
533 {Py_tp_new, dialect_new},
534 {Py_tp_methods, dialect_methods},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100535 {Py_tp_dealloc, Dialect_dealloc},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700536 {Py_tp_clear, Dialect_clear},
537 {Py_tp_traverse, Dialect_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100538 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000539};
540
Petr Viktorin6a02b382020-12-15 15:14:35 +0100541PyType_Spec Dialect_Type_spec = {
542 .name = "_csv.Dialect",
543 .basicsize = sizeof(DialectObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700544 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
545 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100546 .slots = Dialect_Type_slots,
547};
548
549
Andrew McNamara91b97462005-01-11 01:07:23 +0000550/*
551 * Return an instance of the dialect type, given a Python instance or kwarg
552 * description of the dialect
553 */
554static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100555_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
Andrew McNamara91b97462005-01-11 01:07:23 +0000556{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100557 PyObject *type = (PyObject *)module_state->dialect_type;
Victor Stinner6412f492016-08-23 00:21:34 +0200558 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100559 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200560 }
561 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100562 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200563 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000564}
565
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000566/*
567 * READER
568 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000569static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000570parse_save_field(ReaderObj *self)
571{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000573
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200574 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
575 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000576 if (field == NULL)
577 return -1;
578 self->field_len = 0;
579 if (self->numeric_field) {
580 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000581
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 self->numeric_field = 0;
583 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000584 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200585 if (tmp == NULL)
586 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000587 field = tmp;
588 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100589 if (PyList_Append(self->fields, field) < 0) {
590 Py_DECREF(field);
591 return -1;
592 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000593 Py_DECREF(field);
594 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000595}
596
597static int
598parse_grow_buff(ReaderObj *self)
599{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500600 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
601
602 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
603 Py_UCS4 *field_new = self->field;
604 PyMem_Resize(field_new, Py_UCS4, field_size_new);
605 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 PyErr_NoMemory();
607 return 0;
608 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500609 self->field = field_new;
610 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000611 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000612}
613
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000614static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100615parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000616{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100617 if (self->field_len >= module_state->field_limit) {
618 PyErr_Format(module_state->error_obj,
619 "field larger than field limit (%ld)",
620 module_state->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000621 return -1;
622 }
623 if (self->field_len == self->field_size && !parse_grow_buff(self))
624 return -1;
625 self->field[self->field_len++] = c;
626 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000627}
628
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000629static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100630parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000631{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000632 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000634 switch (self->state) {
635 case START_RECORD:
636 /* start of record */
637 if (c == '\0')
638 /* empty line - return [] */
639 break;
640 else if (c == '\n' || c == '\r') {
641 self->state = EAT_CRNL;
642 break;
643 }
644 /* normal character - handle as START_FIELD */
645 self->state = START_FIELD;
646 /* fallthru */
647 case START_FIELD:
648 /* expecting field */
649 if (c == '\n' || c == '\r' || c == '\0') {
650 /* save empty field - return [fields] */
651 if (parse_save_field(self) < 0)
652 return -1;
653 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
654 }
655 else if (c == dialect->quotechar &&
656 dialect->quoting != QUOTE_NONE) {
657 /* start quoted field */
658 self->state = IN_QUOTED_FIELD;
659 }
660 else if (c == dialect->escapechar) {
661 /* possible escaped character */
662 self->state = ESCAPED_CHAR;
663 }
664 else if (c == ' ' && dialect->skipinitialspace)
665 /* ignore space at start of field */
666 ;
667 else if (c == dialect->delimiter) {
668 /* save empty field */
669 if (parse_save_field(self) < 0)
670 return -1;
671 }
672 else {
673 /* begin new unquoted field */
674 if (dialect->quoting == QUOTE_NONNUMERIC)
675 self->numeric_field = 1;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100676 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 return -1;
678 self->state = IN_FIELD;
679 }
680 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000681
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000682 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400683 if (c == '\n' || c=='\r') {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100684 if (parse_add_char(self, module_state, c) < 0)
R David Murrayc7c42ef2013-03-19 22:41:47 -0400685 return -1;
686 self->state = AFTER_ESCAPED_CRNL;
687 break;
688 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000689 if (c == '\0')
690 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100691 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000692 return -1;
693 self->state = IN_FIELD;
694 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000695
R David Murrayc7c42ef2013-03-19 22:41:47 -0400696 case AFTER_ESCAPED_CRNL:
697 if (c == '\0')
698 break;
699 /*fallthru*/
700
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000701 case IN_FIELD:
702 /* in unquoted field */
703 if (c == '\n' || c == '\r' || c == '\0') {
704 /* end of line - return [fields] */
705 if (parse_save_field(self) < 0)
706 return -1;
707 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
708 }
709 else if (c == dialect->escapechar) {
710 /* possible escaped character */
711 self->state = ESCAPED_CHAR;
712 }
713 else if (c == dialect->delimiter) {
714 /* save field - wait for new field */
715 if (parse_save_field(self) < 0)
716 return -1;
717 self->state = START_FIELD;
718 }
719 else {
720 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100721 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 return -1;
723 }
724 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000725
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000726 case IN_QUOTED_FIELD:
727 /* in quoted field */
728 if (c == '\0')
729 ;
730 else if (c == dialect->escapechar) {
731 /* Possible escape character */
732 self->state = ESCAPE_IN_QUOTED_FIELD;
733 }
734 else if (c == dialect->quotechar &&
735 dialect->quoting != QUOTE_NONE) {
736 if (dialect->doublequote) {
737 /* doublequote; " represented by "" */
738 self->state = QUOTE_IN_QUOTED_FIELD;
739 }
740 else {
741 /* end of quote part of field */
742 self->state = IN_FIELD;
743 }
744 }
745 else {
746 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100747 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000748 return -1;
749 }
750 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 case ESCAPE_IN_QUOTED_FIELD:
753 if (c == '\0')
754 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100755 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 return -1;
757 self->state = IN_QUOTED_FIELD;
758 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300761 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 if (dialect->quoting != QUOTE_NONE &&
763 c == dialect->quotechar) {
764 /* save "" as " */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100765 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 return -1;
767 self->state = IN_QUOTED_FIELD;
768 }
769 else if (c == dialect->delimiter) {
770 /* save field - wait for new field */
771 if (parse_save_field(self) < 0)
772 return -1;
773 self->state = START_FIELD;
774 }
775 else if (c == '\n' || c == '\r' || c == '\0') {
776 /* end of line - return [fields] */
777 if (parse_save_field(self) < 0)
778 return -1;
779 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
780 }
781 else if (!dialect->strict) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100782 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000783 return -1;
784 self->state = IN_FIELD;
785 }
786 else {
787 /* illegal */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100788 PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 dialect->delimiter,
790 dialect->quotechar);
791 return -1;
792 }
793 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000794
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000795 case EAT_CRNL:
796 if (c == '\n' || c == '\r')
797 ;
798 else if (c == '\0')
799 self->state = START_RECORD;
800 else {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100801 PyErr_Format(module_state->error_obj,
802 "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 return -1;
804 }
805 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000806
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 }
808 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000809}
810
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000811static int
812parse_reset(ReaderObj *self)
813{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300814 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 if (self->fields == NULL)
816 return -1;
817 self->field_len = 0;
818 self->state = START_RECORD;
819 self->numeric_field = 0;
820 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000821}
Skip Montanarob4a04172003-03-20 23:29:12 +0000822
823static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000824Reader_iternext(ReaderObj *self)
825{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200827 Py_UCS4 c;
828 Py_ssize_t pos, linelen;
829 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300830 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200831 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000832
Petr Viktorin6a02b382020-12-15 15:14:35 +0100833 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
834 "Reader.__next__");
835 if (module_state == NULL) {
836 return NULL;
837 }
838
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000839 if (parse_reset(self) < 0)
840 return NULL;
841 do {
842 lineobj = PyIter_Next(self->input_iter);
843 if (lineobj == NULL) {
844 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700845 if (!PyErr_Occurred() && (self->field_len != 0 ||
846 self->state == IN_QUOTED_FIELD)) {
847 if (self->dialect->strict)
Petr Viktorin6a02b382020-12-15 15:14:35 +0100848 PyErr_SetString(module_state->error_obj,
Senthil Kumaran49d13022012-09-25 02:37:20 -0700849 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700850 else if (parse_save_field(self) >= 0)
851 break;
852 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 return NULL;
854 }
855 if (!PyUnicode_Check(lineobj)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100856 PyErr_Format(module_state->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 "iterator should return strings, "
858 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300859 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100860 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000861 );
862 Py_DECREF(lineobj);
863 return NULL;
864 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100865 if (PyUnicode_READY(lineobj) == -1) {
866 Py_DECREF(lineobj);
867 return NULL;
868 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200870 kind = PyUnicode_KIND(lineobj);
871 data = PyUnicode_DATA(lineobj);
872 pos = 0;
873 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000874 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200875 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000876 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000877 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100878 PyErr_Format(module_state->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700879 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000880 goto err;
881 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100882 if (parse_process_char(self, module_state, c) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 Py_DECREF(lineobj);
884 goto err;
885 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200886 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 }
888 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100889 if (parse_process_char(self, module_state, 0) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000890 goto err;
891 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000892
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 fields = self->fields;
894 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000895err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000897}
898
899static void
900Reader_dealloc(ReaderObj *self)
901{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100902 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 PyObject_GC_UnTrack(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700904 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100905 if (self->field != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 PyMem_Free(self->field);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100907 self->field = NULL;
908 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100910 Py_DECREF(tp);
911}
912
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000913static int
914Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
915{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 Py_VISIT(self->dialect);
917 Py_VISIT(self->input_iter);
918 Py_VISIT(self->fields);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700919 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000921}
922
923static int
924Reader_clear(ReaderObj *self)
925{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 Py_CLEAR(self->dialect);
927 Py_CLEAR(self->input_iter);
928 Py_CLEAR(self->fields);
929 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000930}
931
932PyDoc_STRVAR(Reader_Type_doc,
933"CSV reader\n"
934"\n"
935"Reader objects are responsible for reading and parsing tabular data\n"
936"in CSV format.\n"
937);
938
939static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000941};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000942#define R_OFF(x) offsetof(ReaderObj, x)
943
944static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
946 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
947 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000948};
949
Skip Montanarob4a04172003-03-20 23:29:12 +0000950
Petr Viktorin6a02b382020-12-15 15:14:35 +0100951static PyType_Slot Reader_Type_slots[] = {
952 {Py_tp_doc, (char*)Reader_Type_doc},
953 {Py_tp_traverse, Reader_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100954 {Py_tp_iter, PyObject_SelfIter},
955 {Py_tp_iternext, Reader_iternext},
956 {Py_tp_methods, Reader_methods},
957 {Py_tp_members, Reader_memberlist},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700958 {Py_tp_clear, Reader_clear},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100959 {Py_tp_dealloc, Reader_dealloc},
960 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000961};
962
Petr Viktorin6a02b382020-12-15 15:14:35 +0100963PyType_Spec Reader_Type_spec = {
964 .name = "_csv.reader",
965 .basicsize = sizeof(ReaderObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700966 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
967 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100968 .slots = Reader_Type_slots
969};
970
971
Skip Montanarob4a04172003-03-20 23:29:12 +0000972static PyObject *
973csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
974{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 PyObject * iterator, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100976 _csvstate *module_state = get_csv_state(module);
977 ReaderObj * self = PyObject_GC_New(
978 ReaderObj,
979 module_state->reader_type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 if (!self)
982 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000983
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000984 self->dialect = NULL;
985 self->fields = NULL;
986 self->input_iter = NULL;
987 self->field = NULL;
988 self->field_size = 0;
989 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000990
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 if (parse_reset(self) < 0) {
992 Py_DECREF(self);
993 return NULL;
994 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000995
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
997 Py_DECREF(self);
998 return NULL;
999 }
1000 self->input_iter = PyObject_GetIter(iterator);
1001 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001002 Py_DECREF(self);
1003 return NULL;
1004 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001005 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1006 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001007 if (self->dialect == NULL) {
1008 Py_DECREF(self);
1009 return NULL;
1010 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001011
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001012 PyObject_GC_Track(self);
1013 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001014}
1015
1016/*
1017 * WRITER
1018 */
1019/* ---------------------------------------------------------------- */
1020static void
1021join_reset(WriterObj *self)
1022{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001023 self->rec_len = 0;
1024 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001025}
1026
1027#define MEM_INCR 32768
1028
1029/* Calculate new record length or append field to record. Return new
1030 * record length.
1031 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001032static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001033join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001034 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001035 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001036{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 DialectObj *dialect = self->dialect;
1038 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001039 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001040
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001041#define INCLEN \
1042 do {\
1043 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1044 goto overflow; \
1045 } \
1046 rec_len++; \
1047 } while(0)
1048
1049#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 do {\
1051 if (copy_phase) \
1052 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001053 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 /* If this is not the first field we need a field separator */
1059 if (self->num_fields > 0)
1060 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 /* Handle preceding quote */
1063 if (copy_phase && *quoted)
1064 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 /* Copy/count field data */
1067 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001068 for (i = 0; field_data && (i < field_len); i++) {
1069 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001070 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 if (c == dialect->delimiter ||
1073 c == dialect->escapechar ||
1074 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001075 PyUnicode_FindChar(
1076 dialect->lineterminator, c, 0,
1077 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001078 if (dialect->quoting == QUOTE_NONE)
1079 want_escape = 1;
1080 else {
1081 if (c == dialect->quotechar) {
1082 if (dialect->doublequote)
1083 ADDCH(dialect->quotechar);
1084 else
1085 want_escape = 1;
1086 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001087 else if (c == dialect->escapechar) {
1088 want_escape = 1;
1089 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 if (!want_escape)
1091 *quoted = 1;
1092 }
1093 if (want_escape) {
1094 if (!dialect->escapechar) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001095 PyErr_Format(self->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 "need to escape, but no escapechar set");
1097 return -1;
1098 }
1099 ADDCH(dialect->escapechar);
1100 }
1101 }
1102 /* Copy field character into record buffer.
1103 */
1104 ADDCH(c);
1105 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (*quoted) {
1108 if (copy_phase)
1109 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001110 else {
1111 INCLEN; /* starting quote */
1112 INCLEN; /* ending quote */
1113 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 }
1115 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001116
1117 overflow:
1118 PyErr_NoMemory();
1119 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001120#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001121#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001122}
1123
1124static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001125join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001126{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001127 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001128
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001129 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001130 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1131 Py_UCS4 *rec_new = self->rec;
1132 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1133 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001134 PyErr_NoMemory();
1135 return 0;
1136 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001137 self->rec = rec_new;
1138 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 }
1140 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141}
1142
1143static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001144join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001145{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001146 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001147 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001148 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001149 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001151 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001152 if (PyUnicode_READY(field) == -1)
1153 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001154 field_kind = PyUnicode_KIND(field);
1155 field_data = PyUnicode_DATA(field);
1156 field_len = PyUnicode_GET_LENGTH(field);
1157 }
1158 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001159 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 if (rec_len < 0)
1161 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 /* grow record buffer if necessary */
1164 if (!join_check_rec_size(self, rec_len))
1165 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001167 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001168 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172}
1173
1174static int
1175join_append_lineterminator(WriterObj *self)
1176{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001177 Py_ssize_t terminator_len, i;
1178 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001179 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001181 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 if (terminator_len == -1)
1183 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001184
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 /* grow record buffer if necessary */
1186 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1187 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001188
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001189 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1190 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1191 for (i = 0; i < terminator_len; i++)
1192 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001194
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001195 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001196}
1197
1198PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001199"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001200"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001201"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001202"elements will be converted to string.");
1203
1204static PyObject *
1205csv_writerow(WriterObj *self, PyObject *seq)
1206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001208 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001209
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001210 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001211 if (iter == NULL) {
1212 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001213 PyErr_Format(self->error_obj,
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001214 "iterable expected, not %.200s",
1215 Py_TYPE(seq)->tp_name);
1216 }
1217 return NULL;
1218 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001219
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 /* Join all fields in internal buffer.
1221 */
1222 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001223 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 int append_ok;
1225 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001226
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001227 switch (dialect->quoting) {
1228 case QUOTE_NONNUMERIC:
1229 quoted = !PyNumber_Check(field);
1230 break;
1231 case QUOTE_ALL:
1232 quoted = 1;
1233 break;
1234 default:
1235 quoted = 0;
1236 break;
1237 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001240 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 Py_DECREF(field);
1242 }
1243 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001244 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001245 Py_DECREF(field);
1246 }
1247 else {
1248 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 str = PyObject_Str(field);
1251 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001252 if (str == NULL) {
1253 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001255 }
1256 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 Py_DECREF(str);
1258 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001259 if (!append_ok) {
1260 Py_DECREF(iter);
1261 return NULL;
1262 }
1263 }
1264 Py_DECREF(iter);
1265 if (PyErr_Occurred())
1266 return NULL;
1267
Licht Takeuchi20019002017-12-12 18:57:06 +09001268 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001269 if (dialect->quoting == QUOTE_NONE) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001270 PyErr_Format(self->error_obj,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001271 "single empty field record must be quoted");
1272 return NULL;
1273 }
1274 self->num_fields--;
1275 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 return NULL;
1277 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001278
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001279 /* Add line terminator.
1280 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001281 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001282 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001283 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001284
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001285 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1286 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001287 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001288 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001289 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001290 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001291 Py_DECREF(line);
1292 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001293}
1294
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001295PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001296"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001297"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001298"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001299"elements will be converted to string.");
1300
Skip Montanarob4a04172003-03-20 23:29:12 +00001301static PyObject *
1302csv_writerows(WriterObj *self, PyObject *seqseq)
1303{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001305
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 row_iter = PyObject_GetIter(seqseq);
1307 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001308 return NULL;
1309 }
1310 while ((row_obj = PyIter_Next(row_iter))) {
1311 result = csv_writerow(self, row_obj);
1312 Py_DECREF(row_obj);
1313 if (!result) {
1314 Py_DECREF(row_iter);
1315 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001316 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 else
1318 Py_DECREF(result);
1319 }
1320 Py_DECREF(row_iter);
1321 if (PyErr_Occurred())
1322 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001323 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001324}
1325
1326static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1328 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1329 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001330};
1331
1332#define W_OFF(x) offsetof(WriterObj, x)
1333
1334static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1336 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001337};
1338
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001339static int
1340Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1341{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001343 Py_VISIT(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001344 Py_VISIT(self->error_obj);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001345 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001347}
1348
1349static int
1350Writer_clear(WriterObj *self)
1351{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001352 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001353 Py_CLEAR(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001354 Py_CLEAR(self->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001355 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001356}
1357
Petr Viktorin6a02b382020-12-15 15:14:35 +01001358static void
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001359Writer_dealloc(WriterObj *self)
Petr Viktorin6a02b382020-12-15 15:14:35 +01001360{
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001361 PyTypeObject *tp = Py_TYPE(self);
1362 PyObject_GC_UnTrack(self);
1363 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001364 if (self->rec != NULL) {
1365 PyMem_Free(self->rec);
1366 }
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001367 PyObject_GC_Del(self);
1368 Py_DECREF(tp);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001369}
1370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001372"CSV writer\n"
1373"\n"
1374"Writer objects are responsible for generating tabular data\n"
1375"in CSV format from sequence input.\n"
1376);
1377
Petr Viktorin6a02b382020-12-15 15:14:35 +01001378static PyType_Slot Writer_Type_slots[] = {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001379 {Py_tp_doc, (char*)Writer_Type_doc},
1380 {Py_tp_traverse, Writer_traverse},
1381 {Py_tp_clear, Writer_clear},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001382 {Py_tp_dealloc, Writer_dealloc},
Petr Viktorin6a02b382020-12-15 15:14:35 +01001383 {Py_tp_methods, Writer_methods},
1384 {Py_tp_members, Writer_memberlist},
1385 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +00001386};
1387
Petr Viktorin6a02b382020-12-15 15:14:35 +01001388PyType_Spec Writer_Type_spec = {
1389 .name = "_csv.writer",
1390 .basicsize = sizeof(WriterObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001391 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1392 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +01001393 .slots = Writer_Type_slots,
1394};
1395
1396
Skip Montanarob4a04172003-03-20 23:29:12 +00001397static PyObject *
1398csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1399{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 PyObject * output_file, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001401 _csvstate *module_state = get_csv_state(module);
1402 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001403 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 if (!self)
1406 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001408 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001409 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 self->rec = NULL;
1412 self->rec_size = 0;
1413 self->rec_len = 0;
1414 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001415
Petr Viktorin6a02b382020-12-15 15:14:35 +01001416 self->error_obj = Py_NewRef(module_state->error_obj);
1417
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001418 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1419 Py_DECREF(self);
1420 return NULL;
1421 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001422 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1423 Py_DECREF(self);
1424 return NULL;
1425 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001426 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 PyErr_SetString(PyExc_TypeError,
1428 "argument 1 must have a \"write\" method");
1429 Py_DECREF(self);
1430 return NULL;
1431 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001432 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1433 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001434 if (self->dialect == NULL) {
1435 Py_DECREF(self);
1436 return NULL;
1437 }
1438 PyObject_GC_Track(self);
1439 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001440}
1441
1442/*
1443 * DIALECT REGISTRY
1444 */
1445static PyObject *
1446csv_list_dialects(PyObject *module, PyObject *args)
1447{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001448 return PyDict_Keys(get_csv_state(module)->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001449}
1450
1451static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001452csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001453{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 PyObject *name_obj, *dialect_obj = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001455 _csvstate *module_state = get_csv_state(module);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001457
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001458 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1459 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001460 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001462 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 return NULL;
1464 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001465 if (PyUnicode_READY(name_obj) == -1)
1466 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001467 dialect = _call_dialect(module_state, dialect_obj, kwargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001468 if (dialect == NULL)
1469 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001470 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001471 Py_DECREF(dialect);
1472 return NULL;
1473 }
1474 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001475 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001476}
1477
1478static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001479csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001480{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001481 _csvstate *module_state = get_csv_state(module);
1482 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001483 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001484 PyErr_Format(module_state->error_obj, "unknown dialect");
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001485 }
1486 return NULL;
1487 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001488 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001489}
1490
1491static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001492csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001493{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001494 return get_dialect_from_registry(name_obj, get_csv_state(module));
Skip Montanarob4a04172003-03-20 23:29:12 +00001495}
1496
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001497static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001498csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001499{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 PyObject *new_limit = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001501 _csvstate *module_state = get_csv_state(module);
1502 long old_limit = module_state->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001503
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1505 return NULL;
1506 if (new_limit != NULL) {
1507 if (!PyLong_CheckExact(new_limit)) {
1508 PyErr_Format(PyExc_TypeError,
1509 "limit must be an integer");
1510 return NULL;
1511 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001512 module_state->field_limit = PyLong_AsLong(new_limit);
1513 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1514 module_state->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001515 return NULL;
1516 }
1517 }
1518 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001519}
1520
Petr Viktorin6a02b382020-12-15 15:14:35 +01001521static PyType_Slot error_slots[] = {
1522 {0, NULL},
1523};
1524
1525PyType_Spec error_spec = {
1526 .name = "_csv.Error",
Miss Islington (bot)3e44e9a2021-05-12 07:02:46 -07001527 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001528 .slots = error_slots,
1529};
1530
Skip Montanarob4a04172003-03-20 23:29:12 +00001531/*
1532 * MODULE
1533 */
1534
1535PyDoc_STRVAR(csv_module_doc,
1536"CSV parsing and writing.\n"
1537"\n"
1538"This module provides classes that assist in the reading and writing\n"
1539"of Comma Separated Value (CSV) files, and implements the interface\n"
1540"described by PEP 305. Although many CSV files are simple to parse,\n"
1541"the format is not formally defined by a stable specification and\n"
1542"is subtle enough that parsing lines of a CSV file with something\n"
1543"like line.split(\",\") is bound to fail. The module supports three\n"
1544"basic APIs: reading, writing, and registration of dialects.\n"
1545"\n"
1546"\n"
1547"DIALECT REGISTRATION:\n"
1548"\n"
1549"Readers and writers support a dialect argument, which is a convenient\n"
1550"handle on a group of settings. When the dialect argument is a string,\n"
1551"it identifies one of the dialects previously registered with the module.\n"
1552"If it is a class or instance, the attributes of the argument are used as\n"
1553"the settings for the reader or writer:\n"
1554"\n"
1555" class excel:\n"
1556" delimiter = ','\n"
1557" quotechar = '\"'\n"
1558" escapechar = None\n"
1559" doublequote = True\n"
1560" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001561" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001562" quoting = QUOTE_MINIMAL\n"
1563"\n"
1564"SETTINGS:\n"
1565"\n"
oldkaa0735f2018-02-02 16:52:55 +08001566" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001567" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001568" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001569" field separator. It defaults to ','.\n"
1570" * skipinitialspace - specifies how to interpret whitespace which\n"
1571" immediately follows a delimiter. It defaults to False, which\n"
1572" means that whitespace immediately following a delimiter is part\n"
1573" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001574" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001575" terminate rows.\n"
1576" * quoting - controls when quotes should be generated by the writer.\n"
1577" It can take on any of the following module constants:\n"
1578"\n"
1579" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1580" field contains either the quotechar or the delimiter\n"
1581" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1582" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001583" fields which do not parse as integers or floating point\n"
1584" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001585" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001586" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001587" the delimiter when quoting is set to QUOTE_NONE.\n"
1588" * doublequote - controls the handling of quotes inside fields. When\n"
1589" True, two consecutive quotes are interpreted as one during read,\n"
1590" and when writing, each quote character embedded in the data is\n"
1591" written as two quotes\n");
1592
1593PyDoc_STRVAR(csv_reader_doc,
1594" csv_reader = reader(iterable [, dialect='excel']\n"
1595" [optional keyword args])\n"
1596" for row in csv_reader:\n"
1597" process(row)\n"
1598"\n"
1599"The \"iterable\" argument can be any object that returns a line\n"
1600"of input for each iteration, such as a file object or a list. The\n"
1601"optional \"dialect\" parameter is discussed below. The function\n"
1602"also accepts optional keyword arguments which override settings\n"
1603"provided by the dialect.\n"
1604"\n"
1605"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001606"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001607
1608PyDoc_STRVAR(csv_writer_doc,
1609" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1610" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001611" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001612" csv_writer.writerow(row)\n"
1613"\n"
1614" [or]\n"
1615"\n"
1616" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1617" [optional keyword args])\n"
1618" csv_writer.writerows(rows)\n"
1619"\n"
1620"The \"fileobj\" argument can be any object that supports the file API.\n");
1621
1622PyDoc_STRVAR(csv_list_dialects_doc,
1623"Return a list of all know dialect names.\n"
1624" names = csv.list_dialects()");
1625
1626PyDoc_STRVAR(csv_get_dialect_doc,
1627"Return the dialect instance associated with name.\n"
1628" dialect = csv.get_dialect(name)");
1629
1630PyDoc_STRVAR(csv_register_dialect_doc,
1631"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001632" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
1634PyDoc_STRVAR(csv_unregister_dialect_doc,
1635"Delete the name/dialect mapping associated with a string name.\n"
1636" csv.unregister_dialect(name)");
1637
Andrew McNamara31d88962005-01-12 03:45:10 +00001638PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001639"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001640" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001641"\n"
1642"Returns old limit. If limit is not given, no new limit is set and\n"
1643"the old limit is returned");
1644
Skip Montanarob4a04172003-03-20 23:29:12 +00001645static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001646 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001648 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1650 { "list_dialects", (PyCFunction)csv_list_dialects,
1651 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001652 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001653 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1654 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1655 METH_O, csv_unregister_dialect_doc},
1656 { "get_dialect", (PyCFunction)csv_get_dialect,
1657 METH_O, csv_get_dialect_doc},
1658 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1659 METH_VARARGS, csv_field_size_limit_doc},
1660 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001661};
1662
Petr Viktorin6a02b382020-12-15 15:14:35 +01001663static int
1664csv_exec(PyObject *module) {
1665 const StyleDesc *style;
1666 PyObject *temp;
1667 _csvstate *module_state = get_csv_state(module);
1668
1669 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1670 module_state->dialect_type = (PyTypeObject *)temp;
1671 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1672 return -1;
1673 }
1674
1675 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1676 module_state->reader_type = (PyTypeObject *)temp;
1677 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1678 return -1;
1679 }
1680
1681 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1682 module_state->writer_type = (PyTypeObject *)temp;
1683 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1684 return -1;
1685 }
1686
1687 /* Add version to the module. */
1688 if (PyModule_AddStringConstant(module, "__version__",
1689 MODULE_VERSION) == -1) {
1690 return -1;
1691 }
1692
1693 /* Set the field limit */
1694 module_state->field_limit = 128 * 1024;
1695
1696 /* Add _dialects dictionary */
1697 module_state->dialects = PyDict_New();
1698 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1699 return -1;
1700 }
1701
1702 /* Add quote styles into dictionary */
1703 for (style = quote_styles; style->name; style++) {
1704 if (PyModule_AddIntConstant(module, style->name,
1705 style->style) == -1)
1706 return -1;
1707 }
1708
1709 /* Add the CSV exception object to the module. */
1710 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1711 if (bases == NULL) {
1712 return -1;
1713 }
1714 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1715 bases);
1716 Py_DECREF(bases);
1717 if (module_state->error_obj == NULL) {
1718 return -1;
1719 }
1720 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1721 return -1;
1722 }
1723
1724 return 0;
1725}
1726
1727static PyModuleDef_Slot csv_slots[] = {
1728 {Py_mod_exec, csv_exec},
1729 {0, NULL}
1730};
1731
Martin v. Löwis1a214512008-06-11 05:26:20 +00001732static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 PyModuleDef_HEAD_INIT,
1734 "_csv",
1735 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001736 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001737 csv_methods,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001738 csv_slots,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001739 _csv_traverse,
1740 _csv_clear,
1741 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001742};
1743
Skip Montanarob4a04172003-03-20 23:29:12 +00001744PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001745PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001746{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001747 return PyModuleDef_Init(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001748}