blob: 78855b871352c5ef6371f4006dca8cecec4f1d3c [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
Skip Montanarob4a04172003-03-20 23:29:12 +00009*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
Petr Viktorin6a02b382020-12-15 15:14:35 +010021 PyTypeObject *dialect_type;
22 PyTypeObject *reader_type;
23 PyTypeObject *writer_type;
Antoine Pitroue7672d32012-05-16 11:33:08 +020024 long field_limit; /* max parsed field size */
25} _csvstate;
26
Petr Viktorin6a02b382020-12-15 15:14:35 +010027static struct PyModuleDef _csvmodule;
28
Hai Shif707d942020-03-16 21:15:01 +080029static inline _csvstate*
30get_csv_state(PyObject *module)
31{
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35}
Antoine Pitroue7672d32012-05-16 11:33:08 +020036
37static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010038_csv_clear(PyObject *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020039{
Petr Viktorin6a02b382020-12-15 15:14:35 +010040 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020046 return 0;
47}
48
49static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010050_csv_traverse(PyObject *module, visitproc visit, void *arg)
Antoine Pitroue7672d32012-05-16 11:33:08 +020051{
Petr Viktorin6a02b382020-12-15 15:14:35 +010052 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020058 return 0;
59}
60
61static void
Petr Viktorin6a02b382020-12-15 15:14:35 +010062_csv_free(void *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020063{
Petr Viktorin6a02b382020-12-15 15:14:35 +010064 _csv_clear((PyObject *)module);
Antoine Pitroue7672d32012-05-16 11:33:08 +020065}
66
/* States of the reader's character-by-character parser. */
typedef enum {
    START_RECORD,            /* at the start of a record */
    START_FIELD,             /* expecting the start of a field */
    ESCAPED_CHAR,            /* escape character seen outside quotes */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,   /* quote character seen inside quotes */
    EAT_CRNL,
    AFTER_ESCAPED_CRNL       /* an escaped CR or LF was just added */
} ParserState;
72
/* Quoting styles selectable through a dialect's "quoting" setting. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting styles; the last entry has a NULL name and
 * marks the end of the table. */
static const StyleDesc quote_styles[] = {
    { .style = QUOTE_MINIMAL,    .name = "QUOTE_MINIMAL" },
    { .style = QUOTE_ALL,        .name = "QUOTE_ALL" },
    { .style = QUOTE_NONNUMERIC, .name = "QUOTE_NONNUMERIC" },
    { .style = QUOTE_NONE,       .name = "QUOTE_NONE" },
    { .name = NULL }
};
89
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000092
Serhiy Storchaka323748a2018-07-26 13:21:09 +030093 char doublequote; /* is " represented by ""? */
94 char skipinitialspace; /* ignore spaces following delimiter? */
95 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030097 Py_UCS4 delimiter; /* field separator */
98 Py_UCS4 quotechar; /* quote character */
99 Py_UCS4 escapechar; /* escape character */
100 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} DialectObj;
103
Skip Montanarob4a04172003-03-20 23:29:12 +0000104typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject *fields; /* field list for current record */
112 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200113 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000114 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 Py_ssize_t field_len; /* length of current field */
116 int numeric_field; /* treat field as numeric */
117 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118} ReaderObj;
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200123 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200127 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000128 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_ssize_t rec_len; /* length of record */
130 int num_fields; /* number of fields in record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
Petr Viktorin6a02b382020-12-15 15:14:35 +0100132 PyObject *error_obj; /* cached error object */
133} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
135/*
136 * DIALECT class
137 */
138
139static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100140get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
Skip Montanarob4a04172003-03-20 23:29:12 +0000141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143
Petr Viktorin6a02b382020-12-15 15:14:35 +0100144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
Petr Viktorin6a02b382020-12-15 15:14:35 +0100147 PyErr_Format(module_state->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 }
149 else
150 Py_INCREF(dialect_obj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153}
154
Skip Montanarob4a04172003-03-20 23:29:12 +0000155static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200156get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200159 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200166Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000167{
Dong-hee Na0383be42020-06-10 00:33:43 +0900168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200232_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 if (src == NULL)
235 *target = dflt;
236 else {
237 *target = '\0';
238 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100246 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200247 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300249 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 name);
251 return -1;
252 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100253 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200255 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 }
257 }
258 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000259}
260
261static int
262_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
263{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 if (src == NULL)
265 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
266 else {
267 if (src == Py_None)
268 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Format(PyExc_TypeError,
271 "\"%s\" must be a string", name);
272 return -1;
273 }
274 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100275 if (PyUnicode_READY(src) == -1)
276 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300278 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 }
280 }
281 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282}
283
284static int
285dialect_check_quoting(int quoting)
286{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200287 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200290 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 return 0;
292 }
293 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
294 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295}
Skip Montanarob4a04172003-03-20 23:29:12 +0000296
297#define D_OFF(x) offsetof(DialectObj, x)
298
299static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300300 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
301 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
302 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000304};
305
306static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 { "delimiter", (getter)Dialect_get_delimiter},
308 { "escapechar", (getter)Dialect_get_escapechar},
309 { "lineterminator", (getter)Dialect_get_lineterminator},
310 { "quotechar", (getter)Dialect_get_quotechar},
311 { "quoting", (getter)Dialect_get_quoting},
312 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000313};
314
315static void
316Dialect_dealloc(DialectObj *self)
317{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100318 PyTypeObject *tp = Py_TYPE(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700319 PyObject_GC_UnTrack(self);
320 tp->tp_clear((PyObject *)self);
321 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100322 Py_DECREF(tp);
323}
324
/* Keyword names accepted by dialect_new(), in the order in which the
 * parsed values are received. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
337
Petr Viktorin6a02b382020-12-15 15:14:35 +0100338static _csvstate *
339_csv_state_from_type(PyTypeObject *type, const char *name)
340{
341 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
342 if (module == NULL) {
343 return NULL;
344 }
345 _csvstate *module_state = PyModule_GetState(module);
346 if (module_state == NULL) {
347 PyErr_Format(PyExc_SystemError,
348 "%s: No _csv module state found", name);
349 return NULL;
350 }
351 return module_state;
352}
353
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000354static PyObject *
355dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 DialectObj *self;
358 PyObject *ret = NULL;
359 PyObject *dialect = NULL;
360 PyObject *delimiter = NULL;
361 PyObject *doublequote = NULL;
362 PyObject *escapechar = NULL;
363 PyObject *lineterminator = NULL;
364 PyObject *quotechar = NULL;
365 PyObject *quoting = NULL;
366 PyObject *skipinitialspace = NULL;
367 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
370 "|OOOOOOOOO", dialect_kws,
371 &dialect,
372 &delimiter,
373 &doublequote,
374 &escapechar,
375 &lineterminator,
376 &quotechar,
377 &quoting,
378 &skipinitialspace,
379 &strict))
380 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000381
Petr Viktorin6a02b382020-12-15 15:14:35 +0100382 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
383 if (module_state == NULL) {
384 return NULL;
385 }
386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100388 if (PyUnicode_Check(dialect)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100389 dialect = get_dialect_from_registry(dialect, module_state);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 if (dialect == NULL)
391 return NULL;
392 }
393 else
394 Py_INCREF(dialect);
395 /* Can we reuse this instance? */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100396 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200397 delimiter == NULL &&
398 doublequote == NULL &&
399 escapechar == NULL &&
400 lineterminator == NULL &&
401 quotechar == NULL &&
402 quoting == NULL &&
403 skipinitialspace == NULL &&
404 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 return dialect;
406 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 self = (DialectObj *)type->tp_alloc(type, 0);
409 if (self == NULL) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100410 Py_CLEAR(dialect);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 return NULL;
412 }
413 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 Py_XINCREF(delimiter);
416 Py_XINCREF(doublequote);
417 Py_XINCREF(escapechar);
418 Py_XINCREF(lineterminator);
419 Py_XINCREF(quotechar);
420 Py_XINCREF(quoting);
421 Py_XINCREF(skipinitialspace);
422 Py_XINCREF(strict);
423 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000424#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 if (v == NULL) \
426 v = PyObject_GetAttrString(dialect, n)
427 DIALECT_GETATTR(delimiter, "delimiter");
428 DIALECT_GETATTR(doublequote, "doublequote");
429 DIALECT_GETATTR(escapechar, "escapechar");
430 DIALECT_GETATTR(lineterminator, "lineterminator");
431 DIALECT_GETATTR(quotechar, "quotechar");
432 DIALECT_GETATTR(quoting, "quoting");
433 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
434 DIALECT_GETATTR(strict, "strict");
435 PyErr_Clear();
436 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000439#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 if (meth(name, target, src, dflt)) \
441 goto err
442 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300443 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
445 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
446 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
447 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300448 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
449 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 /* validate options */
452 if (dialect_check_quoting(self->quoting))
453 goto err;
454 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200455 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300456 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 goto err;
458 }
459 if (quotechar == Py_None && quoting == NULL)
460 self->quoting = QUOTE_NONE;
461 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
462 PyErr_SetString(PyExc_TypeError,
463 "quotechar must be set if quoting enabled");
464 goto err;
465 }
466 if (self->lineterminator == 0) {
467 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
468 goto err;
469 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 ret = (PyObject *)self;
472 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000473err:
Petr Viktorin6a02b382020-12-15 15:14:35 +0100474 Py_CLEAR(self);
475 Py_CLEAR(dialect);
476 Py_CLEAR(delimiter);
477 Py_CLEAR(doublequote);
478 Py_CLEAR(escapechar);
479 Py_CLEAR(lineterminator);
480 Py_CLEAR(quotechar);
481 Py_CLEAR(quoting);
482 Py_CLEAR(skipinitialspace);
483 Py_CLEAR(strict);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000485}
486
/* Since dialect is now a heap type, it inherits pickling method for
 * protocol 0 and 1 from object, therefore it needs to be overridden */
489
490PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
491
492static PyObject *
493Dialect_reduce(PyObject *self, PyObject *args) {
494 PyErr_Format(PyExc_TypeError,
495 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
496 return NULL;
497}
498
499static struct PyMethodDef dialect_methods[] = {
500 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
501 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
502 {NULL, NULL}
503};
Skip Montanarob4a04172003-03-20 23:29:12 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000506"CSV dialect\n"
507"\n"
508"The Dialect type records CSV parsing and generation options.\n");
509
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700510static int
511Dialect_clear(DialectObj *self)
512{
513 Py_CLEAR(self->lineterminator);
514 return 0;
515}
516
517static int
518Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
519{
520 Py_VISIT(self->lineterminator);
521 Py_VISIT(Py_TYPE(self));
522 return 0;
523}
524
Petr Viktorin6a02b382020-12-15 15:14:35 +0100525static PyType_Slot Dialect_Type_slots[] = {
526 {Py_tp_doc, (char*)Dialect_Type_doc},
527 {Py_tp_members, Dialect_memberlist},
528 {Py_tp_getset, Dialect_getsetlist},
529 {Py_tp_new, dialect_new},
530 {Py_tp_methods, dialect_methods},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100531 {Py_tp_dealloc, Dialect_dealloc},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700532 {Py_tp_clear, Dialect_clear},
533 {Py_tp_traverse, Dialect_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100534 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000535};
536
Petr Viktorin6a02b382020-12-15 15:14:35 +0100537PyType_Spec Dialect_Type_spec = {
538 .name = "_csv.Dialect",
539 .basicsize = sizeof(DialectObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700540 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
541 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100542 .slots = Dialect_Type_slots,
543};
544
545
Andrew McNamara91b97462005-01-11 01:07:23 +0000546/*
547 * Return an instance of the dialect type, given a Python instance or kwarg
548 * description of the dialect
549 */
550static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100551_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
Andrew McNamara91b97462005-01-11 01:07:23 +0000552{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100553 PyObject *type = (PyObject *)module_state->dialect_type;
Victor Stinner6412f492016-08-23 00:21:34 +0200554 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100555 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200556 }
557 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100558 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200559 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000560}
561
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000562/*
563 * READER
564 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000565static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000566parse_save_field(ReaderObj *self)
567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000569
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200570 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
571 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 if (field == NULL)
573 return -1;
574 self->field_len = 0;
575 if (self->numeric_field) {
576 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000577
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 self->numeric_field = 0;
579 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000580 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200581 if (tmp == NULL)
582 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000583 field = tmp;
584 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100585 if (PyList_Append(self->fields, field) < 0) {
586 Py_DECREF(field);
587 return -1;
588 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 Py_DECREF(field);
590 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000591}
592
593static int
594parse_grow_buff(ReaderObj *self)
595{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500596 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
597
598 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
599 Py_UCS4 *field_new = self->field;
600 PyMem_Resize(field_new, Py_UCS4, field_size_new);
601 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 PyErr_NoMemory();
603 return 0;
604 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500605 self->field = field_new;
606 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000607 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000608}
609
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000610static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100611parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000612{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100613 if (self->field_len >= module_state->field_limit) {
614 PyErr_Format(module_state->error_obj,
615 "field larger than field limit (%ld)",
616 module_state->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 return -1;
618 }
619 if (self->field_len == self->field_size && !parse_grow_buff(self))
620 return -1;
621 self->field[self->field_len++] = c;
622 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000623}
624
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000625static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100626parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000627{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000628 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000630 switch (self->state) {
631 case START_RECORD:
632 /* start of record */
633 if (c == '\0')
634 /* empty line - return [] */
635 break;
636 else if (c == '\n' || c == '\r') {
637 self->state = EAT_CRNL;
638 break;
639 }
640 /* normal character - handle as START_FIELD */
641 self->state = START_FIELD;
642 /* fallthru */
643 case START_FIELD:
644 /* expecting field */
645 if (c == '\n' || c == '\r' || c == '\0') {
646 /* save empty field - return [fields] */
647 if (parse_save_field(self) < 0)
648 return -1;
649 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
650 }
651 else if (c == dialect->quotechar &&
652 dialect->quoting != QUOTE_NONE) {
653 /* start quoted field */
654 self->state = IN_QUOTED_FIELD;
655 }
656 else if (c == dialect->escapechar) {
657 /* possible escaped character */
658 self->state = ESCAPED_CHAR;
659 }
660 else if (c == ' ' && dialect->skipinitialspace)
661 /* ignore space at start of field */
662 ;
663 else if (c == dialect->delimiter) {
664 /* save empty field */
665 if (parse_save_field(self) < 0)
666 return -1;
667 }
668 else {
669 /* begin new unquoted field */
670 if (dialect->quoting == QUOTE_NONNUMERIC)
671 self->numeric_field = 1;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100672 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000673 return -1;
674 self->state = IN_FIELD;
675 }
676 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000678 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400679 if (c == '\n' || c=='\r') {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100680 if (parse_add_char(self, module_state, c) < 0)
R David Murrayc7c42ef2013-03-19 22:41:47 -0400681 return -1;
682 self->state = AFTER_ESCAPED_CRNL;
683 break;
684 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 if (c == '\0')
686 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100687 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000688 return -1;
689 self->state = IN_FIELD;
690 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000691
R David Murrayc7c42ef2013-03-19 22:41:47 -0400692 case AFTER_ESCAPED_CRNL:
693 if (c == '\0')
694 break;
695 /*fallthru*/
696
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000697 case IN_FIELD:
698 /* in unquoted field */
699 if (c == '\n' || c == '\r' || c == '\0') {
700 /* end of line - return [fields] */
701 if (parse_save_field(self) < 0)
702 return -1;
703 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
704 }
705 else if (c == dialect->escapechar) {
706 /* possible escaped character */
707 self->state = ESCAPED_CHAR;
708 }
709 else if (c == dialect->delimiter) {
710 /* save field - wait for new field */
711 if (parse_save_field(self) < 0)
712 return -1;
713 self->state = START_FIELD;
714 }
715 else {
716 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100717 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000718 return -1;
719 }
720 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000721
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 case IN_QUOTED_FIELD:
723 /* in quoted field */
724 if (c == '\0')
725 ;
726 else if (c == dialect->escapechar) {
727 /* Possible escape character */
728 self->state = ESCAPE_IN_QUOTED_FIELD;
729 }
730 else if (c == dialect->quotechar &&
731 dialect->quoting != QUOTE_NONE) {
732 if (dialect->doublequote) {
733 /* doublequote; " represented by "" */
734 self->state = QUOTE_IN_QUOTED_FIELD;
735 }
736 else {
737 /* end of quote part of field */
738 self->state = IN_FIELD;
739 }
740 }
741 else {
742 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100743 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000744 return -1;
745 }
746 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000747
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000748 case ESCAPE_IN_QUOTED_FIELD:
749 if (c == '\0')
750 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100751 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 return -1;
753 self->state = IN_QUOTED_FIELD;
754 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300757 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000758 if (dialect->quoting != QUOTE_NONE &&
759 c == dialect->quotechar) {
760 /* save "" as " */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100761 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 return -1;
763 self->state = IN_QUOTED_FIELD;
764 }
765 else if (c == dialect->delimiter) {
766 /* save field - wait for new field */
767 if (parse_save_field(self) < 0)
768 return -1;
769 self->state = START_FIELD;
770 }
771 else if (c == '\n' || c == '\r' || c == '\0') {
772 /* end of line - return [fields] */
773 if (parse_save_field(self) < 0)
774 return -1;
775 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
776 }
777 else if (!dialect->strict) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100778 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 return -1;
780 self->state = IN_FIELD;
781 }
782 else {
783 /* illegal */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100784 PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000785 dialect->delimiter,
786 dialect->quotechar);
787 return -1;
788 }
789 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000790
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000791 case EAT_CRNL:
792 if (c == '\n' || c == '\r')
793 ;
794 else if (c == '\0')
795 self->state = START_RECORD;
796 else {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100797 PyErr_Format(module_state->error_obj,
798 "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000799 return -1;
800 }
801 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000802
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 }
804 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000805}
806
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000807static int
808parse_reset(ReaderObj *self)
809{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300810 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 if (self->fields == NULL)
812 return -1;
813 self->field_len = 0;
814 self->state = START_RECORD;
815 self->numeric_field = 0;
816 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000817}
Skip Montanarob4a04172003-03-20 23:29:12 +0000818
819static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000820Reader_iternext(ReaderObj *self)
821{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000822 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200823 Py_UCS4 c;
824 Py_ssize_t pos, linelen;
825 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300826 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200827 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000828
Petr Viktorin6a02b382020-12-15 15:14:35 +0100829 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
830 "Reader.__next__");
831 if (module_state == NULL) {
832 return NULL;
833 }
834
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000835 if (parse_reset(self) < 0)
836 return NULL;
837 do {
838 lineobj = PyIter_Next(self->input_iter);
839 if (lineobj == NULL) {
840 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700841 if (!PyErr_Occurred() && (self->field_len != 0 ||
842 self->state == IN_QUOTED_FIELD)) {
843 if (self->dialect->strict)
Petr Viktorin6a02b382020-12-15 15:14:35 +0100844 PyErr_SetString(module_state->error_obj,
Senthil Kumaran49d13022012-09-25 02:37:20 -0700845 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700846 else if (parse_save_field(self) >= 0)
847 break;
848 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000849 return NULL;
850 }
851 if (!PyUnicode_Check(lineobj)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100852 PyErr_Format(module_state->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000853 "iterator should return strings, "
854 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300855 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100856 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000857 );
858 Py_DECREF(lineobj);
859 return NULL;
860 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100861 if (PyUnicode_READY(lineobj) == -1) {
862 Py_DECREF(lineobj);
863 return NULL;
864 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000865 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200866 kind = PyUnicode_KIND(lineobj);
867 data = PyUnicode_DATA(lineobj);
868 pos = 0;
869 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000870 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200871 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000873 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100874 PyErr_Format(module_state->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700875 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000876 goto err;
877 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100878 if (parse_process_char(self, module_state, c) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000879 Py_DECREF(lineobj);
880 goto err;
881 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200882 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 }
884 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100885 if (parse_process_char(self, module_state, 0) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000886 goto err;
887 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000888
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 fields = self->fields;
890 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000891err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000893}
894
895static void
896Reader_dealloc(ReaderObj *self)
897{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100898 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 PyObject_GC_UnTrack(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700900 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100901 if (self->field != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 PyMem_Free(self->field);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100903 self->field = NULL;
904 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100906 Py_DECREF(tp);
907}
908
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000909static int
910Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
911{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 Py_VISIT(self->dialect);
913 Py_VISIT(self->input_iter);
914 Py_VISIT(self->fields);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700915 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000917}
918
919static int
920Reader_clear(ReaderObj *self)
921{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000922 Py_CLEAR(self->dialect);
923 Py_CLEAR(self->input_iter);
924 Py_CLEAR(self->fields);
925 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000926}
927
928PyDoc_STRVAR(Reader_Type_doc,
929"CSV reader\n"
930"\n"
931"Reader objects are responsible for reading and parsing tabular data\n"
932"in CSV format.\n"
933);
934
935static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000937};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000938#define R_OFF(x) offsetof(ReaderObj, x)
939
940static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
942 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
943 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000944};
945
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
Petr Viktorin6a02b382020-12-15 15:14:35 +0100947static PyType_Slot Reader_Type_slots[] = {
948 {Py_tp_doc, (char*)Reader_Type_doc},
949 {Py_tp_traverse, Reader_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100950 {Py_tp_iter, PyObject_SelfIter},
951 {Py_tp_iternext, Reader_iternext},
952 {Py_tp_methods, Reader_methods},
953 {Py_tp_members, Reader_memberlist},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700954 {Py_tp_clear, Reader_clear},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100955 {Py_tp_dealloc, Reader_dealloc},
956 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000957};
958
Petr Viktorin6a02b382020-12-15 15:14:35 +0100959PyType_Spec Reader_Type_spec = {
960 .name = "_csv.reader",
961 .basicsize = sizeof(ReaderObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700962 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
963 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100964 .slots = Reader_Type_slots
965};
966
967
Skip Montanarob4a04172003-03-20 23:29:12 +0000968static PyObject *
969csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
970{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 PyObject * iterator, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100972 _csvstate *module_state = get_csv_state(module);
973 ReaderObj * self = PyObject_GC_New(
974 ReaderObj,
975 module_state->reader_type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 if (!self)
978 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 self->dialect = NULL;
981 self->fields = NULL;
982 self->input_iter = NULL;
983 self->field = NULL;
984 self->field_size = 0;
985 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 if (parse_reset(self) < 0) {
988 Py_DECREF(self);
989 return NULL;
990 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000991
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000992 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
993 Py_DECREF(self);
994 return NULL;
995 }
996 self->input_iter = PyObject_GetIter(iterator);
997 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000998 Py_DECREF(self);
999 return NULL;
1000 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001001 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1002 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001003 if (self->dialect == NULL) {
1004 Py_DECREF(self);
1005 return NULL;
1006 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001007
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001008 PyObject_GC_Track(self);
1009 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001010}
1011
1012/*
1013 * WRITER
1014 */
1015/* ---------------------------------------------------------------- */
1016static void
1017join_reset(WriterObj *self)
1018{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001019 self->rec_len = 0;
1020 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001021}
1022
1023#define MEM_INCR 32768
1024
1025/* Calculate new record length or append field to record. Return new
1026 * record length.
1027 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001028static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001029join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001030 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001031 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001032{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001033 DialectObj *dialect = self->dialect;
1034 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001035 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001036
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001037#define INCLEN \
1038 do {\
1039 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1040 goto overflow; \
1041 } \
1042 rec_len++; \
1043 } while(0)
1044
1045#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 do {\
1047 if (copy_phase) \
1048 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001049 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001053
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001054 /* If this is not the first field we need a field separator */
1055 if (self->num_fields > 0)
1056 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001057
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001058 /* Handle preceding quote */
1059 if (copy_phase && *quoted)
1060 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001061
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001062 /* Copy/count field data */
1063 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001064 for (i = 0; field_data && (i < field_len); i++) {
1065 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 if (c == dialect->delimiter ||
1069 c == dialect->escapechar ||
1070 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001071 PyUnicode_FindChar(
1072 dialect->lineterminator, c, 0,
1073 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001074 if (dialect->quoting == QUOTE_NONE)
1075 want_escape = 1;
1076 else {
1077 if (c == dialect->quotechar) {
1078 if (dialect->doublequote)
1079 ADDCH(dialect->quotechar);
1080 else
1081 want_escape = 1;
1082 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001083 else if (c == dialect->escapechar) {
1084 want_escape = 1;
1085 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 if (!want_escape)
1087 *quoted = 1;
1088 }
1089 if (want_escape) {
1090 if (!dialect->escapechar) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001091 PyErr_Format(self->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 "need to escape, but no escapechar set");
1093 return -1;
1094 }
1095 ADDCH(dialect->escapechar);
1096 }
1097 }
1098 /* Copy field character into record buffer.
1099 */
1100 ADDCH(c);
1101 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001102
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001103 if (*quoted) {
1104 if (copy_phase)
1105 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001106 else {
1107 INCLEN; /* starting quote */
1108 INCLEN; /* ending quote */
1109 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 }
1111 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001112
1113 overflow:
1114 PyErr_NoMemory();
1115 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001116#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001117#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001118}
1119
1120static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001121join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001122{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001123 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001125 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001126 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1127 Py_UCS4 *rec_new = self->rec;
1128 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1129 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 PyErr_NoMemory();
1131 return 0;
1132 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001133 self->rec = rec_new;
1134 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 }
1136 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137}
1138
1139static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001140join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001141{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001142 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001143 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001144 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001145 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001146
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001147 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001148 if (PyUnicode_READY(field) == -1)
1149 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001150 field_kind = PyUnicode_KIND(field);
1151 field_data = PyUnicode_DATA(field);
1152 field_len = PyUnicode_GET_LENGTH(field);
1153 }
1154 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001155 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001156 if (rec_len < 0)
1157 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 /* grow record buffer if necessary */
1160 if (!join_check_rec_size(self, rec_len))
1161 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001162
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001163 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001164 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001167 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001168}
1169
1170static int
1171join_append_lineterminator(WriterObj *self)
1172{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001173 Py_ssize_t terminator_len, i;
1174 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001175 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001176
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001177 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001178 if (terminator_len == -1)
1179 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 /* grow record buffer if necessary */
1182 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1183 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001184
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001185 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1186 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1187 for (i = 0; i < terminator_len; i++)
1188 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001190
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001191 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001192}
1193
1194PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001195"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001196"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001197"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001198"elements will be converted to string.");
1199
1200static PyObject *
1201csv_writerow(WriterObj *self, PyObject *seq)
1202{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001204 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001205
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001206 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001207 if (iter == NULL) {
1208 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001209 PyErr_Format(self->error_obj,
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001210 "iterable expected, not %.200s",
1211 Py_TYPE(seq)->tp_name);
1212 }
1213 return NULL;
1214 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 /* Join all fields in internal buffer.
1217 */
1218 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001219 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001220 int append_ok;
1221 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001222
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 switch (dialect->quoting) {
1224 case QUOTE_NONNUMERIC:
1225 quoted = !PyNumber_Check(field);
1226 break;
1227 case QUOTE_ALL:
1228 quoted = 1;
1229 break;
1230 default:
1231 quoted = 0;
1232 break;
1233 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001234
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001236 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 Py_DECREF(field);
1238 }
1239 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001240 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 Py_DECREF(field);
1242 }
1243 else {
1244 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001245
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001246 str = PyObject_Str(field);
1247 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001248 if (str == NULL) {
1249 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001251 }
1252 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 Py_DECREF(str);
1254 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001255 if (!append_ok) {
1256 Py_DECREF(iter);
1257 return NULL;
1258 }
1259 }
1260 Py_DECREF(iter);
1261 if (PyErr_Occurred())
1262 return NULL;
1263
Licht Takeuchi20019002017-12-12 18:57:06 +09001264 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001265 if (dialect->quoting == QUOTE_NONE) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001266 PyErr_Format(self->error_obj,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001267 "single empty field record must be quoted");
1268 return NULL;
1269 }
1270 self->num_fields--;
1271 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272 return NULL;
1273 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 /* Add line terminator.
1276 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001277 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001278 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001279 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001280
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001281 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1282 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001283 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001284 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001285 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001286 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001287 Py_DECREF(line);
1288 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001289}
1290
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001291PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001292"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001293"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001294"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001295"elements will be converted to string.");
1296
Skip Montanarob4a04172003-03-20 23:29:12 +00001297static PyObject *
1298csv_writerows(WriterObj *self, PyObject *seqseq)
1299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001301
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 row_iter = PyObject_GetIter(seqseq);
1303 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001304 return NULL;
1305 }
1306 while ((row_obj = PyIter_Next(row_iter))) {
1307 result = csv_writerow(self, row_obj);
1308 Py_DECREF(row_obj);
1309 if (!result) {
1310 Py_DECREF(row_iter);
1311 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001312 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001313 else
1314 Py_DECREF(result);
1315 }
1316 Py_DECREF(row_iter);
1317 if (PyErr_Occurred())
1318 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001319 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001320}
1321
1322static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1324 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1325 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001326};
1327
1328#define W_OFF(x) offsetof(WriterObj, x)
1329
1330static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1332 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001333};
1334
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001335static int
1336Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1337{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001339 Py_VISIT(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001340 Py_VISIT(self->error_obj);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001341 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001343}
1344
1345static int
1346Writer_clear(WriterObj *self)
1347{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001349 Py_CLEAR(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001350 Py_CLEAR(self->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001351 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001352}
1353
Petr Viktorin6a02b382020-12-15 15:14:35 +01001354static void
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001355Writer_dealloc(WriterObj *self)
Petr Viktorin6a02b382020-12-15 15:14:35 +01001356{
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001357 PyTypeObject *tp = Py_TYPE(self);
1358 PyObject_GC_UnTrack(self);
1359 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001360 if (self->rec != NULL) {
1361 PyMem_Free(self->rec);
1362 }
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001363 PyObject_GC_Del(self);
1364 Py_DECREF(tp);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001365}
1366
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001367PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001368"CSV writer\n"
1369"\n"
1370"Writer objects are responsible for generating tabular data\n"
1371"in CSV format from sequence input.\n"
1372);
1373
Petr Viktorin6a02b382020-12-15 15:14:35 +01001374static PyType_Slot Writer_Type_slots[] = {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001375 {Py_tp_doc, (char*)Writer_Type_doc},
1376 {Py_tp_traverse, Writer_traverse},
1377 {Py_tp_clear, Writer_clear},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001378 {Py_tp_dealloc, Writer_dealloc},
Petr Viktorin6a02b382020-12-15 15:14:35 +01001379 {Py_tp_methods, Writer_methods},
1380 {Py_tp_members, Writer_memberlist},
1381 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +00001382};
1383
Petr Viktorin6a02b382020-12-15 15:14:35 +01001384PyType_Spec Writer_Type_spec = {
1385 .name = "_csv.writer",
1386 .basicsize = sizeof(WriterObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001387 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1388 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +01001389 .slots = Writer_Type_slots,
1390};
1391
1392
Skip Montanarob4a04172003-03-20 23:29:12 +00001393static PyObject *
1394csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1395{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 PyObject * output_file, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001397 _csvstate *module_state = get_csv_state(module);
1398 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001399 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (!self)
1402 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001405 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001406
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001407 self->rec = NULL;
1408 self->rec_size = 0;
1409 self->rec_len = 0;
1410 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001411
Petr Viktorin6a02b382020-12-15 15:14:35 +01001412 self->error_obj = Py_NewRef(module_state->error_obj);
1413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1415 Py_DECREF(self);
1416 return NULL;
1417 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001418 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1419 Py_DECREF(self);
1420 return NULL;
1421 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001422 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 PyErr_SetString(PyExc_TypeError,
1424 "argument 1 must have a \"write\" method");
1425 Py_DECREF(self);
1426 return NULL;
1427 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001428 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1429 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 if (self->dialect == NULL) {
1431 Py_DECREF(self);
1432 return NULL;
1433 }
1434 PyObject_GC_Track(self);
1435 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001436}
1437
1438/*
1439 * DIALECT REGISTRY
1440 */
1441static PyObject *
1442csv_list_dialects(PyObject *module, PyObject *args)
1443{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001444 return PyDict_Keys(get_csv_state(module)->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001445}
1446
1447static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001448csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001449{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001450 PyObject *name_obj, *dialect_obj = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001451 _csvstate *module_state = get_csv_state(module);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001452 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1455 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001456 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001458 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 return NULL;
1460 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001461 if (PyUnicode_READY(name_obj) == -1)
1462 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001463 dialect = _call_dialect(module_state, dialect_obj, kwargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 if (dialect == NULL)
1465 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001466 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001467 Py_DECREF(dialect);
1468 return NULL;
1469 }
1470 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001471 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001472}
1473
1474static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001475csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001476{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001477 _csvstate *module_state = get_csv_state(module);
1478 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001479 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001480 PyErr_Format(module_state->error_obj, "unknown dialect");
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001481 }
1482 return NULL;
1483 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001484 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001485}
1486
1487static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001488csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001489{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001490 return get_dialect_from_registry(name_obj, get_csv_state(module));
Skip Montanarob4a04172003-03-20 23:29:12 +00001491}
1492
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001493static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001494csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001495{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001496 PyObject *new_limit = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001497 _csvstate *module_state = get_csv_state(module);
1498 long old_limit = module_state->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001499
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001500 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1501 return NULL;
1502 if (new_limit != NULL) {
1503 if (!PyLong_CheckExact(new_limit)) {
1504 PyErr_Format(PyExc_TypeError,
1505 "limit must be an integer");
1506 return NULL;
1507 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001508 module_state->field_limit = PyLong_AsLong(new_limit);
1509 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1510 module_state->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001511 return NULL;
1512 }
1513 }
1514 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001515}
1516
Petr Viktorin6a02b382020-12-15 15:14:35 +01001517static PyType_Slot error_slots[] = {
1518 {0, NULL},
1519};
1520
1521PyType_Spec error_spec = {
1522 .name = "_csv.Error",
Miss Islington (bot)3e44e9a2021-05-12 07:02:46 -07001523 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001524 .slots = error_slots,
1525};
1526
Skip Montanarob4a04172003-03-20 23:29:12 +00001527/*
1528 * MODULE
1529 */
1530
/* Docstring of the _csv module itself; passed as the doc member of the
 * _csvmodule definition further down in this file. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1588
/* Docstring attached to the "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001603
/* Docstring attached to the "writer" entry of csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1617
1618PyDoc_STRVAR(csv_list_dialects_doc,
1619"Return a list of all know dialect names.\n"
1620" names = csv.list_dialects()");
1621
/* Docstring attached to the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1625
1626PyDoc_STRVAR(csv_register_dialect_doc,
1627"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001628" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001629
/* Docstring attached to the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1633
/* Docstring attached to the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1640
Skip Montanarob4a04172003-03-20 23:29:12 +00001641static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001642 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001643 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001644 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1646 { "list_dialects", (PyCFunction)csv_list_dialects,
1647 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001648 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001649 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1650 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1651 METH_O, csv_unregister_dialect_doc},
1652 { "get_dialect", (PyCFunction)csv_get_dialect,
1653 METH_O, csv_get_dialect_doc},
1654 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1655 METH_VARARGS, csv_field_size_limit_doc},
1656 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001657};
1658
Petr Viktorin6a02b382020-12-15 15:14:35 +01001659static int
1660csv_exec(PyObject *module) {
1661 const StyleDesc *style;
1662 PyObject *temp;
1663 _csvstate *module_state = get_csv_state(module);
1664
1665 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1666 module_state->dialect_type = (PyTypeObject *)temp;
1667 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1668 return -1;
1669 }
1670
1671 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1672 module_state->reader_type = (PyTypeObject *)temp;
1673 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1674 return -1;
1675 }
1676
1677 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1678 module_state->writer_type = (PyTypeObject *)temp;
1679 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1680 return -1;
1681 }
1682
1683 /* Add version to the module. */
1684 if (PyModule_AddStringConstant(module, "__version__",
1685 MODULE_VERSION) == -1) {
1686 return -1;
1687 }
1688
1689 /* Set the field limit */
1690 module_state->field_limit = 128 * 1024;
1691
1692 /* Add _dialects dictionary */
1693 module_state->dialects = PyDict_New();
1694 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1695 return -1;
1696 }
1697
1698 /* Add quote styles into dictionary */
1699 for (style = quote_styles; style->name; style++) {
1700 if (PyModule_AddIntConstant(module, style->name,
1701 style->style) == -1)
1702 return -1;
1703 }
1704
1705 /* Add the CSV exception object to the module. */
1706 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1707 if (bases == NULL) {
1708 return -1;
1709 }
1710 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1711 bases);
1712 Py_DECREF(bases);
1713 if (module_state->error_obj == NULL) {
1714 return -1;
1715 }
1716 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1717 return -1;
1718 }
1719
1720 return 0;
1721}
1722
1723static PyModuleDef_Slot csv_slots[] = {
1724 {Py_mod_exec, csv_exec},
1725 {0, NULL}
1726};
1727
Martin v. Löwis1a214512008-06-11 05:26:20 +00001728static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001729 PyModuleDef_HEAD_INIT,
1730 "_csv",
1731 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001732 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001733 csv_methods,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001734 csv_slots,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001735 _csv_traverse,
1736 _csv_clear,
1737 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001738};
1739
Skip Montanarob4a04172003-03-20 23:29:12 +00001740PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001741PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001742{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001743 return PyModuleDef_Init(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001744}