/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
Petr Viktorin6a02b382020-12-15 15:14:35 +010021 PyTypeObject *dialect_type;
22 PyTypeObject *reader_type;
23 PyTypeObject *writer_type;
Antoine Pitroue7672d32012-05-16 11:33:08 +020024 long field_limit; /* max parsed field size */
25} _csvstate;
26
Petr Viktorin6a02b382020-12-15 15:14:35 +010027static struct PyModuleDef _csvmodule;
28
Hai Shif707d942020-03-16 21:15:01 +080029static inline _csvstate*
30get_csv_state(PyObject *module)
31{
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35}
Antoine Pitroue7672d32012-05-16 11:33:08 +020036
37static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010038_csv_clear(PyObject *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020039{
Petr Viktorin6a02b382020-12-15 15:14:35 +010040 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020046 return 0;
47}
48
49static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010050_csv_traverse(PyObject *module, visitproc visit, void *arg)
Antoine Pitroue7672d32012-05-16 11:33:08 +020051{
Petr Viktorin6a02b382020-12-15 15:14:35 +010052 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020058 return 0;
59}
60
61static void
Petr Viktorin6a02b382020-12-15 15:14:35 +010062_csv_free(void *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020063{
Petr Viktorin6a02b382020-12-15 15:14:35 +010064 _csv_clear((PyObject *)module);
Antoine Pitroue7672d32012-05-16 11:33:08 +020065}
66
/* States of the character-at-a-time reader state machine. */
typedef enum {
    START_RECORD,           /* at the start of a record */
    START_FIELD,            /* expecting a field */
    ESCAPED_CHAR,           /* escapechar seen outside a quoted field */
    IN_FIELD,               /* inside an unquoted field */
    IN_QUOTED_FIELD,        /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD, /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,  /* quotechar seen inside a quoted field */
    EAT_CRNL,               /* entered after a '\n' or '\r' ends a line */
    AFTER_ESCAPED_CRNL      /* an escaped '\n' or '\r' was just stored */
} ParserState;
72
/* Values accepted for a dialect's "quoting" attribute. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
76
77typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020079 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000080} StyleDesc;
81
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020082static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
84 { QUOTE_ALL, "QUOTE_ALL" },
85 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
86 { QUOTE_NONE, "QUOTE_NONE" },
87 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000088};
89
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000092
Serhiy Storchaka323748a2018-07-26 13:21:09 +030093 char doublequote; /* is " represented by ""? */
94 char skipinitialspace; /* ignore spaces following delimiter? */
95 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030097 Py_UCS4 delimiter; /* field separator */
98 Py_UCS4 quotechar; /* quote character */
99 Py_UCS4 escapechar; /* escape character */
100 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} DialectObj;
103
Skip Montanarob4a04172003-03-20 23:29:12 +0000104typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject *fields; /* field list for current record */
112 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200113 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000114 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 Py_ssize_t field_len; /* length of current field */
116 int numeric_field; /* treat field as numeric */
117 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118} ReaderObj;
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200123 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200127 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000128 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_ssize_t rec_len; /* length of record */
130 int num_fields; /* number of fields in record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
Petr Viktorin6a02b382020-12-15 15:14:35 +0100132 PyObject *error_obj; /* cached error object */
133} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
/*
 * DIALECT class
 */
138
139static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100140get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
Skip Montanarob4a04172003-03-20 23:29:12 +0000141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143
Petr Viktorin6a02b382020-12-15 15:14:35 +0100144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
Petr Viktorin6a02b382020-12-15 15:14:35 +0100147 PyErr_Format(module_state->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 }
149 else
150 Py_INCREF(dialect_obj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153}
154
Skip Montanarob4a04172003-03-20 23:29:12 +0000155static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200156get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200159 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200166Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000167{
Dong-hee Na0383be42020-06-10 00:33:43 +0900168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Miss Islington (bot)87729352021-10-09 08:35:33 -0700232_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Miss Islington (bot)87729352021-10-09 08:35:33 -0700234 if (src == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 *target = dflt;
Miss Islington (bot)87729352021-10-09 08:35:33 -0700236 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 else {
238 *target = '\0';
239 if (src != Py_None) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
Miss Islington (bot)87729352021-10-09 08:35:33 -0700242 "\"%s\" must be string or None, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Miss Islington (bot)87729352021-10-09 08:35:33 -0700246 Py_ssize_t len = PyUnicode_GetLength(src);
Dong-hee Nac80f0b72021-10-10 01:13:21 +0900247 if (len < 0) {
248 return -1;
249 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200250 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000251 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300252 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000253 name);
254 return -1;
255 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100256 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Miss Islington (bot)87729352021-10-09 08:35:33 -0700257 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200258 *target = PyUnicode_READ_CHAR(src, 0);
Miss Islington (bot)87729352021-10-09 08:35:33 -0700259 }
260 }
261 }
262 return 0;
263}
264
265static int
266_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
267{
268 if (src == NULL) {
269 *target = dflt;
270 }
271 else {
272 *target = '\0';
273 if (!PyUnicode_Check(src)) {
274 PyErr_Format(PyExc_TypeError,
275 "\"%s\" must be string, not %.200s", name,
276 Py_TYPE(src)->tp_name);
277 return -1;
278 }
279 Py_ssize_t len = PyUnicode_GetLength(src);
Dong-hee Nac80f0b72021-10-10 01:13:21 +0900280 if (len < 0) {
281 return -1;
282 }
Miss Islington (bot)87729352021-10-09 08:35:33 -0700283 if (len > 1) {
284 PyErr_Format(PyExc_TypeError,
285 "\"%s\" must be a 1-character string",
286 name);
287 return -1;
288 }
289 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
290 else {
291 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 }
293 }
294 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295}
296
297static int
298_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
299{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 if (src == NULL)
301 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
302 else {
303 if (src == Py_None)
304 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100305 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 PyErr_Format(PyExc_TypeError,
307 "\"%s\" must be a string", name);
308 return -1;
309 }
310 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100311 if (PyUnicode_READY(src) == -1)
312 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000313 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300314 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 }
316 }
317 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000318}
319
320static int
321dialect_check_quoting(int quoting)
322{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200323 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000324
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000325 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200326 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000327 return 0;
328 }
329 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
330 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000331}
Skip Montanarob4a04172003-03-20 23:29:12 +0000332
333#define D_OFF(x) offsetof(DialectObj, x)
334
335static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300336 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
337 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
338 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000339 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000340};
341
342static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000343 { "delimiter", (getter)Dialect_get_delimiter},
344 { "escapechar", (getter)Dialect_get_escapechar},
345 { "lineterminator", (getter)Dialect_get_lineterminator},
346 { "quotechar", (getter)Dialect_get_quotechar},
347 { "quoting", (getter)Dialect_get_quoting},
348 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000349};
350
351static void
352Dialect_dealloc(DialectObj *self)
353{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100354 PyTypeObject *tp = Py_TYPE(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700355 PyObject_GC_UnTrack(self);
356 tp->tp_clear((PyObject *)self);
357 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100358 Py_DECREF(tp);
359}
360
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000361static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 "dialect",
363 "delimiter",
364 "doublequote",
365 "escapechar",
366 "lineterminator",
367 "quotechar",
368 "quoting",
369 "skipinitialspace",
370 "strict",
371 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000372};
373
Petr Viktorin6a02b382020-12-15 15:14:35 +0100374static _csvstate *
375_csv_state_from_type(PyTypeObject *type, const char *name)
376{
377 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
378 if (module == NULL) {
379 return NULL;
380 }
381 _csvstate *module_state = PyModule_GetState(module);
382 if (module_state == NULL) {
383 PyErr_Format(PyExc_SystemError,
384 "%s: No _csv module state found", name);
385 return NULL;
386 }
387 return module_state;
388}
389
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000390static PyObject *
391dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000393 DialectObj *self;
394 PyObject *ret = NULL;
395 PyObject *dialect = NULL;
396 PyObject *delimiter = NULL;
397 PyObject *doublequote = NULL;
398 PyObject *escapechar = NULL;
399 PyObject *lineterminator = NULL;
400 PyObject *quotechar = NULL;
401 PyObject *quoting = NULL;
402 PyObject *skipinitialspace = NULL;
403 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000404
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
406 "|OOOOOOOOO", dialect_kws,
407 &dialect,
408 &delimiter,
409 &doublequote,
410 &escapechar,
411 &lineterminator,
412 &quotechar,
413 &quoting,
414 &skipinitialspace,
415 &strict))
416 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000417
Petr Viktorin6a02b382020-12-15 15:14:35 +0100418 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
419 if (module_state == NULL) {
420 return NULL;
421 }
422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100424 if (PyUnicode_Check(dialect)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100425 dialect = get_dialect_from_registry(dialect, module_state);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000426 if (dialect == NULL)
427 return NULL;
428 }
429 else
430 Py_INCREF(dialect);
431 /* Can we reuse this instance? */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100432 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200433 delimiter == NULL &&
434 doublequote == NULL &&
435 escapechar == NULL &&
436 lineterminator == NULL &&
437 quotechar == NULL &&
438 quoting == NULL &&
439 skipinitialspace == NULL &&
440 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 return dialect;
442 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 self = (DialectObj *)type->tp_alloc(type, 0);
445 if (self == NULL) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100446 Py_CLEAR(dialect);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000447 return NULL;
448 }
449 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 Py_XINCREF(delimiter);
452 Py_XINCREF(doublequote);
453 Py_XINCREF(escapechar);
454 Py_XINCREF(lineterminator);
455 Py_XINCREF(quotechar);
456 Py_XINCREF(quoting);
457 Py_XINCREF(skipinitialspace);
458 Py_XINCREF(strict);
459 if (dialect != NULL) {
Miss Islington (bot)bb260c22021-07-13 16:18:28 -0700460#define DIALECT_GETATTR(v, n) \
461 do { \
462 if (v == NULL) { \
463 v = PyObject_GetAttrString(dialect, n); \
464 if (v == NULL) \
465 PyErr_Clear(); \
466 } \
467 } while (0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000468 DIALECT_GETATTR(delimiter, "delimiter");
469 DIALECT_GETATTR(doublequote, "doublequote");
470 DIALECT_GETATTR(escapechar, "escapechar");
471 DIALECT_GETATTR(lineterminator, "lineterminator");
472 DIALECT_GETATTR(quotechar, "quotechar");
473 DIALECT_GETATTR(quoting, "quoting");
474 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
475 DIALECT_GETATTR(strict, "strict");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000476 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000478 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000479#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000480 if (meth(name, target, src, dflt)) \
481 goto err
482 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300483 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Miss Islington (bot)87729352021-10-09 08:35:33 -0700484 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
Miss Islington (bot)87729352021-10-09 08:35:33 -0700486 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000487 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300488 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
489 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000490
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 /* validate options */
492 if (dialect_check_quoting(self->quoting))
493 goto err;
494 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200495 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300496 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000497 goto err;
498 }
499 if (quotechar == Py_None && quoting == NULL)
500 self->quoting = QUOTE_NONE;
501 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
502 PyErr_SetString(PyExc_TypeError,
503 "quotechar must be set if quoting enabled");
504 goto err;
505 }
506 if (self->lineterminator == 0) {
507 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
508 goto err;
509 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000510
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000511 ret = (PyObject *)self;
512 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000513err:
Petr Viktorin6a02b382020-12-15 15:14:35 +0100514 Py_CLEAR(self);
515 Py_CLEAR(dialect);
516 Py_CLEAR(delimiter);
517 Py_CLEAR(doublequote);
518 Py_CLEAR(escapechar);
519 Py_CLEAR(lineterminator);
520 Py_CLEAR(quotechar);
521 Py_CLEAR(quoting);
522 Py_CLEAR(skipinitialspace);
523 Py_CLEAR(strict);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000524 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000525}
526
Petr Viktorin6a02b382020-12-15 15:14:35 +0100527/* Since dialect is now a heap type, it inherits pickling method for
Miss Islington (bot)5afc5bb2021-10-07 01:55:18 -0700528 * protocol 0 and 1 from object, therefore it needs to be overridden */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100529
530PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
531
532static PyObject *
533Dialect_reduce(PyObject *self, PyObject *args) {
534 PyErr_Format(PyExc_TypeError,
535 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
536 return NULL;
537}
538
539static struct PyMethodDef dialect_methods[] = {
540 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
541 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
542 {NULL, NULL}
543};
Skip Montanarob4a04172003-03-20 23:29:12 +0000544
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000546"CSV dialect\n"
547"\n"
548"The Dialect type records CSV parsing and generation options.\n");
549
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700550static int
551Dialect_clear(DialectObj *self)
552{
553 Py_CLEAR(self->lineterminator);
554 return 0;
555}
556
557static int
558Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
559{
560 Py_VISIT(self->lineterminator);
561 Py_VISIT(Py_TYPE(self));
562 return 0;
563}
564
Petr Viktorin6a02b382020-12-15 15:14:35 +0100565static PyType_Slot Dialect_Type_slots[] = {
566 {Py_tp_doc, (char*)Dialect_Type_doc},
567 {Py_tp_members, Dialect_memberlist},
568 {Py_tp_getset, Dialect_getsetlist},
569 {Py_tp_new, dialect_new},
570 {Py_tp_methods, dialect_methods},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100571 {Py_tp_dealloc, Dialect_dealloc},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700572 {Py_tp_clear, Dialect_clear},
573 {Py_tp_traverse, Dialect_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100574 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000575};
576
Petr Viktorin6a02b382020-12-15 15:14:35 +0100577PyType_Spec Dialect_Type_spec = {
578 .name = "_csv.Dialect",
579 .basicsize = sizeof(DialectObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700580 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
581 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100582 .slots = Dialect_Type_slots,
583};
584
585
Andrew McNamara91b97462005-01-11 01:07:23 +0000586/*
587 * Return an instance of the dialect type, given a Python instance or kwarg
588 * description of the dialect
589 */
590static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100591_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
Andrew McNamara91b97462005-01-11 01:07:23 +0000592{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100593 PyObject *type = (PyObject *)module_state->dialect_type;
Victor Stinner6412f492016-08-23 00:21:34 +0200594 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100595 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200596 }
597 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100598 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200599 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000600}
601
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000605static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000606parse_save_field(ReaderObj *self)
607{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000608 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000609
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200610 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
611 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 if (field == NULL)
613 return -1;
614 self->field_len = 0;
615 if (self->numeric_field) {
616 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 self->numeric_field = 0;
619 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000620 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200621 if (tmp == NULL)
622 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 field = tmp;
624 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100625 if (PyList_Append(self->fields, field) < 0) {
626 Py_DECREF(field);
627 return -1;
628 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 Py_DECREF(field);
630 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000631}
632
633static int
634parse_grow_buff(ReaderObj *self)
635{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500636 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
637
638 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
639 Py_UCS4 *field_new = self->field;
640 PyMem_Resize(field_new, Py_UCS4, field_size_new);
641 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000642 PyErr_NoMemory();
643 return 0;
644 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500645 self->field = field_new;
646 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000648}
649
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000650static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100651parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000652{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100653 if (self->field_len >= module_state->field_limit) {
654 PyErr_Format(module_state->error_obj,
655 "field larger than field limit (%ld)",
656 module_state->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000657 return -1;
658 }
659 if (self->field_len == self->field_size && !parse_grow_buff(self))
660 return -1;
661 self->field[self->field_len++] = c;
662 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000663}
664
/* Advance the reader's parsing state machine by one character.
 *
 * `c` is either a character taken from the current input line or '\0'.
 * Reader_iternext() rejects real NUL characters and passes 0 once after the
 * last character of every line, so here '\0' acts as the end-of-line marker.
 * A record is complete when self->state is back at START_RECORD.
 *
 * Returns 0 on success.  Returns -1 when parse_save_field() or
 * parse_add_char() fails (presumably with an exception already set by the
 * helper), or when the input is malformed, in which case
 * module_state->error_obj is raised here.
 */
static int
parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following an escapechar outside of quotes */
        if (c == '\n' || c=='\r') {
            /* escaped line break: keep it and remember that it was escaped */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escapechar was the last character of the line: store a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* the end-of-line marker right after an escaped line break is
           swallowed, leaving the state unchanged so the field continues
           on the next line */
        if (c == '\0')
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state so the field
               continues on the next line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* character following an escapechar inside quotes; an escapechar at
           the very end of the line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, module_state, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* non-strict dialects accept text after the closing quote and
               carry on as an unquoted field */
            if (parse_add_char(self, module_state, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* a line break has been seen: only further line-break characters
           or the end-of-line marker may follow */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(module_state->error_obj,
                         "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
846
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000847static int
848parse_reset(ReaderObj *self)
849{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300850 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 if (self->fields == NULL)
852 return -1;
853 self->field_len = 0;
854 self->state = START_RECORD;
855 self->numeric_field = 0;
856 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000857}
Skip Montanarob4a04172003-03-20 23:29:12 +0000858
859static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000860Reader_iternext(ReaderObj *self)
861{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000862 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200863 Py_UCS4 c;
864 Py_ssize_t pos, linelen;
865 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300866 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200867 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000868
Petr Viktorin6a02b382020-12-15 15:14:35 +0100869 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
870 "Reader.__next__");
871 if (module_state == NULL) {
872 return NULL;
873 }
874
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 if (parse_reset(self) < 0)
876 return NULL;
877 do {
878 lineobj = PyIter_Next(self->input_iter);
879 if (lineobj == NULL) {
880 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700881 if (!PyErr_Occurred() && (self->field_len != 0 ||
882 self->state == IN_QUOTED_FIELD)) {
883 if (self->dialect->strict)
Petr Viktorin6a02b382020-12-15 15:14:35 +0100884 PyErr_SetString(module_state->error_obj,
Senthil Kumaran49d13022012-09-25 02:37:20 -0700885 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700886 else if (parse_save_field(self) >= 0)
887 break;
888 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000889 return NULL;
890 }
891 if (!PyUnicode_Check(lineobj)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100892 PyErr_Format(module_state->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000893 "iterator should return strings, "
894 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300895 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100896 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000897 );
898 Py_DECREF(lineobj);
899 return NULL;
900 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100901 if (PyUnicode_READY(lineobj) == -1) {
902 Py_DECREF(lineobj);
903 return NULL;
904 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000905 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200906 kind = PyUnicode_KIND(lineobj);
907 data = PyUnicode_DATA(lineobj);
908 pos = 0;
909 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200911 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000912 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000913 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100914 PyErr_Format(module_state->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700915 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000916 goto err;
917 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100918 if (parse_process_char(self, module_state, c) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000919 Py_DECREF(lineobj);
920 goto err;
921 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200922 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 }
924 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100925 if (parse_process_char(self, module_state, 0) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 goto err;
927 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000928
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000929 fields = self->fields;
930 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000931err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000932 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000933}
934
935static void
936Reader_dealloc(ReaderObj *self)
937{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100938 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 PyObject_GC_UnTrack(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700940 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100941 if (self->field != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 PyMem_Free(self->field);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100943 self->field = NULL;
944 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100946 Py_DECREF(tp);
947}
948
/* tp_traverse for reader objects: report every object reference held by
 * the instance to the garbage collector's `visit` callback.
 *
 * The instance's type is visited as well; Reader_dealloc() releases a
 * reference to it, i.e. the instance owns one.
 *
 * Returns 0, or the first non-zero value returned by `visit` (Py_VISIT
 * returns from the function in that case).
 */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
958
/* tp_clear for reader objects: drop the references to the dialect, the
 * input iterator and the pending field list, leaving the members NULL.
 *
 * The raw field buffer (self->field) is not touched here; it is freed in
 * Reader_dealloc().  Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
967
/* Docstring installed on the reader type through Py_tp_doc below. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);

/* Reader objects define no methods of their own: the table holds only the
   terminating sentinel. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of member `x` inside ReaderObj, for the member table. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Attributes exposed on reader objects; both are read-only. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
    { "line_num", T_ULONG, R_OFF(line_num), READONLY },
    { NULL }
};


/* Slot table for the reader type.  The object is its own iterator
   (PyObject_SelfIter) and yields records through Reader_iternext(). */
static PyType_Slot Reader_Type_slots[] = {
    {Py_tp_doc, (char*)Reader_Type_doc},
    {Py_tp_traverse, Reader_traverse},
    {Py_tp_iter, PyObject_SelfIter},
    {Py_tp_iternext, Reader_iternext},
    {Py_tp_methods, Reader_methods},
    {Py_tp_members, Reader_memberlist},
    {Py_tp_clear, Reader_clear},
    {Py_tp_dealloc, Reader_dealloc},
    {0, NULL}
};

/* Type specification for "_csv.reader": a GC-enabled type using the slots
   above. */
PyType_Spec Reader_Type_spec = {
    .name = "_csv.reader",
    .basicsize = sizeof(ReaderObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Reader_Type_slots
};
1006
1007
Skip Montanarob4a04172003-03-20 23:29:12 +00001008static PyObject *
1009csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1010{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 PyObject * iterator, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001012 _csvstate *module_state = get_csv_state(module);
1013 ReaderObj * self = PyObject_GC_New(
1014 ReaderObj,
1015 module_state->reader_type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001016
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 if (!self)
1018 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 self->dialect = NULL;
1021 self->fields = NULL;
1022 self->input_iter = NULL;
1023 self->field = NULL;
1024 self->field_size = 0;
1025 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +00001026
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001027 if (parse_reset(self) < 0) {
1028 Py_DECREF(self);
1029 return NULL;
1030 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1033 Py_DECREF(self);
1034 return NULL;
1035 }
1036 self->input_iter = PyObject_GetIter(iterator);
1037 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 Py_DECREF(self);
1039 return NULL;
1040 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001041 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1042 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001043 if (self->dialect == NULL) {
1044 Py_DECREF(self);
1045 return NULL;
1046 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001047
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 PyObject_GC_Track(self);
1049 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001050}
1051
1052/*
1053 * WRITER
1054 */
1055/* ---------------------------------------------------------------- */
1056static void
1057join_reset(WriterObj *self)
1058{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001059 self->rec_len = 0;
1060 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001061}
1062
1063#define MEM_INCR 32768
1064
1065/* Calculate new record length or append field to record. Return new
1066 * record length.
1067 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001068static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001069join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001070 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001071 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001072{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 DialectObj *dialect = self->dialect;
1074 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001075 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001076
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001077#define INCLEN \
1078 do {\
1079 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1080 goto overflow; \
1081 } \
1082 rec_len++; \
1083 } while(0)
1084
1085#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 do {\
1087 if (copy_phase) \
1088 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001089 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 /* If this is not the first field we need a field separator */
1095 if (self->num_fields > 0)
1096 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 /* Handle preceding quote */
1099 if (copy_phase && *quoted)
1100 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 /* Copy/count field data */
1103 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001104 for (i = 0; field_data && (i < field_len); i++) {
1105 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 if (c == dialect->delimiter ||
1109 c == dialect->escapechar ||
1110 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001111 PyUnicode_FindChar(
1112 dialect->lineterminator, c, 0,
1113 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001114 if (dialect->quoting == QUOTE_NONE)
1115 want_escape = 1;
1116 else {
1117 if (c == dialect->quotechar) {
1118 if (dialect->doublequote)
1119 ADDCH(dialect->quotechar);
1120 else
1121 want_escape = 1;
1122 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001123 else if (c == dialect->escapechar) {
1124 want_escape = 1;
1125 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 if (!want_escape)
1127 *quoted = 1;
1128 }
1129 if (want_escape) {
1130 if (!dialect->escapechar) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001131 PyErr_Format(self->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001132 "need to escape, but no escapechar set");
1133 return -1;
1134 }
1135 ADDCH(dialect->escapechar);
1136 }
1137 }
1138 /* Copy field character into record buffer.
1139 */
1140 ADDCH(c);
1141 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001142
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001143 if (*quoted) {
1144 if (copy_phase)
1145 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001146 else {
1147 INCLEN; /* starting quote */
1148 INCLEN; /* ending quote */
1149 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 }
1151 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001152
1153 overflow:
1154 PyErr_NoMemory();
1155 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001156#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001157#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001158}
1159
1160static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001161join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001162{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001163 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001166 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1167 Py_UCS4 *rec_new = self->rec;
1168 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1169 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 PyErr_NoMemory();
1171 return 0;
1172 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001173 self->rec = rec_new;
1174 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 }
1176 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001177}
1178
1179static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001180join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001181{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001182 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001183 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001184 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001185 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001186
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001187 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001188 if (PyUnicode_READY(field) == -1)
1189 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001190 field_kind = PyUnicode_KIND(field);
1191 field_data = PyUnicode_DATA(field);
1192 field_len = PyUnicode_GET_LENGTH(field);
1193 }
1194 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001195 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001196 if (rec_len < 0)
1197 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001198
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 /* grow record buffer if necessary */
1200 if (!join_check_rec_size(self, rec_len))
1201 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001202
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001203 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001204 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001206
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001208}
1209
1210static int
1211join_append_lineterminator(WriterObj *self)
1212{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001213 Py_ssize_t terminator_len, i;
1214 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001215 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001216
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001217 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 if (terminator_len == -1)
1219 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 /* grow record buffer if necessary */
1222 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1223 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001224
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001225 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1226 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1227 for (i = 0; i < terminator_len; i++)
1228 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001230
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001232}
1233
1234PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001235"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001236"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001237"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001238"elements will be converted to string.");
1239
1240static PyObject *
1241csv_writerow(WriterObj *self, PyObject *seq)
1242{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001243 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001244 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001245
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001246 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001247 if (iter == NULL) {
1248 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001249 PyErr_Format(self->error_obj,
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001250 "iterable expected, not %.200s",
1251 Py_TYPE(seq)->tp_name);
1252 }
1253 return NULL;
1254 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001255
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001256 /* Join all fields in internal buffer.
1257 */
1258 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001259 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001260 int append_ok;
1261 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001262
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001263 switch (dialect->quoting) {
1264 case QUOTE_NONNUMERIC:
1265 quoted = !PyNumber_Check(field);
1266 break;
1267 case QUOTE_ALL:
1268 quoted = 1;
1269 break;
1270 default:
1271 quoted = 0;
1272 break;
1273 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001276 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 Py_DECREF(field);
1278 }
1279 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001280 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001281 Py_DECREF(field);
1282 }
1283 else {
1284 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001286 str = PyObject_Str(field);
1287 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001288 if (str == NULL) {
1289 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001290 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001291 }
1292 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001293 Py_DECREF(str);
1294 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001295 if (!append_ok) {
1296 Py_DECREF(iter);
1297 return NULL;
1298 }
1299 }
1300 Py_DECREF(iter);
1301 if (PyErr_Occurred())
1302 return NULL;
1303
Licht Takeuchi20019002017-12-12 18:57:06 +09001304 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001305 if (dialect->quoting == QUOTE_NONE) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001306 PyErr_Format(self->error_obj,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001307 "single empty field record must be quoted");
1308 return NULL;
1309 }
1310 self->num_fields--;
1311 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001312 return NULL;
1313 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001314
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001315 /* Add line terminator.
1316 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001317 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001318 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001319 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001320
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001321 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1322 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001323 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001324 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001325 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001326 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001327 Py_DECREF(line);
1328 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001329}
1330
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001331PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001332"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001333"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001334"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001335"elements will be converted to string.");
1336
Skip Montanarob4a04172003-03-20 23:29:12 +00001337static PyObject *
1338csv_writerows(WriterObj *self, PyObject *seqseq)
1339{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001341
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001342 row_iter = PyObject_GetIter(seqseq);
1343 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001344 return NULL;
1345 }
1346 while ((row_obj = PyIter_Next(row_iter))) {
1347 result = csv_writerow(self, row_obj);
1348 Py_DECREF(row_obj);
1349 if (!result) {
1350 Py_DECREF(row_iter);
1351 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001352 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001353 else
1354 Py_DECREF(result);
1355 }
1356 Py_DECREF(row_iter);
1357 if (PyErr_Occurred())
1358 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001359 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001360}
1361
/* Methods of writer objects; each takes exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};

/* Byte offset of member `x` inside WriterObj, for the member table. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attributes exposed on writer objects: the dialect, read-only. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
    { NULL }
};
1374
/* tp_traverse for writer objects: report every object reference held by
 * the instance to the garbage collector's `visit` callback.
 *
 * The instance's type is visited as well; Writer_dealloc() releases a
 * reference to it, i.e. the instance owns one.
 *
 * Returns 0, or the first non-zero value returned by `visit` (Py_VISIT
 * returns from the function in that case).
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->write);
    Py_VISIT(self->error_obj);
    Py_VISIT(Py_TYPE(self));
    return 0;
}
1384
/* tp_clear for writer objects: drop the references to the dialect, the
 * write callable and the error class, leaving the members NULL.
 *
 * The record buffer (self->rec) is not touched here; it is freed in
 * Writer_dealloc().  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->write);
    Py_CLEAR(self->error_obj);
    return 0;
}
1393
Petr Viktorin6a02b382020-12-15 15:14:35 +01001394static void
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001395Writer_dealloc(WriterObj *self)
Petr Viktorin6a02b382020-12-15 15:14:35 +01001396{
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001397 PyTypeObject *tp = Py_TYPE(self);
1398 PyObject_GC_UnTrack(self);
1399 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001400 if (self->rec != NULL) {
1401 PyMem_Free(self->rec);
1402 }
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001403 PyObject_GC_Del(self);
1404 Py_DECREF(tp);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001405}
1406
/* Docstring installed on the writer type through Py_tp_doc below. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);

/* Slot table for the writer type. */
static PyType_Slot Writer_Type_slots[] = {
    {Py_tp_doc, (char*)Writer_Type_doc},
    {Py_tp_traverse, Writer_traverse},
    {Py_tp_clear, Writer_clear},
    {Py_tp_dealloc, Writer_dealloc},
    {Py_tp_methods, Writer_methods},
    {Py_tp_members, Writer_memberlist},
    {0, NULL}
};

/* Type specification for "_csv.writer": a GC-enabled type using the slots
   above. */
PyType_Spec Writer_Type_spec = {
    .name = "_csv.writer",
    .basicsize = sizeof(WriterObj),
    .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
              Py_TPFLAGS_IMMUTABLETYPE),
    .slots = Writer_Type_slots,
};
1431
1432
Skip Montanarob4a04172003-03-20 23:29:12 +00001433static PyObject *
1434csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1435{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 PyObject * output_file, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001437 _csvstate *module_state = get_csv_state(module);
1438 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001439 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 if (!self)
1442 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001443
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001445 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001446
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 self->rec = NULL;
1448 self->rec_size = 0;
1449 self->rec_len = 0;
1450 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001451
Petr Viktorin6a02b382020-12-15 15:14:35 +01001452 self->error_obj = Py_NewRef(module_state->error_obj);
1453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1455 Py_DECREF(self);
1456 return NULL;
1457 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001458 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1459 Py_DECREF(self);
1460 return NULL;
1461 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001462 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001463 PyErr_SetString(PyExc_TypeError,
1464 "argument 1 must have a \"write\" method");
1465 Py_DECREF(self);
1466 return NULL;
1467 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001468 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1469 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001470 if (self->dialect == NULL) {
1471 Py_DECREF(self);
1472 return NULL;
1473 }
1474 PyObject_GC_Track(self);
1475 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001476}
1477
1478/*
1479 * DIALECT REGISTRY
1480 */
1481static PyObject *
1482csv_list_dialects(PyObject *module, PyObject *args)
1483{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001484 return PyDict_Keys(get_csv_state(module)->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001485}
1486
1487static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001488csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001489{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001490 PyObject *name_obj, *dialect_obj = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001491 _csvstate *module_state = get_csv_state(module);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001492 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001493
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001494 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1495 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001496 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001498 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001499 return NULL;
1500 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001501 if (PyUnicode_READY(name_obj) == -1)
1502 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001503 dialect = _call_dialect(module_state, dialect_obj, kwargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001504 if (dialect == NULL)
1505 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001506 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001507 Py_DECREF(dialect);
1508 return NULL;
1509 }
1510 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001511 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001512}
1513
1514static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001515csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001516{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001517 _csvstate *module_state = get_csv_state(module);
1518 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001519 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001520 PyErr_Format(module_state->error_obj, "unknown dialect");
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001521 }
1522 return NULL;
1523 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001524 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001525}
1526
1527static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001528csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001529{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001530 return get_dialect_from_registry(name_obj, get_csv_state(module));
Skip Montanarob4a04172003-03-20 23:29:12 +00001531}
1532
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001533static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001534csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001535{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001536 PyObject *new_limit = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001537 _csvstate *module_state = get_csv_state(module);
1538 long old_limit = module_state->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001540 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1541 return NULL;
1542 if (new_limit != NULL) {
1543 if (!PyLong_CheckExact(new_limit)) {
1544 PyErr_Format(PyExc_TypeError,
1545 "limit must be an integer");
1546 return NULL;
1547 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001548 module_state->field_limit = PyLong_AsLong(new_limit);
1549 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1550 module_state->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001551 return NULL;
1552 }
1553 }
1554 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001555}
1556
Petr Viktorin6a02b382020-12-15 15:14:35 +01001557static PyType_Slot error_slots[] = {
1558 {0, NULL},
1559};
1560
1561PyType_Spec error_spec = {
1562 .name = "_csv.Error",
Miss Islington (bot)3e44e9a2021-05-12 07:02:46 -07001563 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001564 .slots = error_slots,
1565};
1566
Skip Montanarob4a04172003-03-20 23:29:12 +00001567/*
1568 * MODULE
1569 */
1570
1571PyDoc_STRVAR(csv_module_doc,
1572"CSV parsing and writing.\n"
1573"\n"
1574"This module provides classes that assist in the reading and writing\n"
1575"of Comma Separated Value (CSV) files, and implements the interface\n"
1576"described by PEP 305. Although many CSV files are simple to parse,\n"
1577"the format is not formally defined by a stable specification and\n"
1578"is subtle enough that parsing lines of a CSV file with something\n"
1579"like line.split(\",\") is bound to fail. The module supports three\n"
1580"basic APIs: reading, writing, and registration of dialects.\n"
1581"\n"
1582"\n"
1583"DIALECT REGISTRATION:\n"
1584"\n"
1585"Readers and writers support a dialect argument, which is a convenient\n"
1586"handle on a group of settings. When the dialect argument is a string,\n"
1587"it identifies one of the dialects previously registered with the module.\n"
1588"If it is a class or instance, the attributes of the argument are used as\n"
1589"the settings for the reader or writer:\n"
1590"\n"
1591" class excel:\n"
1592" delimiter = ','\n"
1593" quotechar = '\"'\n"
1594" escapechar = None\n"
1595" doublequote = True\n"
1596" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001597" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001598" quoting = QUOTE_MINIMAL\n"
1599"\n"
1600"SETTINGS:\n"
1601"\n"
oldkaa0735f2018-02-02 16:52:55 +08001602" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001603" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001604" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001605" field separator. It defaults to ','.\n"
1606" * skipinitialspace - specifies how to interpret whitespace which\n"
1607" immediately follows a delimiter. It defaults to False, which\n"
1608" means that whitespace immediately following a delimiter is part\n"
1609" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001610" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001611" terminate rows.\n"
1612" * quoting - controls when quotes should be generated by the writer.\n"
1613" It can take on any of the following module constants:\n"
1614"\n"
1615" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1616" field contains either the quotechar or the delimiter\n"
1617" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1618" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001619" fields which do not parse as integers or floating point\n"
1620" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001621" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001622" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001623" the delimiter when quoting is set to QUOTE_NONE.\n"
1624" * doublequote - controls the handling of quotes inside fields. When\n"
1625" True, two consecutive quotes are interpreted as one during read,\n"
1626" and when writing, each quote character embedded in the data is\n"
1627" written as two quotes\n");
1628
1629PyDoc_STRVAR(csv_reader_doc,
1630" csv_reader = reader(iterable [, dialect='excel']\n"
1631" [optional keyword args])\n"
1632" for row in csv_reader:\n"
1633" process(row)\n"
1634"\n"
1635"The \"iterable\" argument can be any object that returns a line\n"
1636"of input for each iteration, such as a file object or a list. The\n"
1637"optional \"dialect\" parameter is discussed below. The function\n"
1638"also accepts optional keyword arguments which override settings\n"
1639"provided by the dialect.\n"
1640"\n"
1641"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001642"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001643
1644PyDoc_STRVAR(csv_writer_doc,
1645" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1646" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001647" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001648" csv_writer.writerow(row)\n"
1649"\n"
1650" [or]\n"
1651"\n"
1652" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1653" [optional keyword args])\n"
1654" csv_writer.writerows(rows)\n"
1655"\n"
1656"The \"fileobj\" argument can be any object that supports the file API.\n");
1657
1658PyDoc_STRVAR(csv_list_dialects_doc,
1659"Return a list of all know dialect names.\n"
1660" names = csv.list_dialects()");
1661
1662PyDoc_STRVAR(csv_get_dialect_doc,
1663"Return the dialect instance associated with name.\n"
1664" dialect = csv.get_dialect(name)");
1665
1666PyDoc_STRVAR(csv_register_dialect_doc,
1667"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001668" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001669
1670PyDoc_STRVAR(csv_unregister_dialect_doc,
1671"Delete the name/dialect mapping associated with a string name.\n"
1672" csv.unregister_dialect(name)");
1673
Andrew McNamara31d88962005-01-12 03:45:10 +00001674PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001675"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001676" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001677"\n"
1678"Returns old limit. If limit is not given, no new limit is set and\n"
1679"the old limit is returned");
1680
Skip Montanarob4a04172003-03-20 23:29:12 +00001681static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001682 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001684 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1686 { "list_dialects", (PyCFunction)csv_list_dialects,
1687 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001688 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001689 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1690 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1691 METH_O, csv_unregister_dialect_doc},
1692 { "get_dialect", (PyCFunction)csv_get_dialect,
1693 METH_O, csv_get_dialect_doc},
1694 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1695 METH_VARARGS, csv_field_size_limit_doc},
1696 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001697};
1698
Petr Viktorin6a02b382020-12-15 15:14:35 +01001699static int
1700csv_exec(PyObject *module) {
1701 const StyleDesc *style;
1702 PyObject *temp;
1703 _csvstate *module_state = get_csv_state(module);
1704
1705 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1706 module_state->dialect_type = (PyTypeObject *)temp;
1707 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1708 return -1;
1709 }
1710
1711 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1712 module_state->reader_type = (PyTypeObject *)temp;
1713 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1714 return -1;
1715 }
1716
1717 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1718 module_state->writer_type = (PyTypeObject *)temp;
1719 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1720 return -1;
1721 }
1722
1723 /* Add version to the module. */
1724 if (PyModule_AddStringConstant(module, "__version__",
1725 MODULE_VERSION) == -1) {
1726 return -1;
1727 }
1728
1729 /* Set the field limit */
1730 module_state->field_limit = 128 * 1024;
1731
1732 /* Add _dialects dictionary */
1733 module_state->dialects = PyDict_New();
1734 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1735 return -1;
1736 }
1737
1738 /* Add quote styles into dictionary */
1739 for (style = quote_styles; style->name; style++) {
1740 if (PyModule_AddIntConstant(module, style->name,
1741 style->style) == -1)
1742 return -1;
1743 }
1744
1745 /* Add the CSV exception object to the module. */
1746 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1747 if (bases == NULL) {
1748 return -1;
1749 }
1750 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1751 bases);
1752 Py_DECREF(bases);
1753 if (module_state->error_obj == NULL) {
1754 return -1;
1755 }
1756 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1757 return -1;
1758 }
1759
1760 return 0;
1761}
1762
1763static PyModuleDef_Slot csv_slots[] = {
1764 {Py_mod_exec, csv_exec},
1765 {0, NULL}
1766};
1767
Martin v. Löwis1a214512008-06-11 05:26:20 +00001768static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001769 PyModuleDef_HEAD_INIT,
1770 "_csv",
1771 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001772 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001773 csv_methods,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001774 csv_slots,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001775 _csv_traverse,
1776 _csv_clear,
1777 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001778};
1779
Skip Montanarob4a04172003-03-20 23:29:12 +00001780PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001781PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001782{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001783 return PyModuleDef_Init(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001784}