blob: cfdfbce6e6824f675fb81a1d93bc4f433d98eedf [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
Skip Montanarob4a04172003-03-20 23:29:12 +00009*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
Petr Viktorin6a02b382020-12-15 15:14:35 +010021 PyTypeObject *dialect_type;
22 PyTypeObject *reader_type;
23 PyTypeObject *writer_type;
Antoine Pitroue7672d32012-05-16 11:33:08 +020024 long field_limit; /* max parsed field size */
25} _csvstate;
26
Petr Viktorin6a02b382020-12-15 15:14:35 +010027static struct PyModuleDef _csvmodule;
28
Hai Shif707d942020-03-16 21:15:01 +080029static inline _csvstate*
30get_csv_state(PyObject *module)
31{
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35}
Antoine Pitroue7672d32012-05-16 11:33:08 +020036
37static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010038_csv_clear(PyObject *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020039{
Petr Viktorin6a02b382020-12-15 15:14:35 +010040 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020046 return 0;
47}
48
49static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010050_csv_traverse(PyObject *module, visitproc visit, void *arg)
Antoine Pitroue7672d32012-05-16 11:33:08 +020051{
Petr Viktorin6a02b382020-12-15 15:14:35 +010052 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020058 return 0;
59}
60
61static void
Petr Viktorin6a02b382020-12-15 15:14:35 +010062_csv_free(void *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020063{
Petr Viktorin6a02b382020-12-15 15:14:35 +010064 _csv_clear((PyObject *)module);
Antoine Pitroue7672d32012-05-16 11:33:08 +020065}
66
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;

/* Supported values of a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of known quoting styles; the final entry has a NULL name and
 * acts as the terminator for loops over the table. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
89
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000092
Serhiy Storchaka323748a2018-07-26 13:21:09 +030093 char doublequote; /* is " represented by ""? */
94 char skipinitialspace; /* ignore spaces following delimiter? */
95 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030097 Py_UCS4 delimiter; /* field separator */
98 Py_UCS4 quotechar; /* quote character */
99 Py_UCS4 escapechar; /* escape character */
100 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} DialectObj;
103
Skip Montanarob4a04172003-03-20 23:29:12 +0000104typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject *fields; /* field list for current record */
112 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200113 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000114 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 Py_ssize_t field_len; /* length of current field */
116 int numeric_field; /* treat field as numeric */
117 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118} ReaderObj;
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200123 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200127 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000128 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_ssize_t rec_len; /* length of record */
130 int num_fields; /* number of fields in record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
Petr Viktorin6a02b382020-12-15 15:14:35 +0100132 PyObject *error_obj; /* cached error object */
133} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
135/*
136 * DIALECT class
137 */
138
139static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100140get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
Skip Montanarob4a04172003-03-20 23:29:12 +0000141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143
Petr Viktorin6a02b382020-12-15 15:14:35 +0100144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
Petr Viktorin6a02b382020-12-15 15:14:35 +0100147 PyErr_Format(module_state->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 }
149 else
150 Py_INCREF(dialect_obj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153}
154
Skip Montanarob4a04172003-03-20 23:29:12 +0000155static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200156get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200159 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200166Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000167{
Dong-hee Na0383be42020-06-10 00:33:43 +0900168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Miss Islington (bot)87729352021-10-09 08:35:33 -0700232_set_char_or_none(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Miss Islington (bot)87729352021-10-09 08:35:33 -0700234 if (src == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 *target = dflt;
Miss Islington (bot)87729352021-10-09 08:35:33 -0700236 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000237 else {
238 *target = '\0';
239 if (src != Py_None) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
Miss Islington (bot)87729352021-10-09 08:35:33 -0700242 "\"%s\" must be string or None, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Miss Islington (bot)87729352021-10-09 08:35:33 -0700246 Py_ssize_t len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200247 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300249 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 name);
251 return -1;
252 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100253 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Miss Islington (bot)87729352021-10-09 08:35:33 -0700254 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200255 *target = PyUnicode_READ_CHAR(src, 0);
Miss Islington (bot)87729352021-10-09 08:35:33 -0700256 }
257 }
258 }
259 return 0;
260}
261
262static int
263_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
264{
265 if (src == NULL) {
266 *target = dflt;
267 }
268 else {
269 *target = '\0';
270 if (!PyUnicode_Check(src)) {
271 PyErr_Format(PyExc_TypeError,
272 "\"%s\" must be string, not %.200s", name,
273 Py_TYPE(src)->tp_name);
274 return -1;
275 }
276 Py_ssize_t len = PyUnicode_GetLength(src);
277 if (len > 1) {
278 PyErr_Format(PyExc_TypeError,
279 "\"%s\" must be a 1-character string",
280 name);
281 return -1;
282 }
283 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
284 else {
285 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 }
287 }
288 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000289}
290
291static int
292_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
293{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000294 if (src == NULL)
295 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
296 else {
297 if (src == Py_None)
298 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100299 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000300 PyErr_Format(PyExc_TypeError,
301 "\"%s\" must be a string", name);
302 return -1;
303 }
304 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100305 if (PyUnicode_READY(src) == -1)
306 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300308 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000309 }
310 }
311 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000312}
313
314static int
315dialect_check_quoting(int quoting)
316{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200317 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200320 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 return 0;
322 }
323 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
324 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000325}
Skip Montanarob4a04172003-03-20 23:29:12 +0000326
327#define D_OFF(x) offsetof(DialectObj, x)
328
329static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300330 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
331 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
332 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000333 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000334};
335
336static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000337 { "delimiter", (getter)Dialect_get_delimiter},
338 { "escapechar", (getter)Dialect_get_escapechar},
339 { "lineterminator", (getter)Dialect_get_lineterminator},
340 { "quotechar", (getter)Dialect_get_quotechar},
341 { "quoting", (getter)Dialect_get_quoting},
342 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000343};
344
345static void
346Dialect_dealloc(DialectObj *self)
347{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100348 PyTypeObject *tp = Py_TYPE(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700349 PyObject_GC_UnTrack(self);
350 tp->tp_clear((PyObject *)self);
351 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100352 Py_DECREF(tp);
353}
354
/* Keyword names accepted by dialect_new(), in the order of its
 * "|OOOOOOOOO" argument format. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
367
Petr Viktorin6a02b382020-12-15 15:14:35 +0100368static _csvstate *
369_csv_state_from_type(PyTypeObject *type, const char *name)
370{
371 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
372 if (module == NULL) {
373 return NULL;
374 }
375 _csvstate *module_state = PyModule_GetState(module);
376 if (module_state == NULL) {
377 PyErr_Format(PyExc_SystemError,
378 "%s: No _csv module state found", name);
379 return NULL;
380 }
381 return module_state;
382}
383
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000384static PyObject *
385dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000386{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 DialectObj *self;
388 PyObject *ret = NULL;
389 PyObject *dialect = NULL;
390 PyObject *delimiter = NULL;
391 PyObject *doublequote = NULL;
392 PyObject *escapechar = NULL;
393 PyObject *lineterminator = NULL;
394 PyObject *quotechar = NULL;
395 PyObject *quoting = NULL;
396 PyObject *skipinitialspace = NULL;
397 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000398
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000399 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
400 "|OOOOOOOOO", dialect_kws,
401 &dialect,
402 &delimiter,
403 &doublequote,
404 &escapechar,
405 &lineterminator,
406 &quotechar,
407 &quoting,
408 &skipinitialspace,
409 &strict))
410 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000411
Petr Viktorin6a02b382020-12-15 15:14:35 +0100412 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
413 if (module_state == NULL) {
414 return NULL;
415 }
416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100418 if (PyUnicode_Check(dialect)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100419 dialect = get_dialect_from_registry(dialect, module_state);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000420 if (dialect == NULL)
421 return NULL;
422 }
423 else
424 Py_INCREF(dialect);
425 /* Can we reuse this instance? */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100426 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200427 delimiter == NULL &&
428 doublequote == NULL &&
429 escapechar == NULL &&
430 lineterminator == NULL &&
431 quotechar == NULL &&
432 quoting == NULL &&
433 skipinitialspace == NULL &&
434 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000435 return dialect;
436 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 self = (DialectObj *)type->tp_alloc(type, 0);
439 if (self == NULL) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100440 Py_CLEAR(dialect);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000441 return NULL;
442 }
443 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000444
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000445 Py_XINCREF(delimiter);
446 Py_XINCREF(doublequote);
447 Py_XINCREF(escapechar);
448 Py_XINCREF(lineterminator);
449 Py_XINCREF(quotechar);
450 Py_XINCREF(quoting);
451 Py_XINCREF(skipinitialspace);
452 Py_XINCREF(strict);
453 if (dialect != NULL) {
Miss Islington (bot)bb260c22021-07-13 16:18:28 -0700454#define DIALECT_GETATTR(v, n) \
455 do { \
456 if (v == NULL) { \
457 v = PyObject_GetAttrString(dialect, n); \
458 if (v == NULL) \
459 PyErr_Clear(); \
460 } \
461 } while (0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000462 DIALECT_GETATTR(delimiter, "delimiter");
463 DIALECT_GETATTR(doublequote, "doublequote");
464 DIALECT_GETATTR(escapechar, "escapechar");
465 DIALECT_GETATTR(lineterminator, "lineterminator");
466 DIALECT_GETATTR(quotechar, "quotechar");
467 DIALECT_GETATTR(quoting, "quoting");
468 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
469 DIALECT_GETATTR(strict, "strict");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000470 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000471
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000472 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000473#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000474 if (meth(name, target, src, dflt)) \
475 goto err
476 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300477 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Miss Islington (bot)87729352021-10-09 08:35:33 -0700478 DIASET(_set_char_or_none, "escapechar", &self->escapechar, escapechar, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000479 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
Miss Islington (bot)87729352021-10-09 08:35:33 -0700480 DIASET(_set_char_or_none, "quotechar", &self->quotechar, quotechar, '"');
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000481 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300482 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
483 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000484
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000485 /* validate options */
486 if (dialect_check_quoting(self->quoting))
487 goto err;
488 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200489 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300490 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000491 goto err;
492 }
493 if (quotechar == Py_None && quoting == NULL)
494 self->quoting = QUOTE_NONE;
495 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
496 PyErr_SetString(PyExc_TypeError,
497 "quotechar must be set if quoting enabled");
498 goto err;
499 }
500 if (self->lineterminator == 0) {
501 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
502 goto err;
503 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 ret = (PyObject *)self;
506 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000507err:
Petr Viktorin6a02b382020-12-15 15:14:35 +0100508 Py_CLEAR(self);
509 Py_CLEAR(dialect);
510 Py_CLEAR(delimiter);
511 Py_CLEAR(doublequote);
512 Py_CLEAR(escapechar);
513 Py_CLEAR(lineterminator);
514 Py_CLEAR(quotechar);
515 Py_CLEAR(quoting);
516 Py_CLEAR(skipinitialspace);
517 Py_CLEAR(strict);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000519}
520
Petr Viktorin6a02b382020-12-15 15:14:35 +0100521/* Since dialect is now a heap type, it inherits pickling method for
Miss Islington (bot)5afc5bb2021-10-07 01:55:18 -0700522 * protocol 0 and 1 from object, therefore it needs to be overridden */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100523
524PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
525
526static PyObject *
527Dialect_reduce(PyObject *self, PyObject *args) {
528 PyErr_Format(PyExc_TypeError,
529 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
530 return NULL;
531}
532
533static struct PyMethodDef dialect_methods[] = {
534 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
535 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
536 {NULL, NULL}
537};
Skip Montanarob4a04172003-03-20 23:29:12 +0000538
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000540"CSV dialect\n"
541"\n"
542"The Dialect type records CSV parsing and generation options.\n");
543
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700544static int
545Dialect_clear(DialectObj *self)
546{
547 Py_CLEAR(self->lineterminator);
548 return 0;
549}
550
551static int
552Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
553{
554 Py_VISIT(self->lineterminator);
555 Py_VISIT(Py_TYPE(self));
556 return 0;
557}
558
Petr Viktorin6a02b382020-12-15 15:14:35 +0100559static PyType_Slot Dialect_Type_slots[] = {
560 {Py_tp_doc, (char*)Dialect_Type_doc},
561 {Py_tp_members, Dialect_memberlist},
562 {Py_tp_getset, Dialect_getsetlist},
563 {Py_tp_new, dialect_new},
564 {Py_tp_methods, dialect_methods},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100565 {Py_tp_dealloc, Dialect_dealloc},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700566 {Py_tp_clear, Dialect_clear},
567 {Py_tp_traverse, Dialect_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100568 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000569};
570
Petr Viktorin6a02b382020-12-15 15:14:35 +0100571PyType_Spec Dialect_Type_spec = {
572 .name = "_csv.Dialect",
573 .basicsize = sizeof(DialectObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700574 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
575 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100576 .slots = Dialect_Type_slots,
577};
578
579
Andrew McNamara91b97462005-01-11 01:07:23 +0000580/*
581 * Return an instance of the dialect type, given a Python instance or kwarg
582 * description of the dialect
583 */
584static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100585_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
Andrew McNamara91b97462005-01-11 01:07:23 +0000586{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100587 PyObject *type = (PyObject *)module_state->dialect_type;
Victor Stinner6412f492016-08-23 00:21:34 +0200588 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100589 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200590 }
591 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100592 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200593 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000594}
595
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000596/*
597 * READER
598 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000599static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000600parse_save_field(ReaderObj *self)
601{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000602 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000603
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200604 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
605 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 if (field == NULL)
607 return -1;
608 self->field_len = 0;
609 if (self->numeric_field) {
610 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000612 self->numeric_field = 0;
613 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000614 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200615 if (tmp == NULL)
616 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000617 field = tmp;
618 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100619 if (PyList_Append(self->fields, field) < 0) {
620 Py_DECREF(field);
621 return -1;
622 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000623 Py_DECREF(field);
624 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000625}
626
627static int
628parse_grow_buff(ReaderObj *self)
629{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500630 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
631
632 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
633 Py_UCS4 *field_new = self->field;
634 PyMem_Resize(field_new, Py_UCS4, field_size_new);
635 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000636 PyErr_NoMemory();
637 return 0;
638 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500639 self->field = field_new;
640 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000641 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000642}
643
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000644static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100645parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000646{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100647 if (self->field_len >= module_state->field_limit) {
648 PyErr_Format(module_state->error_obj,
649 "field larger than field limit (%ld)",
650 module_state->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 return -1;
652 }
653 if (self->field_len == self->field_size && !parse_grow_buff(self))
654 return -1;
655 self->field[self->field_len++] = c;
656 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000657}
658
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000659static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100660parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000661{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000662 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000664 switch (self->state) {
665 case START_RECORD:
666 /* start of record */
667 if (c == '\0')
668 /* empty line - return [] */
669 break;
670 else if (c == '\n' || c == '\r') {
671 self->state = EAT_CRNL;
672 break;
673 }
674 /* normal character - handle as START_FIELD */
675 self->state = START_FIELD;
676 /* fallthru */
677 case START_FIELD:
678 /* expecting field */
679 if (c == '\n' || c == '\r' || c == '\0') {
680 /* save empty field - return [fields] */
681 if (parse_save_field(self) < 0)
682 return -1;
683 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
684 }
685 else if (c == dialect->quotechar &&
686 dialect->quoting != QUOTE_NONE) {
687 /* start quoted field */
688 self->state = IN_QUOTED_FIELD;
689 }
690 else if (c == dialect->escapechar) {
691 /* possible escaped character */
692 self->state = ESCAPED_CHAR;
693 }
694 else if (c == ' ' && dialect->skipinitialspace)
695 /* ignore space at start of field */
696 ;
697 else if (c == dialect->delimiter) {
698 /* save empty field */
699 if (parse_save_field(self) < 0)
700 return -1;
701 }
702 else {
703 /* begin new unquoted field */
704 if (dialect->quoting == QUOTE_NONNUMERIC)
705 self->numeric_field = 1;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100706 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000707 return -1;
708 self->state = IN_FIELD;
709 }
710 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400713 if (c == '\n' || c=='\r') {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100714 if (parse_add_char(self, module_state, c) < 0)
R David Murrayc7c42ef2013-03-19 22:41:47 -0400715 return -1;
716 self->state = AFTER_ESCAPED_CRNL;
717 break;
718 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 if (c == '\0')
720 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100721 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000722 return -1;
723 self->state = IN_FIELD;
724 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000725
R David Murrayc7c42ef2013-03-19 22:41:47 -0400726 case AFTER_ESCAPED_CRNL:
727 if (c == '\0')
728 break;
729 /*fallthru*/
730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 case IN_FIELD:
732 /* in unquoted field */
733 if (c == '\n' || c == '\r' || c == '\0') {
734 /* end of line - return [fields] */
735 if (parse_save_field(self) < 0)
736 return -1;
737 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
738 }
739 else if (c == dialect->escapechar) {
740 /* possible escaped character */
741 self->state = ESCAPED_CHAR;
742 }
743 else if (c == dialect->delimiter) {
744 /* save field - wait for new field */
745 if (parse_save_field(self) < 0)
746 return -1;
747 self->state = START_FIELD;
748 }
749 else {
750 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100751 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 return -1;
753 }
754 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 case IN_QUOTED_FIELD:
757 /* in quoted field */
758 if (c == '\0')
759 ;
760 else if (c == dialect->escapechar) {
761 /* Possible escape character */
762 self->state = ESCAPE_IN_QUOTED_FIELD;
763 }
764 else if (c == dialect->quotechar &&
765 dialect->quoting != QUOTE_NONE) {
766 if (dialect->doublequote) {
767 /* doublequote; " represented by "" */
768 self->state = QUOTE_IN_QUOTED_FIELD;
769 }
770 else {
771 /* end of quote part of field */
772 self->state = IN_FIELD;
773 }
774 }
775 else {
776 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100777 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 return -1;
779 }
780 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000781
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 case ESCAPE_IN_QUOTED_FIELD:
783 if (c == '\0')
784 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100785 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 return -1;
787 self->state = IN_QUOTED_FIELD;
788 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300791 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000792 if (dialect->quoting != QUOTE_NONE &&
793 c == dialect->quotechar) {
794 /* save "" as " */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100795 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000796 return -1;
797 self->state = IN_QUOTED_FIELD;
798 }
799 else if (c == dialect->delimiter) {
800 /* save field - wait for new field */
801 if (parse_save_field(self) < 0)
802 return -1;
803 self->state = START_FIELD;
804 }
805 else if (c == '\n' || c == '\r' || c == '\0') {
806 /* end of line - return [fields] */
807 if (parse_save_field(self) < 0)
808 return -1;
809 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
810 }
811 else if (!dialect->strict) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100812 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000813 return -1;
814 self->state = IN_FIELD;
815 }
816 else {
817 /* illegal */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100818 PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 dialect->delimiter,
820 dialect->quotechar);
821 return -1;
822 }
823 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000824
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000825 case EAT_CRNL:
826 if (c == '\n' || c == '\r')
827 ;
828 else if (c == '\0')
829 self->state = START_RECORD;
830 else {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100831 PyErr_Format(module_state->error_obj,
832 "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 return -1;
834 }
835 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000836
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 }
838 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000839}
840
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000841static int
842parse_reset(ReaderObj *self)
843{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300844 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 if (self->fields == NULL)
846 return -1;
847 self->field_len = 0;
848 self->state = START_RECORD;
849 self->numeric_field = 0;
850 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000851}
Skip Montanarob4a04172003-03-20 23:29:12 +0000852
853static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000854Reader_iternext(ReaderObj *self)
855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200857 Py_UCS4 c;
858 Py_ssize_t pos, linelen;
859 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300860 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200861 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000862
Petr Viktorin6a02b382020-12-15 15:14:35 +0100863 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
864 "Reader.__next__");
865 if (module_state == NULL) {
866 return NULL;
867 }
868
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 if (parse_reset(self) < 0)
870 return NULL;
871 do {
872 lineobj = PyIter_Next(self->input_iter);
873 if (lineobj == NULL) {
874 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700875 if (!PyErr_Occurred() && (self->field_len != 0 ||
876 self->state == IN_QUOTED_FIELD)) {
877 if (self->dialect->strict)
Petr Viktorin6a02b382020-12-15 15:14:35 +0100878 PyErr_SetString(module_state->error_obj,
Senthil Kumaran49d13022012-09-25 02:37:20 -0700879 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700880 else if (parse_save_field(self) >= 0)
881 break;
882 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000883 return NULL;
884 }
885 if (!PyUnicode_Check(lineobj)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100886 PyErr_Format(module_state->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 "iterator should return strings, "
888 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300889 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100890 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 );
892 Py_DECREF(lineobj);
893 return NULL;
894 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100895 if (PyUnicode_READY(lineobj) == -1) {
896 Py_DECREF(lineobj);
897 return NULL;
898 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200900 kind = PyUnicode_KIND(lineobj);
901 data = PyUnicode_DATA(lineobj);
902 pos = 0;
903 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200905 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000906 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000907 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100908 PyErr_Format(module_state->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700909 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000910 goto err;
911 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100912 if (parse_process_char(self, module_state, c) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000913 Py_DECREF(lineobj);
914 goto err;
915 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200916 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000917 }
918 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100919 if (parse_process_char(self, module_state, 0) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000920 goto err;
921 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000922
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000923 fields = self->fields;
924 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000925err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000926 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000927}
928
929static void
930Reader_dealloc(ReaderObj *self)
931{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100932 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000933 PyObject_GC_UnTrack(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700934 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100935 if (self->field != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000936 PyMem_Free(self->field);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100937 self->field = NULL;
938 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000939 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100940 Py_DECREF(tp);
941}
942
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000943static int
944Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
945{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000946 Py_VISIT(self->dialect);
947 Py_VISIT(self->input_iter);
948 Py_VISIT(self->fields);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700949 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000951}
952
953static int
954Reader_clear(ReaderObj *self)
955{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000956 Py_CLEAR(self->dialect);
957 Py_CLEAR(self->input_iter);
958 Py_CLEAR(self->fields);
959 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000960}
961
962PyDoc_STRVAR(Reader_Type_doc,
963"CSV reader\n"
964"\n"
965"Reader objects are responsible for reading and parsing tabular data\n"
966"in CSV format.\n"
967);
968
969static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000970 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000971};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000972#define R_OFF(x) offsetof(ReaderObj, x)
973
974static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
976 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
977 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000978};
979
Skip Montanarob4a04172003-03-20 23:29:12 +0000980
Petr Viktorin6a02b382020-12-15 15:14:35 +0100981static PyType_Slot Reader_Type_slots[] = {
982 {Py_tp_doc, (char*)Reader_Type_doc},
983 {Py_tp_traverse, Reader_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100984 {Py_tp_iter, PyObject_SelfIter},
985 {Py_tp_iternext, Reader_iternext},
986 {Py_tp_methods, Reader_methods},
987 {Py_tp_members, Reader_memberlist},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700988 {Py_tp_clear, Reader_clear},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100989 {Py_tp_dealloc, Reader_dealloc},
990 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000991};
992
Petr Viktorin6a02b382020-12-15 15:14:35 +0100993PyType_Spec Reader_Type_spec = {
994 .name = "_csv.reader",
995 .basicsize = sizeof(ReaderObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -0700996 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
997 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +0100998 .slots = Reader_Type_slots
999};
1000
1001
Skip Montanarob4a04172003-03-20 23:29:12 +00001002static PyObject *
1003csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
1004{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 PyObject * iterator, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001006 _csvstate *module_state = get_csv_state(module);
1007 ReaderObj * self = PyObject_GC_New(
1008 ReaderObj,
1009 module_state->reader_type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001010
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001011 if (!self)
1012 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001013
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 self->dialect = NULL;
1015 self->fields = NULL;
1016 self->input_iter = NULL;
1017 self->field = NULL;
1018 self->field_size = 0;
1019 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +00001020
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001021 if (parse_reset(self) < 0) {
1022 Py_DECREF(self);
1023 return NULL;
1024 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
1027 Py_DECREF(self);
1028 return NULL;
1029 }
1030 self->input_iter = PyObject_GetIter(iterator);
1031 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 Py_DECREF(self);
1033 return NULL;
1034 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001035 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1036 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001037 if (self->dialect == NULL) {
1038 Py_DECREF(self);
1039 return NULL;
1040 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001041
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 PyObject_GC_Track(self);
1043 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001044}
1045
1046/*
1047 * WRITER
1048 */
1049/* ---------------------------------------------------------------- */
1050static void
1051join_reset(WriterObj *self)
1052{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 self->rec_len = 0;
1054 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001055}
1056
/* Growth step, in elements, used when sizing the writer's record buffer
 * (join_check_rec_size() rounds the buffer size up to a multiple of it). */
#define MEM_INCR (32 * 1024)
1058
1059/* Calculate new record length or append field to record. Return new
1060 * record length.
1061 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001062static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001063join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001064 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001065 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001066{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001067 DialectObj *dialect = self->dialect;
1068 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001069 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001070
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001071#define INCLEN \
1072 do {\
1073 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1074 goto overflow; \
1075 } \
1076 rec_len++; \
1077 } while(0)
1078
1079#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 do {\
1081 if (copy_phase) \
1082 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001083 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 /* If this is not the first field we need a field separator */
1089 if (self->num_fields > 0)
1090 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001091
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001092 /* Handle preceding quote */
1093 if (copy_phase && *quoted)
1094 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 /* Copy/count field data */
1097 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001098 for (i = 0; field_data && (i < field_len); i++) {
1099 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001101
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 if (c == dialect->delimiter ||
1103 c == dialect->escapechar ||
1104 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001105 PyUnicode_FindChar(
1106 dialect->lineterminator, c, 0,
1107 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 if (dialect->quoting == QUOTE_NONE)
1109 want_escape = 1;
1110 else {
1111 if (c == dialect->quotechar) {
1112 if (dialect->doublequote)
1113 ADDCH(dialect->quotechar);
1114 else
1115 want_escape = 1;
1116 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001117 else if (c == dialect->escapechar) {
1118 want_escape = 1;
1119 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 if (!want_escape)
1121 *quoted = 1;
1122 }
1123 if (want_escape) {
1124 if (!dialect->escapechar) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001125 PyErr_Format(self->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 "need to escape, but no escapechar set");
1127 return -1;
1128 }
1129 ADDCH(dialect->escapechar);
1130 }
1131 }
1132 /* Copy field character into record buffer.
1133 */
1134 ADDCH(c);
1135 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 if (*quoted) {
1138 if (copy_phase)
1139 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001140 else {
1141 INCLEN; /* starting quote */
1142 INCLEN; /* ending quote */
1143 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 }
1145 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001146
1147 overflow:
1148 PyErr_NoMemory();
1149 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001150#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001151#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001152}
1153
1154static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001155join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001156{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001157 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001158
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001159 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001160 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1161 Py_UCS4 *rec_new = self->rec;
1162 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1163 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 PyErr_NoMemory();
1165 return 0;
1166 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001167 self->rec = rec_new;
1168 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001169 }
1170 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001171}
1172
1173static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001174join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001175{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001176 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001177 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001178 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001179 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001181 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001182 if (PyUnicode_READY(field) == -1)
1183 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001184 field_kind = PyUnicode_KIND(field);
1185 field_data = PyUnicode_DATA(field);
1186 field_len = PyUnicode_GET_LENGTH(field);
1187 }
1188 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001189 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001190 if (rec_len < 0)
1191 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 /* grow record buffer if necessary */
1194 if (!join_check_rec_size(self, rec_len))
1195 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001196
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001197 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001198 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001199 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001202}
1203
1204static int
1205join_append_lineterminator(WriterObj *self)
1206{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001207 Py_ssize_t terminator_len, i;
1208 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001209 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001210
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001211 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 if (terminator_len == -1)
1213 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001214
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 /* grow record buffer if necessary */
1216 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001218
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001219 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1220 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1221 for (i = 0; i < terminator_len; i++)
1222 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001224
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001225 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001226}
1227
1228PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001229"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001230"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001231"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001232"elements will be converted to string.");
1233
1234static PyObject *
1235csv_writerow(WriterObj *self, PyObject *seq)
1236{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001237 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001238 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001239
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001240 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001241 if (iter == NULL) {
1242 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001243 PyErr_Format(self->error_obj,
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001244 "iterable expected, not %.200s",
1245 Py_TYPE(seq)->tp_name);
1246 }
1247 return NULL;
1248 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 /* Join all fields in internal buffer.
1251 */
1252 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001253 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001254 int append_ok;
1255 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 switch (dialect->quoting) {
1258 case QUOTE_NONNUMERIC:
1259 quoted = !PyNumber_Check(field);
1260 break;
1261 case QUOTE_ALL:
1262 quoted = 1;
1263 break;
1264 default:
1265 quoted = 0;
1266 break;
1267 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001268
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001269 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001270 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001271 Py_DECREF(field);
1272 }
1273 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001274 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 Py_DECREF(field);
1276 }
1277 else {
1278 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001279
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 str = PyObject_Str(field);
1281 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001282 if (str == NULL) {
1283 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001284 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001285 }
1286 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001287 Py_DECREF(str);
1288 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001289 if (!append_ok) {
1290 Py_DECREF(iter);
1291 return NULL;
1292 }
1293 }
1294 Py_DECREF(iter);
1295 if (PyErr_Occurred())
1296 return NULL;
1297
Licht Takeuchi20019002017-12-12 18:57:06 +09001298 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001299 if (dialect->quoting == QUOTE_NONE) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001300 PyErr_Format(self->error_obj,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001301 "single empty field record must be quoted");
1302 return NULL;
1303 }
1304 self->num_fields--;
1305 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 return NULL;
1307 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001308
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 /* Add line terminator.
1310 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001311 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001312 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001313 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001314
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001315 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1316 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001317 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001318 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001319 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001320 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001321 Py_DECREF(line);
1322 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001323}
1324
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001325PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001326"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001327"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001328"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001329"elements will be converted to string.");
1330
Skip Montanarob4a04172003-03-20 23:29:12 +00001331static PyObject *
1332csv_writerows(WriterObj *self, PyObject *seqseq)
1333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001334 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001335
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 row_iter = PyObject_GetIter(seqseq);
1337 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338 return NULL;
1339 }
1340 while ((row_obj = PyIter_Next(row_iter))) {
1341 result = csv_writerow(self, row_obj);
1342 Py_DECREF(row_obj);
1343 if (!result) {
1344 Py_DECREF(row_iter);
1345 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001346 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001347 else
1348 Py_DECREF(result);
1349 }
1350 Py_DECREF(row_iter);
1351 if (PyErr_Occurred())
1352 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001353 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001354}
1355
1356static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1358 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1359 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001360};
1361
1362#define W_OFF(x) offsetof(WriterObj, x)
1363
1364static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1366 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001367};
1368
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001369static int
1370Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001373 Py_VISIT(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001374 Py_VISIT(self->error_obj);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001375 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001377}
1378
1379static int
1380Writer_clear(WriterObj *self)
1381{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001383 Py_CLEAR(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001384 Py_CLEAR(self->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001386}
1387
Petr Viktorin6a02b382020-12-15 15:14:35 +01001388static void
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001389Writer_dealloc(WriterObj *self)
Petr Viktorin6a02b382020-12-15 15:14:35 +01001390{
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001391 PyTypeObject *tp = Py_TYPE(self);
1392 PyObject_GC_UnTrack(self);
1393 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001394 if (self->rec != NULL) {
1395 PyMem_Free(self->rec);
1396 }
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001397 PyObject_GC_Del(self);
1398 Py_DECREF(tp);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001399}
1400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001402"CSV writer\n"
1403"\n"
1404"Writer objects are responsible for generating tabular data\n"
1405"in CSV format from sequence input.\n"
1406);
1407
Petr Viktorin6a02b382020-12-15 15:14:35 +01001408static PyType_Slot Writer_Type_slots[] = {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001409 {Py_tp_doc, (char*)Writer_Type_doc},
1410 {Py_tp_traverse, Writer_traverse},
1411 {Py_tp_clear, Writer_clear},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001412 {Py_tp_dealloc, Writer_dealloc},
Petr Viktorin6a02b382020-12-15 15:14:35 +01001413 {Py_tp_methods, Writer_methods},
1414 {Py_tp_members, Writer_memberlist},
1415 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +00001416};
1417
Petr Viktorin6a02b382020-12-15 15:14:35 +01001418PyType_Spec Writer_Type_spec = {
1419 .name = "_csv.writer",
1420 .basicsize = sizeof(WriterObj),
Miss Islington (bot)7297d742021-06-17 03:19:44 -07001421 .flags = (Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC |
1422 Py_TPFLAGS_IMMUTABLETYPE),
Petr Viktorin6a02b382020-12-15 15:14:35 +01001423 .slots = Writer_Type_slots,
1424};
1425
1426
Skip Montanarob4a04172003-03-20 23:29:12 +00001427static PyObject *
1428csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 PyObject * output_file, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001431 _csvstate *module_state = get_csv_state(module);
1432 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001433 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (!self)
1436 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001439 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001440
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 self->rec = NULL;
1442 self->rec_size = 0;
1443 self->rec_len = 0;
1444 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001445
Petr Viktorin6a02b382020-12-15 15:14:35 +01001446 self->error_obj = Py_NewRef(module_state->error_obj);
1447
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1449 Py_DECREF(self);
1450 return NULL;
1451 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001452 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1453 Py_DECREF(self);
1454 return NULL;
1455 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001456 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001457 PyErr_SetString(PyExc_TypeError,
1458 "argument 1 must have a \"write\" method");
1459 Py_DECREF(self);
1460 return NULL;
1461 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001462 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1463 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 if (self->dialect == NULL) {
1465 Py_DECREF(self);
1466 return NULL;
1467 }
1468 PyObject_GC_Track(self);
1469 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001470}
1471
1472/*
1473 * DIALECT REGISTRY
1474 */
1475static PyObject *
1476csv_list_dialects(PyObject *module, PyObject *args)
1477{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001478 return PyDict_Keys(get_csv_state(module)->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001479}
1480
1481static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001482csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001483{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001484 PyObject *name_obj, *dialect_obj = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001485 _csvstate *module_state = get_csv_state(module);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001487
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001488 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1489 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001490 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001491 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001492 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 return NULL;
1494 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001495 if (PyUnicode_READY(name_obj) == -1)
1496 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001497 dialect = _call_dialect(module_state, dialect_obj, kwargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001498 if (dialect == NULL)
1499 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001500 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001501 Py_DECREF(dialect);
1502 return NULL;
1503 }
1504 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001505 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001506}
1507
1508static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001509csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001510{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001511 _csvstate *module_state = get_csv_state(module);
1512 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001513 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001514 PyErr_Format(module_state->error_obj, "unknown dialect");
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001515 }
1516 return NULL;
1517 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001518 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001519}
1520
1521static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001522csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001523{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001524 return get_dialect_from_registry(name_obj, get_csv_state(module));
Skip Montanarob4a04172003-03-20 23:29:12 +00001525}
1526
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001527static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001528csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001529{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001530 PyObject *new_limit = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001531 _csvstate *module_state = get_csv_state(module);
1532 long old_limit = module_state->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001533
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001534 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1535 return NULL;
1536 if (new_limit != NULL) {
1537 if (!PyLong_CheckExact(new_limit)) {
1538 PyErr_Format(PyExc_TypeError,
1539 "limit must be an integer");
1540 return NULL;
1541 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001542 module_state->field_limit = PyLong_AsLong(new_limit);
1543 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1544 module_state->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001545 return NULL;
1546 }
1547 }
1548 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001549}
1550
Petr Viktorin6a02b382020-12-15 15:14:35 +01001551static PyType_Slot error_slots[] = {
1552 {0, NULL},
1553};
1554
1555PyType_Spec error_spec = {
1556 .name = "_csv.Error",
Miss Islington (bot)3e44e9a2021-05-12 07:02:46 -07001557 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001558 .slots = error_slots,
1559};
1560
Skip Montanarob4a04172003-03-20 23:29:12 +00001561/*
1562 * MODULE
1563 */
1564
/* Module docstring; referenced from the _csvmodule definition. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1622
/* Docstring attached to the "reader" entry of csv_methods. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001637
/* Docstring attached to the "writer" entry of csv_methods. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1651
1652PyDoc_STRVAR(csv_list_dialects_doc,
1653"Return a list of all know dialect names.\n"
1654" names = csv.list_dialects()");
1655
/* Docstring attached to the "get_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1659
1660PyDoc_STRVAR(csv_register_dialect_doc,
1661"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001662" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001663
/* Docstring attached to the "unregister_dialect" entry of csv_methods. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1667
/* Docstring attached to the "field_size_limit" entry of csv_methods. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1674
Skip Montanarob4a04172003-03-20 23:29:12 +00001675static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001676 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001678 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001679 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1680 { "list_dialects", (PyCFunction)csv_list_dialects,
1681 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001682 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1684 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1685 METH_O, csv_unregister_dialect_doc},
1686 { "get_dialect", (PyCFunction)csv_get_dialect,
1687 METH_O, csv_get_dialect_doc},
1688 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1689 METH_VARARGS, csv_field_size_limit_doc},
1690 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001691};
1692
Petr Viktorin6a02b382020-12-15 15:14:35 +01001693static int
1694csv_exec(PyObject *module) {
1695 const StyleDesc *style;
1696 PyObject *temp;
1697 _csvstate *module_state = get_csv_state(module);
1698
1699 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1700 module_state->dialect_type = (PyTypeObject *)temp;
1701 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1702 return -1;
1703 }
1704
1705 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1706 module_state->reader_type = (PyTypeObject *)temp;
1707 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1708 return -1;
1709 }
1710
1711 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1712 module_state->writer_type = (PyTypeObject *)temp;
1713 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1714 return -1;
1715 }
1716
1717 /* Add version to the module. */
1718 if (PyModule_AddStringConstant(module, "__version__",
1719 MODULE_VERSION) == -1) {
1720 return -1;
1721 }
1722
1723 /* Set the field limit */
1724 module_state->field_limit = 128 * 1024;
1725
1726 /* Add _dialects dictionary */
1727 module_state->dialects = PyDict_New();
1728 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1729 return -1;
1730 }
1731
1732 /* Add quote styles into dictionary */
1733 for (style = quote_styles; style->name; style++) {
1734 if (PyModule_AddIntConstant(module, style->name,
1735 style->style) == -1)
1736 return -1;
1737 }
1738
1739 /* Add the CSV exception object to the module. */
1740 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1741 if (bases == NULL) {
1742 return -1;
1743 }
1744 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1745 bases);
1746 Py_DECREF(bases);
1747 if (module_state->error_obj == NULL) {
1748 return -1;
1749 }
1750 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1751 return -1;
1752 }
1753
1754 return 0;
1755}
1756
1757static PyModuleDef_Slot csv_slots[] = {
1758 {Py_mod_exec, csv_exec},
1759 {0, NULL}
1760};
1761
Martin v. Löwis1a214512008-06-11 05:26:20 +00001762static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001763 PyModuleDef_HEAD_INIT,
1764 "_csv",
1765 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001766 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001767 csv_methods,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001768 csv_slots,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001769 _csv_traverse,
1770 _csv_clear,
1771 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001772};
1773
Skip Montanarob4a04172003-03-20 23:29:12 +00001774PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001775PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001776{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001777 return PyModuleDef_Init(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001778}