blob: a213734f5080684699514b06212a179693120081 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
Victor Stinner4a21e572020-04-15 02:35:41 +020014#include "structmember.h" // PyMemberDef
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
Petr Viktorin6a02b382020-12-15 15:14:35 +010021 PyTypeObject *dialect_type;
22 PyTypeObject *reader_type;
23 PyTypeObject *writer_type;
Antoine Pitroue7672d32012-05-16 11:33:08 +020024 long field_limit; /* max parsed field size */
25} _csvstate;
26
Petr Viktorin6a02b382020-12-15 15:14:35 +010027static struct PyModuleDef _csvmodule;
28
Hai Shif707d942020-03-16 21:15:01 +080029static inline _csvstate*
30get_csv_state(PyObject *module)
31{
32 void *state = PyModule_GetState(module);
33 assert(state != NULL);
34 return (_csvstate *)state;
35}
Antoine Pitroue7672d32012-05-16 11:33:08 +020036
37static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010038_csv_clear(PyObject *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020039{
Petr Viktorin6a02b382020-12-15 15:14:35 +010040 _csvstate *module_state = PyModule_GetState(module);
41 Py_CLEAR(module_state->error_obj);
42 Py_CLEAR(module_state->dialects);
43 Py_CLEAR(module_state->dialect_type);
44 Py_CLEAR(module_state->reader_type);
45 Py_CLEAR(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020046 return 0;
47}
48
49static int
Petr Viktorin6a02b382020-12-15 15:14:35 +010050_csv_traverse(PyObject *module, visitproc visit, void *arg)
Antoine Pitroue7672d32012-05-16 11:33:08 +020051{
Petr Viktorin6a02b382020-12-15 15:14:35 +010052 _csvstate *module_state = PyModule_GetState(module);
53 Py_VISIT(module_state->error_obj);
54 Py_VISIT(module_state->dialects);
55 Py_VISIT(module_state->dialect_type);
56 Py_VISIT(module_state->reader_type);
57 Py_VISIT(module_state->writer_type);
Antoine Pitroue7672d32012-05-16 11:33:08 +020058 return 0;
59}
60
61static void
Petr Viktorin6a02b382020-12-15 15:14:35 +010062_csv_free(void *module)
Antoine Pitroue7672d32012-05-16 11:33:08 +020063{
Petr Viktorin6a02b382020-12-15 15:14:35 +010064 _csv_clear((PyObject *)module);
Antoine Pitroue7672d32012-05-16 11:33:08 +020065}
66
/* States of the reader's character-driven parser. */
typedef enum {
    START_RECORD,             /* start of record */
    START_FIELD,              /* expecting field */
    ESCAPED_CHAR,             /* escapechar seen outside a quoted field */
    IN_FIELD,                 /* in unquoted field */
    IN_QUOTED_FIELD,          /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,   /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,    /* seen a quote in a quoted field */
    EAT_CRNL,                 /* entered after '\n' or '\r' ends a record */
    AFTER_ESCAPED_CRNL        /* entered after an escaped '\n' or '\r' */
} ParserState;
72
/* Accepted values of a dialect's "quoting" setting. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
76
77typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000078 QuoteStyle style;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020079 const char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000080} StyleDesc;
81
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020082static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
84 { QUOTE_ALL, "QUOTE_ALL" },
85 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
86 { QUOTE_NONE, "QUOTE_NONE" },
87 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000088};
89
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000092
Serhiy Storchaka323748a2018-07-26 13:21:09 +030093 char doublequote; /* is " represented by ""? */
94 char skipinitialspace; /* ignore spaces following delimiter? */
95 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030097 Py_UCS4 delimiter; /* field separator */
98 Py_UCS4 quotechar; /* quote character */
99 Py_UCS4 escapechar; /* escape character */
100 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101
Skip Montanarob4a04172003-03-20 23:29:12 +0000102} DialectObj;
103
Skip Montanarob4a04172003-03-20 23:29:12 +0000104typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000105 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000106
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000107 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000109 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject *fields; /* field list for current record */
112 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200113 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000114 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 Py_ssize_t field_len; /* length of current field */
116 int numeric_field; /* treat field as numeric */
117 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000118} ReaderObj;
119
Skip Montanarob4a04172003-03-20 23:29:12 +0000120typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000121 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +0200123 PyObject *write; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000125 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200127 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000128 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000129 Py_ssize_t rec_len; /* length of record */
130 int num_fields; /* number of fields in record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000131
Petr Viktorin6a02b382020-12-15 15:14:35 +0100132 PyObject *error_obj; /* cached error object */
133} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
/*
 * DIALECT class
 */
138
139static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100140get_dialect_from_registry(PyObject *name_obj, _csvstate *module_state)
Skip Montanarob4a04172003-03-20 23:29:12 +0000141{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000142 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143
Petr Viktorin6a02b382020-12-15 15:14:35 +0100144 dialect_obj = PyDict_GetItemWithError(module_state->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000145 if (dialect_obj == NULL) {
146 if (!PyErr_Occurred())
Petr Viktorin6a02b382020-12-15 15:14:35 +0100147 PyErr_Format(module_state->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 }
149 else
150 Py_INCREF(dialect_obj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000152 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153}
154
Skip Montanarob4a04172003-03-20 23:29:12 +0000155static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200156get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000157{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000158 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200159 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000160 }
161 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200162 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000163}
164
Skip Montanarob4a04172003-03-20 23:29:12 +0000165static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200166Dialect_get_lineterminator(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000167{
Dong-hee Na0383be42020-06-10 00:33:43 +0900168 Py_XINCREF(self->lineterminator);
169 return self->lineterminator;
Skip Montanarob4a04172003-03-20 23:29:12 +0000170}
171
Skip Montanarob4a04172003-03-20 23:29:12 +0000172static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200173Dialect_get_delimiter(DialectObj *self, void *Py_UNUSED(ignored))
Guido van Rossuma9769c22007-08-07 23:59:30 +0000174{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000176}
177
178static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200179Dialect_get_escapechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000182}
183
Andrew McNamara1196cf12005-01-07 04:42:45 +0000184static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200185Dialect_get_quotechar(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000186{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000187 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000188}
189
190static PyObject *
Serhiy Storchakad4f9cf52018-11-27 19:34:35 +0200191Dialect_get_quoting(DialectObj *self, void *Py_UNUSED(ignored))
Skip Montanarob4a04172003-03-20 23:29:12 +0000192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000193 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000194}
195
196static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300197_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000198{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000199 if (src == NULL)
200 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200201 else {
202 int b = PyObject_IsTrue(src);
203 if (b < 0)
204 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300205 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200206 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000208}
209
Andrew McNamara1196cf12005-01-07 04:42:45 +0000210static int
211_set_int(const char *name, int *target, PyObject *src, int dflt)
212{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (src == NULL)
214 *target = dflt;
215 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200216 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000217 if (!PyLong_CheckExact(src)) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an integer", name);
220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 value = _PyLong_AsInt(src);
223 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000224 return -1;
225 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200226 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000227 }
228 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229}
230
231static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200232_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000233{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000234 if (src == NULL)
235 *target = dflt;
236 else {
237 *target = '\0';
238 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000239 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200240 if (!PyUnicode_Check(src)) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be string, not %.200s", name,
Victor Stinnerdaa97562020-02-07 03:37:06 +0100243 Py_TYPE(src)->tp_name);
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200244 return -1;
245 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100246 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200247 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000248 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300249 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 name);
251 return -1;
252 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100253 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000254 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200255 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 }
257 }
258 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000259}
260
261static int
262_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
263{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000264 if (src == NULL)
265 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
266 else {
267 if (src == Py_None)
268 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100269 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000270 PyErr_Format(PyExc_TypeError,
271 "\"%s\" must be a string", name);
272 return -1;
273 }
274 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100275 if (PyUnicode_READY(src) == -1)
276 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000277 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300278 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 }
280 }
281 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282}
283
284static int
285dialect_check_quoting(int quoting)
286{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200287 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000289 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200290 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 return 0;
292 }
293 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
294 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000295}
Skip Montanarob4a04172003-03-20 23:29:12 +0000296
/* Byte offset of member `field` within DialectObj. */
#define D_OFF(field) offsetof(DialectObj, field)
298
299static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300300 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
301 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
302 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000304};
305
306static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000307 { "delimiter", (getter)Dialect_get_delimiter},
308 { "escapechar", (getter)Dialect_get_escapechar},
309 { "lineterminator", (getter)Dialect_get_lineterminator},
310 { "quotechar", (getter)Dialect_get_quotechar},
311 { "quoting", (getter)Dialect_get_quoting},
312 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000313};
314
315static void
316Dialect_dealloc(DialectObj *self)
317{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100318 PyTypeObject *tp = Py_TYPE(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700319 PyObject_GC_UnTrack(self);
320 tp->tp_clear((PyObject *)self);
321 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100322 Py_DECREF(tp);
323}
324
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000325static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000326 "dialect",
327 "delimiter",
328 "doublequote",
329 "escapechar",
330 "lineterminator",
331 "quotechar",
332 "quoting",
333 "skipinitialspace",
334 "strict",
335 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000336};
337
Petr Viktorin6a02b382020-12-15 15:14:35 +0100338static _csvstate *
339_csv_state_from_type(PyTypeObject *type, const char *name)
340{
341 PyObject *module = _PyType_GetModuleByDef(type, &_csvmodule);
342 if (module == NULL) {
343 return NULL;
344 }
345 _csvstate *module_state = PyModule_GetState(module);
346 if (module_state == NULL) {
347 PyErr_Format(PyExc_SystemError,
348 "%s: No _csv module state found", name);
349 return NULL;
350 }
351 return module_state;
352}
353
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000354static PyObject *
355dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000357 DialectObj *self;
358 PyObject *ret = NULL;
359 PyObject *dialect = NULL;
360 PyObject *delimiter = NULL;
361 PyObject *doublequote = NULL;
362 PyObject *escapechar = NULL;
363 PyObject *lineterminator = NULL;
364 PyObject *quotechar = NULL;
365 PyObject *quoting = NULL;
366 PyObject *skipinitialspace = NULL;
367 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000368
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
370 "|OOOOOOOOO", dialect_kws,
371 &dialect,
372 &delimiter,
373 &doublequote,
374 &escapechar,
375 &lineterminator,
376 &quotechar,
377 &quoting,
378 &skipinitialspace,
379 &strict))
380 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000381
Petr Viktorin6a02b382020-12-15 15:14:35 +0100382 _csvstate *module_state = _csv_state_from_type(type, "dialect_new");
383 if (module_state == NULL) {
384 return NULL;
385 }
386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100388 if (PyUnicode_Check(dialect)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100389 dialect = get_dialect_from_registry(dialect, module_state);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000390 if (dialect == NULL)
391 return NULL;
392 }
393 else
394 Py_INCREF(dialect);
395 /* Can we reuse this instance? */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100396 if (PyObject_TypeCheck(dialect, module_state->dialect_type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200397 delimiter == NULL &&
398 doublequote == NULL &&
399 escapechar == NULL &&
400 lineterminator == NULL &&
401 quotechar == NULL &&
402 quoting == NULL &&
403 skipinitialspace == NULL &&
404 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000405 return dialect;
406 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000407
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000408 self = (DialectObj *)type->tp_alloc(type, 0);
409 if (self == NULL) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100410 Py_CLEAR(dialect);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 return NULL;
412 }
413 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000414
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000415 Py_XINCREF(delimiter);
416 Py_XINCREF(doublequote);
417 Py_XINCREF(escapechar);
418 Py_XINCREF(lineterminator);
419 Py_XINCREF(quotechar);
420 Py_XINCREF(quoting);
421 Py_XINCREF(skipinitialspace);
422 Py_XINCREF(strict);
423 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000424#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000425 if (v == NULL) \
426 v = PyObject_GetAttrString(dialect, n)
427 DIALECT_GETATTR(delimiter, "delimiter");
428 DIALECT_GETATTR(doublequote, "doublequote");
429 DIALECT_GETATTR(escapechar, "escapechar");
430 DIALECT_GETATTR(lineterminator, "lineterminator");
431 DIALECT_GETATTR(quotechar, "quotechar");
432 DIALECT_GETATTR(quoting, "quoting");
433 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
434 DIALECT_GETATTR(strict, "strict");
435 PyErr_Clear();
436 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000437
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000438 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000439#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000440 if (meth(name, target, src, dflt)) \
441 goto err
442 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300443 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000444 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
445 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
446 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
447 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300448 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
449 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000451 /* validate options */
452 if (dialect_check_quoting(self->quoting))
453 goto err;
454 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200455 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300456 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000457 goto err;
458 }
459 if (quotechar == Py_None && quoting == NULL)
460 self->quoting = QUOTE_NONE;
461 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
462 PyErr_SetString(PyExc_TypeError,
463 "quotechar must be set if quoting enabled");
464 goto err;
465 }
466 if (self->lineterminator == 0) {
467 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
468 goto err;
469 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000470
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000471 ret = (PyObject *)self;
472 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000473err:
Petr Viktorin6a02b382020-12-15 15:14:35 +0100474 Py_CLEAR(self);
475 Py_CLEAR(dialect);
476 Py_CLEAR(delimiter);
477 Py_CLEAR(doublequote);
478 Py_CLEAR(escapechar);
479 Py_CLEAR(lineterminator);
480 Py_CLEAR(quotechar);
481 Py_CLEAR(quoting);
482 Py_CLEAR(skipinitialspace);
483 Py_CLEAR(strict);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000484 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000485}
486
/* Since dialect is now a heap type, it inherits the pickling methods for
 * protocols 0 and 1 from object; therefore they need to be overridden. */
489
490PyDoc_STRVAR(dialect_reduce_doc, "raises an exception to avoid pickling");
491
492static PyObject *
493Dialect_reduce(PyObject *self, PyObject *args) {
494 PyErr_Format(PyExc_TypeError,
495 "cannot pickle '%.100s' instances", _PyType_Name(Py_TYPE(self)));
496 return NULL;
497}
498
499static struct PyMethodDef dialect_methods[] = {
500 {"__reduce__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
501 {"__reduce_ex__", Dialect_reduce, METH_VARARGS, dialect_reduce_doc},
502 {NULL, NULL}
503};
Skip Montanarob4a04172003-03-20 23:29:12 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000506"CSV dialect\n"
507"\n"
508"The Dialect type records CSV parsing and generation options.\n");
509
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700510static int
511Dialect_clear(DialectObj *self)
512{
513 Py_CLEAR(self->lineterminator);
514 return 0;
515}
516
517static int
518Dialect_traverse(DialectObj *self, visitproc visit, void *arg)
519{
520 Py_VISIT(self->lineterminator);
521 Py_VISIT(Py_TYPE(self));
522 return 0;
523}
524
Petr Viktorin6a02b382020-12-15 15:14:35 +0100525static PyType_Slot Dialect_Type_slots[] = {
526 {Py_tp_doc, (char*)Dialect_Type_doc},
527 {Py_tp_members, Dialect_memberlist},
528 {Py_tp_getset, Dialect_getsetlist},
529 {Py_tp_new, dialect_new},
530 {Py_tp_methods, dialect_methods},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100531 {Py_tp_dealloc, Dialect_dealloc},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700532 {Py_tp_clear, Dialect_clear},
533 {Py_tp_traverse, Dialect_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100534 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000535};
536
Petr Viktorin6a02b382020-12-15 15:14:35 +0100537PyType_Spec Dialect_Type_spec = {
538 .name = "_csv.Dialect",
539 .basicsize = sizeof(DialectObj),
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700540 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
Petr Viktorin6a02b382020-12-15 15:14:35 +0100541 .slots = Dialect_Type_slots,
542};
543
544
Andrew McNamara91b97462005-01-11 01:07:23 +0000545/*
546 * Return an instance of the dialect type, given a Python instance or kwarg
547 * description of the dialect
548 */
549static PyObject *
Petr Viktorin6a02b382020-12-15 15:14:35 +0100550_call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
Andrew McNamara91b97462005-01-11 01:07:23 +0000551{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100552 PyObject *type = (PyObject *)module_state->dialect_type;
Victor Stinner6412f492016-08-23 00:21:34 +0200553 if (dialect_inst) {
Petr Viktorinffd97532020-02-11 17:46:57 +0100554 return PyObject_VectorcallDict(type, &dialect_inst, 1, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200555 }
556 else {
Petr Viktorinffd97532020-02-11 17:46:57 +0100557 return PyObject_VectorcallDict(type, NULL, 0, kwargs);
Victor Stinner6412f492016-08-23 00:21:34 +0200558 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000559}
560
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000564static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000565parse_save_field(ReaderObj *self)
566{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000568
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200569 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
570 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 if (field == NULL)
572 return -1;
573 self->field_len = 0;
574 if (self->numeric_field) {
575 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000577 self->numeric_field = 0;
578 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000579 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200580 if (tmp == NULL)
581 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000582 field = tmp;
583 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100584 if (PyList_Append(self->fields, field) < 0) {
585 Py_DECREF(field);
586 return -1;
587 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000588 Py_DECREF(field);
589 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590}
591
592static int
593parse_grow_buff(ReaderObj *self)
594{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500595 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
596
597 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
598 Py_UCS4 *field_new = self->field;
599 PyMem_Resize(field_new, Py_UCS4, field_size_new);
600 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000601 PyErr_NoMemory();
602 return 0;
603 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500604 self->field = field_new;
605 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000606 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607}
608
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000609static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100610parse_add_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000611{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100612 if (self->field_len >= module_state->field_limit) {
613 PyErr_Format(module_state->error_obj,
614 "field larger than field limit (%ld)",
615 module_state->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000616 return -1;
617 }
618 if (self->field_len == self->field_size && !parse_grow_buff(self))
619 return -1;
620 self->field[self->field_len++] = c;
621 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000622}
623
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000624static int
Petr Viktorin6a02b382020-12-15 15:14:35 +0100625parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000626{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000627 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000628
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000629 switch (self->state) {
630 case START_RECORD:
631 /* start of record */
632 if (c == '\0')
633 /* empty line - return [] */
634 break;
635 else if (c == '\n' || c == '\r') {
636 self->state = EAT_CRNL;
637 break;
638 }
639 /* normal character - handle as START_FIELD */
640 self->state = START_FIELD;
641 /* fallthru */
642 case START_FIELD:
643 /* expecting field */
644 if (c == '\n' || c == '\r' || c == '\0') {
645 /* save empty field - return [fields] */
646 if (parse_save_field(self) < 0)
647 return -1;
648 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
649 }
650 else if (c == dialect->quotechar &&
651 dialect->quoting != QUOTE_NONE) {
652 /* start quoted field */
653 self->state = IN_QUOTED_FIELD;
654 }
655 else if (c == dialect->escapechar) {
656 /* possible escaped character */
657 self->state = ESCAPED_CHAR;
658 }
659 else if (c == ' ' && dialect->skipinitialspace)
660 /* ignore space at start of field */
661 ;
662 else if (c == dialect->delimiter) {
663 /* save empty field */
664 if (parse_save_field(self) < 0)
665 return -1;
666 }
667 else {
668 /* begin new unquoted field */
669 if (dialect->quoting == QUOTE_NONNUMERIC)
670 self->numeric_field = 1;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100671 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000672 return -1;
673 self->state = IN_FIELD;
674 }
675 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400678 if (c == '\n' || c=='\r') {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100679 if (parse_add_char(self, module_state, c) < 0)
R David Murrayc7c42ef2013-03-19 22:41:47 -0400680 return -1;
681 self->state = AFTER_ESCAPED_CRNL;
682 break;
683 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000684 if (c == '\0')
685 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100686 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000687 return -1;
688 self->state = IN_FIELD;
689 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000690
R David Murrayc7c42ef2013-03-19 22:41:47 -0400691 case AFTER_ESCAPED_CRNL:
692 if (c == '\0')
693 break;
694 /*fallthru*/
695
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000696 case IN_FIELD:
697 /* in unquoted field */
698 if (c == '\n' || c == '\r' || c == '\0') {
699 /* end of line - return [fields] */
700 if (parse_save_field(self) < 0)
701 return -1;
702 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
703 }
704 else if (c == dialect->escapechar) {
705 /* possible escaped character */
706 self->state = ESCAPED_CHAR;
707 }
708 else if (c == dialect->delimiter) {
709 /* save field - wait for new field */
710 if (parse_save_field(self) < 0)
711 return -1;
712 self->state = START_FIELD;
713 }
714 else {
715 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100716 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 return -1;
718 }
719 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000720
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000721 case IN_QUOTED_FIELD:
722 /* in quoted field */
723 if (c == '\0')
724 ;
725 else if (c == dialect->escapechar) {
726 /* Possible escape character */
727 self->state = ESCAPE_IN_QUOTED_FIELD;
728 }
729 else if (c == dialect->quotechar &&
730 dialect->quoting != QUOTE_NONE) {
731 if (dialect->doublequote) {
732 /* doublequote; " represented by "" */
733 self->state = QUOTE_IN_QUOTED_FIELD;
734 }
735 else {
736 /* end of quote part of field */
737 self->state = IN_FIELD;
738 }
739 }
740 else {
741 /* normal character - save in field */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100742 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000743 return -1;
744 }
745 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000746
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000747 case ESCAPE_IN_QUOTED_FIELD:
748 if (c == '\0')
749 c = '\n';
Petr Viktorin6a02b382020-12-15 15:14:35 +0100750 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 return -1;
752 self->state = IN_QUOTED_FIELD;
753 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000755 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300756 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000757 if (dialect->quoting != QUOTE_NONE &&
758 c == dialect->quotechar) {
759 /* save "" as " */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100760 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000761 return -1;
762 self->state = IN_QUOTED_FIELD;
763 }
764 else if (c == dialect->delimiter) {
765 /* save field - wait for new field */
766 if (parse_save_field(self) < 0)
767 return -1;
768 self->state = START_FIELD;
769 }
770 else if (c == '\n' || c == '\r' || c == '\0') {
771 /* end of line - return [fields] */
772 if (parse_save_field(self) < 0)
773 return -1;
774 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
775 }
776 else if (!dialect->strict) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100777 if (parse_add_char(self, module_state, c) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000778 return -1;
779 self->state = IN_FIELD;
780 }
781 else {
782 /* illegal */
Petr Viktorin6a02b382020-12-15 15:14:35 +0100783 PyErr_Format(module_state->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000784 dialect->delimiter,
785 dialect->quotechar);
786 return -1;
787 }
788 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000789
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 case EAT_CRNL:
791 if (c == '\n' || c == '\r')
792 ;
793 else if (c == '\0')
794 self->state = START_RECORD;
795 else {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100796 PyErr_Format(module_state->error_obj,
797 "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000798 return -1;
799 }
800 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 }
803 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000804}
805
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000806static int
807parse_reset(ReaderObj *self)
808{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300809 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000810 if (self->fields == NULL)
811 return -1;
812 self->field_len = 0;
813 self->state = START_RECORD;
814 self->numeric_field = 0;
815 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000816}
Skip Montanarob4a04172003-03-20 23:29:12 +0000817
818static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000819Reader_iternext(ReaderObj *self)
820{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000821 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200822 Py_UCS4 c;
823 Py_ssize_t pos, linelen;
824 unsigned int kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +0300825 const void *data;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200826 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000827
Petr Viktorin6a02b382020-12-15 15:14:35 +0100828 _csvstate *module_state = _csv_state_from_type(Py_TYPE(self),
829 "Reader.__next__");
830 if (module_state == NULL) {
831 return NULL;
832 }
833
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 if (parse_reset(self) < 0)
835 return NULL;
836 do {
837 lineobj = PyIter_Next(self->input_iter);
838 if (lineobj == NULL) {
839 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700840 if (!PyErr_Occurred() && (self->field_len != 0 ||
841 self->state == IN_QUOTED_FIELD)) {
842 if (self->dialect->strict)
Petr Viktorin6a02b382020-12-15 15:14:35 +0100843 PyErr_SetString(module_state->error_obj,
Senthil Kumaran49d13022012-09-25 02:37:20 -0700844 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700845 else if (parse_save_field(self) >= 0)
846 break;
847 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000848 return NULL;
849 }
850 if (!PyUnicode_Check(lineobj)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +0100851 PyErr_Format(module_state->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 "iterator should return strings, "
853 "not %.200s "
Ram Rachum235f9182020-06-05 23:56:06 +0300854 "(the file should be opened in text mode)",
Victor Stinnerdaa97562020-02-07 03:37:06 +0100855 Py_TYPE(lineobj)->tp_name
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 );
857 Py_DECREF(lineobj);
858 return NULL;
859 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100860 if (PyUnicode_READY(lineobj) == -1) {
861 Py_DECREF(lineobj);
862 return NULL;
863 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200865 kind = PyUnicode_KIND(lineobj);
866 data = PyUnicode_DATA(lineobj);
867 pos = 0;
868 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000869 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200870 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000871 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000872 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100873 PyErr_Format(module_state->error_obj,
Benjamin Peterson7821b4c2019-06-18 21:37:58 -0700874 "line contains NUL");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000875 goto err;
876 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100877 if (parse_process_char(self, module_state, c) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000878 Py_DECREF(lineobj);
879 goto err;
880 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200881 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000882 }
883 Py_DECREF(lineobj);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100884 if (parse_process_char(self, module_state, 0) < 0)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000885 goto err;
886 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000887
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000888 fields = self->fields;
889 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000890err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000892}
893
894static void
895Reader_dealloc(ReaderObj *self)
896{
Petr Viktorin6a02b382020-12-15 15:14:35 +0100897 PyTypeObject *tp = Py_TYPE(self);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000898 PyObject_GC_UnTrack(self);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700899 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100900 if (self->field != NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000901 PyMem_Free(self->field);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100902 self->field = NULL;
903 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000904 PyObject_GC_Del(self);
Petr Viktorin6a02b382020-12-15 15:14:35 +0100905 Py_DECREF(tp);
906}
907
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000908static int
909Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
910{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000911 Py_VISIT(self->dialect);
912 Py_VISIT(self->input_iter);
913 Py_VISIT(self->fields);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700914 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000915 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000916}
917
918static int
919Reader_clear(ReaderObj *self)
920{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000921 Py_CLEAR(self->dialect);
922 Py_CLEAR(self->input_iter);
923 Py_CLEAR(self->fields);
924 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000925}
926
927PyDoc_STRVAR(Reader_Type_doc,
928"CSV reader\n"
929"\n"
930"Reader objects are responsible for reading and parsing tabular data\n"
931"in CSV format.\n"
932);
933
934static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000935 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000936};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000937#define R_OFF(x) offsetof(ReaderObj, x)
938
939static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
941 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
942 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000943};
944
Skip Montanarob4a04172003-03-20 23:29:12 +0000945
Petr Viktorin6a02b382020-12-15 15:14:35 +0100946static PyType_Slot Reader_Type_slots[] = {
947 {Py_tp_doc, (char*)Reader_Type_doc},
948 {Py_tp_traverse, Reader_traverse},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100949 {Py_tp_iter, PyObject_SelfIter},
950 {Py_tp_iternext, Reader_iternext},
951 {Py_tp_methods, Reader_methods},
952 {Py_tp_members, Reader_memberlist},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -0700953 {Py_tp_clear, Reader_clear},
Petr Viktorin6a02b382020-12-15 15:14:35 +0100954 {Py_tp_dealloc, Reader_dealloc},
955 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +0000956};
957
Petr Viktorin6a02b382020-12-15 15:14:35 +0100958PyType_Spec Reader_Type_spec = {
959 .name = "_csv.reader",
960 .basicsize = sizeof(ReaderObj),
961 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
962 .slots = Reader_Type_slots
963};
964
965
Skip Montanarob4a04172003-03-20 23:29:12 +0000966static PyObject *
967csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
968{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000969 PyObject * iterator, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +0100970 _csvstate *module_state = get_csv_state(module);
971 ReaderObj * self = PyObject_GC_New(
972 ReaderObj,
973 module_state->reader_type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000974
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000975 if (!self)
976 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000977
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000978 self->dialect = NULL;
979 self->fields = NULL;
980 self->input_iter = NULL;
981 self->field = NULL;
982 self->field_size = 0;
983 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000984
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000985 if (parse_reset(self) < 0) {
986 Py_DECREF(self);
987 return NULL;
988 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
991 Py_DECREF(self);
992 return NULL;
993 }
994 self->input_iter = PyObject_GetIter(iterator);
995 if (self->input_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000996 Py_DECREF(self);
997 return NULL;
998 }
Petr Viktorin6a02b382020-12-15 15:14:35 +0100999 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1000 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 if (self->dialect == NULL) {
1002 Py_DECREF(self);
1003 return NULL;
1004 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001005
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001006 PyObject_GC_Track(self);
1007 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001008}
1009
1010/*
1011 * WRITER
1012 */
1013/* ---------------------------------------------------------------- */
1014static void
1015join_reset(WriterObj *self)
1016{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001017 self->rec_len = 0;
1018 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001019}
1020
1021#define MEM_INCR 32768
1022
1023/* Calculate new record length or append field to record. Return new
1024 * record length.
1025 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001026static Py_ssize_t
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001027join_append_data(WriterObj *self, unsigned int field_kind, const void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001028 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001029 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001030{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001031 DialectObj *dialect = self->dialect;
1032 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001033 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001034
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001035#define INCLEN \
1036 do {\
1037 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1038 goto overflow; \
1039 } \
1040 rec_len++; \
1041 } while(0)
1042
1043#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001044 do {\
1045 if (copy_phase) \
1046 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001047 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001048 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001049
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001050 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001051
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001052 /* If this is not the first field we need a field separator */
1053 if (self->num_fields > 0)
1054 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001055
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001056 /* Handle preceding quote */
1057 if (copy_phase && *quoted)
1058 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001059
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001060 /* Copy/count field data */
1061 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001062 for (i = 0; field_data && (i < field_len); i++) {
1063 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001065
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001066 if (c == dialect->delimiter ||
1067 c == dialect->escapechar ||
1068 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001069 PyUnicode_FindChar(
1070 dialect->lineterminator, c, 0,
1071 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 if (dialect->quoting == QUOTE_NONE)
1073 want_escape = 1;
1074 else {
1075 if (c == dialect->quotechar) {
1076 if (dialect->doublequote)
1077 ADDCH(dialect->quotechar);
1078 else
1079 want_escape = 1;
1080 }
Berker Peksag5c0eed72020-09-20 09:38:07 +03001081 else if (c == dialect->escapechar) {
1082 want_escape = 1;
1083 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 if (!want_escape)
1085 *quoted = 1;
1086 }
1087 if (want_escape) {
1088 if (!dialect->escapechar) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001089 PyErr_Format(self->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001090 "need to escape, but no escapechar set");
1091 return -1;
1092 }
1093 ADDCH(dialect->escapechar);
1094 }
1095 }
1096 /* Copy field character into record buffer.
1097 */
1098 ADDCH(c);
1099 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001100
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 if (*quoted) {
1102 if (copy_phase)
1103 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001104 else {
1105 INCLEN; /* starting quote */
1106 INCLEN; /* ending quote */
1107 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 }
1109 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001110
1111 overflow:
1112 PyErr_NoMemory();
1113 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001114#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001115#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001116}
1117
1118static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001119join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001120{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001121 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001122
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001123 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001124 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1125 Py_UCS4 *rec_new = self->rec;
1126 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1127 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 PyErr_NoMemory();
1129 return 0;
1130 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001131 self->rec = rec_new;
1132 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 }
1134 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135}
1136
1137static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001138join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001139{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001140 unsigned int field_kind = -1;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001141 const void *field_data = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001142 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001143 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001146 if (PyUnicode_READY(field) == -1)
1147 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001148 field_kind = PyUnicode_KIND(field);
1149 field_data = PyUnicode_DATA(field);
1150 field_len = PyUnicode_GET_LENGTH(field);
1151 }
1152 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001153 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001154 if (rec_len < 0)
1155 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 /* grow record buffer if necessary */
1158 if (!join_check_rec_size(self, rec_len))
1159 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001160
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001161 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001162 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001164
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166}
1167
1168static int
1169join_append_lineterminator(WriterObj *self)
1170{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001171 Py_ssize_t terminator_len, i;
1172 unsigned int term_kind;
Serhiy Storchakacd8295f2020-04-11 10:48:40 +03001173 const void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001175 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001176 if (terminator_len == -1)
1177 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001178
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 /* grow record buffer if necessary */
1180 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1181 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001183 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1184 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1185 for (i = 0; i < terminator_len; i++)
1186 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001187 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001190}
1191
1192PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001193"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001194"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001195"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001196"elements will be converted to string.");
1197
1198static PyObject *
1199csv_writerow(WriterObj *self, PyObject *seq)
1200{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001202 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001203
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001204 iter = PyObject_GetIter(seq);
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001205 if (iter == NULL) {
1206 if (PyErr_ExceptionMatches(PyExc_TypeError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001207 PyErr_Format(self->error_obj,
Serhiy Storchakac88239f2020-06-22 11:21:59 +03001208 "iterable expected, not %.200s",
1209 Py_TYPE(seq)->tp_name);
1210 }
1211 return NULL;
1212 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001213
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001214 /* Join all fields in internal buffer.
1215 */
1216 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001217 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001218 int append_ok;
1219 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001220
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001221 switch (dialect->quoting) {
1222 case QUOTE_NONNUMERIC:
1223 quoted = !PyNumber_Check(field);
1224 break;
1225 case QUOTE_ALL:
1226 quoted = 1;
1227 break;
1228 default:
1229 quoted = 0;
1230 break;
1231 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001232
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001234 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001235 Py_DECREF(field);
1236 }
1237 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001238 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 Py_DECREF(field);
1240 }
1241 else {
1242 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001243
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 str = PyObject_Str(field);
1245 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001246 if (str == NULL) {
1247 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001249 }
1250 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001251 Py_DECREF(str);
1252 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001253 if (!append_ok) {
1254 Py_DECREF(iter);
1255 return NULL;
1256 }
1257 }
1258 Py_DECREF(iter);
1259 if (PyErr_Occurred())
1260 return NULL;
1261
Licht Takeuchi20019002017-12-12 18:57:06 +09001262 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001263 if (dialect->quoting == QUOTE_NONE) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001264 PyErr_Format(self->error_obj,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001265 "single empty field record must be quoted");
1266 return NULL;
1267 }
1268 self->num_fields--;
1269 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 return NULL;
1271 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001272
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 /* Add line terminator.
1274 */
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001275 if (!join_append_lineterminator(self)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001276 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001277 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001278
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001279 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1280 (void *) self->rec, self->rec_len);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001281 if (line == NULL) {
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001282 return NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001283 }
Petr Viktorinffd97532020-02-11 17:46:57 +01001284 result = PyObject_CallOneArg(self->write, line);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001285 Py_DECREF(line);
1286 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001287}
1288
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001289PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001290"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001291"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001292"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001293"elements will be converted to string.");
1294
Skip Montanarob4a04172003-03-20 23:29:12 +00001295static PyObject *
1296csv_writerows(WriterObj *self, PyObject *seqseq)
1297{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001298 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001299
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001300 row_iter = PyObject_GetIter(seqseq);
1301 if (row_iter == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 return NULL;
1303 }
1304 while ((row_obj = PyIter_Next(row_iter))) {
1305 result = csv_writerow(self, row_obj);
1306 Py_DECREF(row_obj);
1307 if (!result) {
1308 Py_DECREF(row_iter);
1309 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001310 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001311 else
1312 Py_DECREF(result);
1313 }
1314 Py_DECREF(row_iter);
1315 if (PyErr_Occurred())
1316 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001317 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001318}
1319
1320static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001321 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1322 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1323 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001324};
1325
1326#define W_OFF(x) offsetof(WriterObj, x)
1327
1328static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001329 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1330 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001331};
1332
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001333static int
1334Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_VISIT(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001337 Py_VISIT(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001338 Py_VISIT(self->error_obj);
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001339 Py_VISIT(Py_TYPE(self));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001340 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001341}
1342
1343static int
1344Writer_clear(WriterObj *self)
1345{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 Py_CLEAR(self->dialect);
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001347 Py_CLEAR(self->write);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001348 Py_CLEAR(self->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001350}
1351
Petr Viktorin6a02b382020-12-15 15:14:35 +01001352static void
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001353Writer_dealloc(WriterObj *self)
Petr Viktorin6a02b382020-12-15 15:14:35 +01001354{
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001355 PyTypeObject *tp = Py_TYPE(self);
1356 PyObject_GC_UnTrack(self);
1357 tp->tp_clear((PyObject *)self);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001358 if (self->rec != NULL) {
1359 PyMem_Free(self->rec);
1360 }
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001361 PyObject_GC_Del(self);
1362 Py_DECREF(tp);
Petr Viktorin6a02b382020-12-15 15:14:35 +01001363}
1364
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001365PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001366"CSV writer\n"
1367"\n"
1368"Writer objects are responsible for generating tabular data\n"
1369"in CSV format from sequence input.\n"
1370);
1371
Petr Viktorin6a02b382020-12-15 15:14:35 +01001372static PyType_Slot Writer_Type_slots[] = {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001373 {Py_tp_doc, (char*)Writer_Type_doc},
1374 {Py_tp_traverse, Writer_traverse},
1375 {Py_tp_clear, Writer_clear},
Miss Islington (bot)ba260ac2021-05-12 11:56:19 -07001376 {Py_tp_dealloc, Writer_dealloc},
Petr Viktorin6a02b382020-12-15 15:14:35 +01001377 {Py_tp_methods, Writer_methods},
1378 {Py_tp_members, Writer_memberlist},
1379 {0, NULL}
Skip Montanarob4a04172003-03-20 23:29:12 +00001380};
1381
Petr Viktorin6a02b382020-12-15 15:14:35 +01001382PyType_Spec Writer_Type_spec = {
1383 .name = "_csv.writer",
1384 .basicsize = sizeof(WriterObj),
1385 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_GC,
1386 .slots = Writer_Type_slots,
1387};
1388
1389
Skip Montanarob4a04172003-03-20 23:29:12 +00001390static PyObject *
1391csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1392{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 PyObject * output_file, * dialect = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001394 _csvstate *module_state = get_csv_state(module);
1395 WriterObj * self = PyObject_GC_New(WriterObj, module_state->writer_type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001396 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (!self)
1399 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 self->dialect = NULL;
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001402 self->write = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001403
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001404 self->rec = NULL;
1405 self->rec_size = 0;
1406 self->rec_len = 0;
1407 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001408
Petr Viktorin6a02b382020-12-15 15:14:35 +01001409 self->error_obj = Py_NewRef(module_state->error_obj);
1410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1412 Py_DECREF(self);
1413 return NULL;
1414 }
Serhiy Storchaka41c57b32019-09-01 12:03:39 +03001415 if (_PyObject_LookupAttrId(output_file, &PyId_write, &self->write) < 0) {
1416 Py_DECREF(self);
1417 return NULL;
1418 }
Rémi Lapeyre2bc158f2019-05-14 15:45:14 +02001419 if (self->write == NULL || !PyCallable_Check(self->write)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001420 PyErr_SetString(PyExc_TypeError,
1421 "argument 1 must have a \"write\" method");
1422 Py_DECREF(self);
1423 return NULL;
1424 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001425 self->dialect = (DialectObj *)_call_dialect(module_state, dialect,
1426 keyword_args);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 if (self->dialect == NULL) {
1428 Py_DECREF(self);
1429 return NULL;
1430 }
1431 PyObject_GC_Track(self);
1432 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001433}
1434
1435/*
1436 * DIALECT REGISTRY
1437 */
1438static PyObject *
1439csv_list_dialects(PyObject *module, PyObject *args)
1440{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001441 return PyDict_Keys(get_csv_state(module)->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001442}
1443
1444static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001445csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001446{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001447 PyObject *name_obj, *dialect_obj = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001448 _csvstate *module_state = get_csv_state(module);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001450
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1452 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001453 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001455 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001456 return NULL;
1457 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001458 if (PyUnicode_READY(name_obj) == -1)
1459 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001460 dialect = _call_dialect(module_state, dialect_obj, kwargs);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 if (dialect == NULL)
1462 return NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001463 if (PyDict_SetItem(module_state->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 Py_DECREF(dialect);
1465 return NULL;
1466 }
1467 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001468 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001469}
1470
1471static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001472csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001473{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001474 _csvstate *module_state = get_csv_state(module);
1475 if (PyDict_DelItem(module_state->dialects, name_obj) < 0) {
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001476 if (PyErr_ExceptionMatches(PyExc_KeyError)) {
Petr Viktorin6a02b382020-12-15 15:14:35 +01001477 PyErr_Format(module_state->error_obj, "unknown dialect");
Serhiy Storchakaa24107b2019-02-25 17:59:46 +02001478 }
1479 return NULL;
1480 }
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001481 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001482}
1483
1484static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001485csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001486{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001487 return get_dialect_from_registry(name_obj, get_csv_state(module));
Skip Montanarob4a04172003-03-20 23:29:12 +00001488}
1489
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001490static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001491csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001492{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001493 PyObject *new_limit = NULL;
Petr Viktorin6a02b382020-12-15 15:14:35 +01001494 _csvstate *module_state = get_csv_state(module);
1495 long old_limit = module_state->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001496
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001497 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1498 return NULL;
1499 if (new_limit != NULL) {
1500 if (!PyLong_CheckExact(new_limit)) {
1501 PyErr_Format(PyExc_TypeError,
1502 "limit must be an integer");
1503 return NULL;
1504 }
Petr Viktorin6a02b382020-12-15 15:14:35 +01001505 module_state->field_limit = PyLong_AsLong(new_limit);
1506 if (module_state->field_limit == -1 && PyErr_Occurred()) {
1507 module_state->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001508 return NULL;
1509 }
1510 }
1511 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001512}
1513
Petr Viktorin6a02b382020-12-15 15:14:35 +01001514static PyType_Slot error_slots[] = {
1515 {0, NULL},
1516};
1517
1518PyType_Spec error_spec = {
1519 .name = "_csv.Error",
Miss Islington (bot)3e44e9a2021-05-12 07:02:46 -07001520 .flags = Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001521 .slots = error_slots,
1522};
1523
Skip Montanarob4a04172003-03-20 23:29:12 +00001524/*
1525 * MODULE
1526 */
1527
1528PyDoc_STRVAR(csv_module_doc,
1529"CSV parsing and writing.\n"
1530"\n"
1531"This module provides classes that assist in the reading and writing\n"
1532"of Comma Separated Value (CSV) files, and implements the interface\n"
1533"described by PEP 305. Although many CSV files are simple to parse,\n"
1534"the format is not formally defined by a stable specification and\n"
1535"is subtle enough that parsing lines of a CSV file with something\n"
1536"like line.split(\",\") is bound to fail. The module supports three\n"
1537"basic APIs: reading, writing, and registration of dialects.\n"
1538"\n"
1539"\n"
1540"DIALECT REGISTRATION:\n"
1541"\n"
1542"Readers and writers support a dialect argument, which is a convenient\n"
1543"handle on a group of settings. When the dialect argument is a string,\n"
1544"it identifies one of the dialects previously registered with the module.\n"
1545"If it is a class or instance, the attributes of the argument are used as\n"
1546"the settings for the reader or writer:\n"
1547"\n"
1548" class excel:\n"
1549" delimiter = ','\n"
1550" quotechar = '\"'\n"
1551" escapechar = None\n"
1552" doublequote = True\n"
1553" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001554" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001555" quoting = QUOTE_MINIMAL\n"
1556"\n"
1557"SETTINGS:\n"
1558"\n"
oldkaa0735f2018-02-02 16:52:55 +08001559" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001560" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001561" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001562" field separator. It defaults to ','.\n"
1563" * skipinitialspace - specifies how to interpret whitespace which\n"
1564" immediately follows a delimiter. It defaults to False, which\n"
1565" means that whitespace immediately following a delimiter is part\n"
1566" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001567" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001568" terminate rows.\n"
1569" * quoting - controls when quotes should be generated by the writer.\n"
1570" It can take on any of the following module constants:\n"
1571"\n"
1572" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1573" field contains either the quotechar or the delimiter\n"
1574" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1575" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001576" fields which do not parse as integers or floating point\n"
1577" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001578" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001579" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001580" the delimiter when quoting is set to QUOTE_NONE.\n"
1581" * doublequote - controls the handling of quotes inside fields. When\n"
1582" True, two consecutive quotes are interpreted as one during read,\n"
1583" and when writing, each quote character embedded in the data is\n"
1584" written as two quotes\n");
1585
1586PyDoc_STRVAR(csv_reader_doc,
1587" csv_reader = reader(iterable [, dialect='excel']\n"
1588" [optional keyword args])\n"
1589" for row in csv_reader:\n"
1590" process(row)\n"
1591"\n"
1592"The \"iterable\" argument can be any object that returns a line\n"
1593"of input for each iteration, such as a file object or a list. The\n"
1594"optional \"dialect\" parameter is discussed below. The function\n"
1595"also accepts optional keyword arguments which override settings\n"
1596"provided by the dialect.\n"
1597"\n"
1598"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001599"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001600
1601PyDoc_STRVAR(csv_writer_doc,
1602" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1603" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001604" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001605" csv_writer.writerow(row)\n"
1606"\n"
1607" [or]\n"
1608"\n"
1609" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1610" [optional keyword args])\n"
1611" csv_writer.writerows(rows)\n"
1612"\n"
1613"The \"fileobj\" argument can be any object that supports the file API.\n");
1614
1615PyDoc_STRVAR(csv_list_dialects_doc,
1616"Return a list of all know dialect names.\n"
1617" names = csv.list_dialects()");
1618
/* Docstring for the module-level get_dialect() function. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1622
1623PyDoc_STRVAR(csv_register_dialect_doc,
1624"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001625" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001626
/* Docstring for the module-level unregister_dialect() function. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1630
/* Docstring for the module-level field_size_limit() function. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1637
Skip Montanarob4a04172003-03-20 23:29:12 +00001638static struct PyMethodDef csv_methods[] = {
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001639 { "reader", (PyCFunction)(void(*)(void))csv_reader,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001641 { "writer", (PyCFunction)(void(*)(void))csv_writer,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1643 { "list_dialects", (PyCFunction)csv_list_dialects,
1644 METH_NOARGS, csv_list_dialects_doc},
Serhiy Storchaka62be7422018-11-27 13:27:31 +02001645 { "register_dialect", (PyCFunction)(void(*)(void))csv_register_dialect,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001646 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1647 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1648 METH_O, csv_unregister_dialect_doc},
1649 { "get_dialect", (PyCFunction)csv_get_dialect,
1650 METH_O, csv_get_dialect_doc},
1651 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1652 METH_VARARGS, csv_field_size_limit_doc},
1653 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001654};
1655
Petr Viktorin6a02b382020-12-15 15:14:35 +01001656static int
1657csv_exec(PyObject *module) {
1658 const StyleDesc *style;
1659 PyObject *temp;
1660 _csvstate *module_state = get_csv_state(module);
1661
1662 temp = PyType_FromModuleAndSpec(module, &Dialect_Type_spec, NULL);
1663 module_state->dialect_type = (PyTypeObject *)temp;
1664 if (PyModule_AddObjectRef(module, "Dialect", temp) < 0) {
1665 return -1;
1666 }
1667
1668 temp = PyType_FromModuleAndSpec(module, &Reader_Type_spec, NULL);
1669 module_state->reader_type = (PyTypeObject *)temp;
1670 if (PyModule_AddObjectRef(module, "Reader", temp) < 0) {
1671 return -1;
1672 }
1673
1674 temp = PyType_FromModuleAndSpec(module, &Writer_Type_spec, NULL);
1675 module_state->writer_type = (PyTypeObject *)temp;
1676 if (PyModule_AddObjectRef(module, "Writer", temp) < 0) {
1677 return -1;
1678 }
1679
1680 /* Add version to the module. */
1681 if (PyModule_AddStringConstant(module, "__version__",
1682 MODULE_VERSION) == -1) {
1683 return -1;
1684 }
1685
1686 /* Set the field limit */
1687 module_state->field_limit = 128 * 1024;
1688
1689 /* Add _dialects dictionary */
1690 module_state->dialects = PyDict_New();
1691 if (PyModule_AddObjectRef(module, "_dialects", module_state->dialects) < 0) {
1692 return -1;
1693 }
1694
1695 /* Add quote styles into dictionary */
1696 for (style = quote_styles; style->name; style++) {
1697 if (PyModule_AddIntConstant(module, style->name,
1698 style->style) == -1)
1699 return -1;
1700 }
1701
1702 /* Add the CSV exception object to the module. */
1703 PyObject *bases = PyTuple_Pack(1, PyExc_Exception);
1704 if (bases == NULL) {
1705 return -1;
1706 }
1707 module_state->error_obj = PyType_FromModuleAndSpec(module, &error_spec,
1708 bases);
1709 Py_DECREF(bases);
1710 if (module_state->error_obj == NULL) {
1711 return -1;
1712 }
1713 if (PyModule_AddType(module, (PyTypeObject *)module_state->error_obj) != 0) {
1714 return -1;
1715 }
1716
1717 return 0;
1718}
1719
1720static PyModuleDef_Slot csv_slots[] = {
1721 {Py_mod_exec, csv_exec},
1722 {0, NULL}
1723};
1724
Martin v. Löwis1a214512008-06-11 05:26:20 +00001725static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001726 PyModuleDef_HEAD_INIT,
1727 "_csv",
1728 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001729 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001730 csv_methods,
Petr Viktorin6a02b382020-12-15 15:14:35 +01001731 csv_slots,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001732 _csv_traverse,
1733 _csv_clear,
1734 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001735};
1736
Skip Montanarob4a04172003-03-20 23:29:12 +00001737PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001738PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001739{
Petr Viktorin6a02b382020-12-15 15:14:35 +01001740 return PyModuleDef_Init(&_csvmodule);
Skip Montanarob4a04172003-03-20 23:29:12 +00001741}