blob: 4cc1f7c88d87799535396ff9a5aebf0e2466cc54 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
Antoine Pitroue7672d32012-05-16 11:33:08 +020018typedef struct {
19 PyObject *error_obj; /* CSV exception */
20 PyObject *dialects; /* Dialect registry */
21 long field_limit; /* max parsed field size */
22} _csvstate;
23
24#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
25
26static int
27_csv_clear(PyObject *m)
28{
29 Py_CLEAR(_csvstate(m)->error_obj);
30 Py_CLEAR(_csvstate(m)->dialects);
31 return 0;
32}
33
34static int
35_csv_traverse(PyObject *m, visitproc visit, void *arg)
36{
37 Py_VISIT(_csvstate(m)->error_obj);
38 Py_VISIT(_csvstate(m)->dialects);
39 return 0;
40}
41
42static void
43_csv_free(void *m)
44{
45 _csv_clear((PyObject *)m);
46}
47
48static struct PyModuleDef _csvmodule;
49
50#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000051
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,               /* start of record */
    START_FIELD,                /* expecting field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* in unquoted field */
    IN_QUOTED_FIELD,            /* in quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* seen a quote in a quoted field */
    EAT_CRNL,                   /* consuming line-ending characters */
    AFTER_ESCAPED_CRNL          /* just stored an escaped CR or NL */
} ParserState;
57
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of all known quoting policies; the entry whose name is NULL
 * terminates the table. */
static const StyleDesc quote_styles[] = {
    { .style = QUOTE_MINIMAL,    .name = "QUOTE_MINIMAL" },
    { .style = QUOTE_ALL,        .name = "QUOTE_ALL" },
    { .style = QUOTE_NONNUMERIC, .name = "QUOTE_NONNUMERIC" },
    { .style = QUOTE_NONE,       .name = "QUOTE_NONE" },
    { .style = 0,                .name = NULL }
};
74
75typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000076 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000077
Serhiy Storchaka323748a2018-07-26 13:21:09 +030078 char doublequote; /* is " represented by ""? */
79 char skipinitialspace; /* ignore spaces following delimiter? */
80 char strict; /* raise exception on bad CSV */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int quoting; /* style of quoting to write */
Serhiy Storchaka323748a2018-07-26 13:21:09 +030082 Py_UCS4 delimiter; /* field separator */
83 Py_UCS4 quotechar; /* quote character */
84 Py_UCS4 escapechar; /* escape character */
85 PyObject *lineterminator; /* string to write between records */
Skip Montanarob4a04172003-03-20 23:29:12 +000086
Skip Montanarob4a04172003-03-20 23:29:12 +000087} DialectObj;
88
Neal Norwitz227b5332006-03-22 09:28:35 +000089static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000090
91typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000092 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000093
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000094 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000095
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000096 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000098 PyObject *fields; /* field list for current record */
99 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200100 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000101 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000102 Py_ssize_t field_len; /* length of current field */
103 int numeric_field; /* treat field as numeric */
104 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000105} ReaderObj;
106
Neal Norwitz227b5332006-03-22 09:28:35 +0000107static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000108
Christian Heimes90aa7642007-12-19 02:45:37 +0000109#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
111typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000112 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000113
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000114 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000116 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000117
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200118 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000119 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000120 Py_ssize_t rec_len; /* length of record */
121 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000122} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000123
Neal Norwitz227b5332006-03-22 09:28:35 +0000124static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000125
126/*
127 * DIALECT class
128 */
129
130static PyObject *
131get_dialect_from_registry(PyObject * name_obj)
132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitroue7672d32012-05-16 11:33:08 +0200135 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 if (dialect_obj == NULL) {
137 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200138 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 }
140 else
141 Py_INCREF(dialect_obj);
142 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143}
144
Skip Montanarob4a04172003-03-20 23:29:12 +0000145static PyObject *
146get_string(PyObject *str)
147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 Py_XINCREF(str);
149 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000150}
151
Skip Montanarob4a04172003-03-20 23:29:12 +0000152static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200153get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200156 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300193_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300201 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200212 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200218 value = _PyLong_AsInt(src);
219 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000223 }
224 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000225}
226
227static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200228_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 if (src == NULL)
231 *target = dflt;
232 else {
233 *target = '\0';
234 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200236 if (!PyUnicode_Check(src)) {
237 PyErr_Format(PyExc_TypeError,
238 "\"%s\" must be string, not %.200s", name,
239 src->ob_type->tp_name);
240 return -1;
241 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100242 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200243 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300245 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 name);
247 return -1;
248 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100249 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200251 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 }
253 }
254 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000255}
256
257static int
258_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 if (src == NULL)
261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262 else {
263 if (src == Py_None)
264 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100265 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be a string", name);
268 return -1;
269 }
270 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 if (PyUnicode_READY(src) == -1)
272 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300274 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 }
276 }
277 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000278}
279
280static int
281dialect_check_quoting(int quoting)
282{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200283 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200286 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 return 0;
288 }
289 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291}
Skip Montanarob4a04172003-03-20 23:29:12 +0000292
293#define D_OFF(x) offsetof(DialectObj, x)
294
295static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300296 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
297 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
298 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000300};
301
302static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "delimiter", (getter)Dialect_get_delimiter},
304 { "escapechar", (getter)Dialect_get_escapechar},
305 { "lineterminator", (getter)Dialect_get_lineterminator},
306 { "quotechar", (getter)Dialect_get_quotechar},
307 { "quoting", (getter)Dialect_get_quoting},
308 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000309};
310
311static void
312Dialect_dealloc(DialectObj *self)
313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 Py_XDECREF(self->lineterminator);
315 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000316}
317
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000318static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 "dialect",
320 "delimiter",
321 "doublequote",
322 "escapechar",
323 "lineterminator",
324 "quotechar",
325 "quoting",
326 "skipinitialspace",
327 "strict",
328 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000329};
330
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000331static PyObject *
332dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 DialectObj *self;
335 PyObject *ret = NULL;
336 PyObject *dialect = NULL;
337 PyObject *delimiter = NULL;
338 PyObject *doublequote = NULL;
339 PyObject *escapechar = NULL;
340 PyObject *lineterminator = NULL;
341 PyObject *quotechar = NULL;
342 PyObject *quoting = NULL;
343 PyObject *skipinitialspace = NULL;
344 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347 "|OOOOOOOOO", dialect_kws,
348 &dialect,
349 &delimiter,
350 &doublequote,
351 &escapechar,
352 &lineterminator,
353 &quotechar,
354 &quoting,
355 &skipinitialspace,
356 &strict))
357 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100360 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 dialect = get_dialect_from_registry(dialect);
362 if (dialect == NULL)
363 return NULL;
364 }
365 else
366 Py_INCREF(dialect);
367 /* Can we reuse this instance? */
368 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200369 delimiter == NULL &&
370 doublequote == NULL &&
371 escapechar == NULL &&
372 lineterminator == NULL &&
373 quotechar == NULL &&
374 quoting == NULL &&
375 skipinitialspace == NULL &&
376 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 return dialect;
378 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 self = (DialectObj *)type->tp_alloc(type, 0);
381 if (self == NULL) {
382 Py_XDECREF(dialect);
383 return NULL;
384 }
385 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 Py_XINCREF(delimiter);
388 Py_XINCREF(doublequote);
389 Py_XINCREF(escapechar);
390 Py_XINCREF(lineterminator);
391 Py_XINCREF(quotechar);
392 Py_XINCREF(quoting);
393 Py_XINCREF(skipinitialspace);
394 Py_XINCREF(strict);
395 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000396#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 if (v == NULL) \
398 v = PyObject_GetAttrString(dialect, n)
399 DIALECT_GETATTR(delimiter, "delimiter");
400 DIALECT_GETATTR(doublequote, "doublequote");
401 DIALECT_GETATTR(escapechar, "escapechar");
402 DIALECT_GETATTR(lineterminator, "lineterminator");
403 DIALECT_GETATTR(quotechar, "quotechar");
404 DIALECT_GETATTR(quoting, "quoting");
405 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406 DIALECT_GETATTR(strict, "strict");
407 PyErr_Clear();
408 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000411#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 if (meth(name, target, src, dflt)) \
413 goto err
414 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300415 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300420 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
421 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 /* validate options */
424 if (dialect_check_quoting(self->quoting))
425 goto err;
426 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200427 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300428 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 goto err;
430 }
431 if (quotechar == Py_None && quoting == NULL)
432 self->quoting = QUOTE_NONE;
433 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434 PyErr_SetString(PyExc_TypeError,
435 "quotechar must be set if quoting enabled");
436 goto err;
437 }
438 if (self->lineterminator == 0) {
439 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440 goto err;
441 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 ret = (PyObject *)self;
444 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000445err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 Py_XDECREF(self);
447 Py_XDECREF(dialect);
448 Py_XDECREF(delimiter);
449 Py_XDECREF(doublequote);
450 Py_XDECREF(escapechar);
451 Py_XDECREF(lineterminator);
452 Py_XDECREF(quotechar);
453 Py_XDECREF(quoting);
454 Py_XDECREF(skipinitialspace);
455 Py_XDECREF(strict);
456 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000457}
458
459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000461"CSV dialect\n"
462"\n"
463"The Dialect type records CSV parsing and generation options.\n");
464
465static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 PyVarObject_HEAD_INIT(NULL, 0)
467 "_csv.Dialect", /* tp_name */
468 sizeof(DialectObj), /* tp_basicsize */
469 0, /* tp_itemsize */
470 /* methods */
471 (destructor)Dialect_dealloc, /* tp_dealloc */
472 (printfunc)0, /* tp_print */
473 (getattrfunc)0, /* tp_getattr */
474 (setattrfunc)0, /* tp_setattr */
475 0, /* tp_reserved */
476 (reprfunc)0, /* tp_repr */
477 0, /* tp_as_number */
478 0, /* tp_as_sequence */
479 0, /* tp_as_mapping */
480 (hashfunc)0, /* tp_hash */
481 (ternaryfunc)0, /* tp_call */
482 (reprfunc)0, /* tp_str */
483 0, /* tp_getattro */
484 0, /* tp_setattro */
485 0, /* tp_as_buffer */
486 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487 Dialect_Type_doc, /* tp_doc */
488 0, /* tp_traverse */
489 0, /* tp_clear */
490 0, /* tp_richcompare */
491 0, /* tp_weaklistoffset */
492 0, /* tp_iter */
493 0, /* tp_iternext */
494 0, /* tp_methods */
495 Dialect_memberlist, /* tp_members */
496 Dialect_getsetlist, /* tp_getset */
497 0, /* tp_base */
498 0, /* tp_dict */
499 0, /* tp_descr_get */
500 0, /* tp_descr_set */
501 0, /* tp_dictoffset */
502 0, /* tp_init */
503 0, /* tp_alloc */
504 dialect_new, /* tp_new */
505 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000506};
507
Andrew McNamara91b97462005-01-11 01:07:23 +0000508/*
509 * Return an instance of the dialect type, given a Python instance or kwarg
510 * description of the dialect
511 */
512static PyObject *
513_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514{
Victor Stinner6412f492016-08-23 00:21:34 +0200515 PyObject *type = (PyObject *)&Dialect_Type;
516 if (dialect_inst) {
517 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
518 }
519 else {
520 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
521 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000522}
523
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000524/*
525 * READER
526 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000527static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000528parse_save_field(ReaderObj *self)
529{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000531
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200532 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
533 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 if (field == NULL)
535 return -1;
536 self->field_len = 0;
537 if (self->numeric_field) {
538 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 self->numeric_field = 0;
541 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200543 if (tmp == NULL)
544 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 field = tmp;
546 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100547 if (PyList_Append(self->fields, field) < 0) {
548 Py_DECREF(field);
549 return -1;
550 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 Py_DECREF(field);
552 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000553}
554
555static int
556parse_grow_buff(ReaderObj *self)
557{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500558 assert((size_t)self->field_size <= PY_SSIZE_T_MAX / sizeof(Py_UCS4));
559
560 Py_ssize_t field_size_new = self->field_size ? 2 * self->field_size : 4096;
561 Py_UCS4 *field_new = self->field;
562 PyMem_Resize(field_new, Py_UCS4, field_size_new);
563 if (field_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 PyErr_NoMemory();
565 return 0;
566 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +0500567 self->field = field_new;
568 self->field_size = field_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000570}
571
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000572static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200573parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000574{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200575 if (self->field_len >= _csvstate_global->field_limit) {
576 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
577 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 return -1;
579 }
580 if (self->field_len == self->field_size && !parse_grow_buff(self))
581 return -1;
582 self->field[self->field_len++] = c;
583 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000584}
585
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000586static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200587parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000588{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000589 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000591 switch (self->state) {
592 case START_RECORD:
593 /* start of record */
594 if (c == '\0')
595 /* empty line - return [] */
596 break;
597 else if (c == '\n' || c == '\r') {
598 self->state = EAT_CRNL;
599 break;
600 }
601 /* normal character - handle as START_FIELD */
602 self->state = START_FIELD;
603 /* fallthru */
604 case START_FIELD:
605 /* expecting field */
606 if (c == '\n' || c == '\r' || c == '\0') {
607 /* save empty field - return [fields] */
608 if (parse_save_field(self) < 0)
609 return -1;
610 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
611 }
612 else if (c == dialect->quotechar &&
613 dialect->quoting != QUOTE_NONE) {
614 /* start quoted field */
615 self->state = IN_QUOTED_FIELD;
616 }
617 else if (c == dialect->escapechar) {
618 /* possible escaped character */
619 self->state = ESCAPED_CHAR;
620 }
621 else if (c == ' ' && dialect->skipinitialspace)
622 /* ignore space at start of field */
623 ;
624 else if (c == dialect->delimiter) {
625 /* save empty field */
626 if (parse_save_field(self) < 0)
627 return -1;
628 }
629 else {
630 /* begin new unquoted field */
631 if (dialect->quoting == QUOTE_NONNUMERIC)
632 self->numeric_field = 1;
633 if (parse_add_char(self, c) < 0)
634 return -1;
635 self->state = IN_FIELD;
636 }
637 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000638
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000639 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400640 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400641 if (parse_add_char(self, c) < 0)
642 return -1;
643 self->state = AFTER_ESCAPED_CRNL;
644 break;
645 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 if (c == '\0')
647 c = '\n';
648 if (parse_add_char(self, c) < 0)
649 return -1;
650 self->state = IN_FIELD;
651 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652
R David Murrayc7c42ef2013-03-19 22:41:47 -0400653 case AFTER_ESCAPED_CRNL:
654 if (c == '\0')
655 break;
656 /*fallthru*/
657
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000658 case IN_FIELD:
659 /* in unquoted field */
660 if (c == '\n' || c == '\r' || c == '\0') {
661 /* end of line - return [fields] */
662 if (parse_save_field(self) < 0)
663 return -1;
664 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
665 }
666 else if (c == dialect->escapechar) {
667 /* possible escaped character */
668 self->state = ESCAPED_CHAR;
669 }
670 else if (c == dialect->delimiter) {
671 /* save field - wait for new field */
672 if (parse_save_field(self) < 0)
673 return -1;
674 self->state = START_FIELD;
675 }
676 else {
677 /* normal character - save in field */
678 if (parse_add_char(self, c) < 0)
679 return -1;
680 }
681 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000683 case IN_QUOTED_FIELD:
684 /* in quoted field */
685 if (c == '\0')
686 ;
687 else if (c == dialect->escapechar) {
688 /* Possible escape character */
689 self->state = ESCAPE_IN_QUOTED_FIELD;
690 }
691 else if (c == dialect->quotechar &&
692 dialect->quoting != QUOTE_NONE) {
693 if (dialect->doublequote) {
694 /* doublequote; " represented by "" */
695 self->state = QUOTE_IN_QUOTED_FIELD;
696 }
697 else {
698 /* end of quote part of field */
699 self->state = IN_FIELD;
700 }
701 }
702 else {
703 /* normal character - save in field */
704 if (parse_add_char(self, c) < 0)
705 return -1;
706 }
707 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000708
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000709 case ESCAPE_IN_QUOTED_FIELD:
710 if (c == '\0')
711 c = '\n';
712 if (parse_add_char(self, c) < 0)
713 return -1;
714 self->state = IN_QUOTED_FIELD;
715 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300718 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000719 if (dialect->quoting != QUOTE_NONE &&
720 c == dialect->quotechar) {
721 /* save "" as " */
722 if (parse_add_char(self, c) < 0)
723 return -1;
724 self->state = IN_QUOTED_FIELD;
725 }
726 else if (c == dialect->delimiter) {
727 /* save field - wait for new field */
728 if (parse_save_field(self) < 0)
729 return -1;
730 self->state = START_FIELD;
731 }
732 else if (c == '\n' || c == '\r' || c == '\0') {
733 /* end of line - return [fields] */
734 if (parse_save_field(self) < 0)
735 return -1;
736 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
737 }
738 else if (!dialect->strict) {
739 if (parse_add_char(self, c) < 0)
740 return -1;
741 self->state = IN_FIELD;
742 }
743 else {
744 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200745 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000746 dialect->delimiter,
747 dialect->quotechar);
748 return -1;
749 }
750 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000751
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000752 case EAT_CRNL:
753 if (c == '\n' || c == '\r')
754 ;
755 else if (c == '\0')
756 self->state = START_RECORD;
757 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200758 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000759 return -1;
760 }
761 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000762
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000763 }
764 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000765}
766
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000767static int
768parse_reset(ReaderObj *self)
769{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300770 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 if (self->fields == NULL)
772 return -1;
773 self->field_len = 0;
774 self->state = START_RECORD;
775 self->numeric_field = 0;
776 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000777}
Skip Montanarob4a04172003-03-20 23:29:12 +0000778
779static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000780Reader_iternext(ReaderObj *self)
781{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000782 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200783 Py_UCS4 c;
784 Py_ssize_t pos, linelen;
785 unsigned int kind;
786 void *data;
787 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000788
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000789 if (parse_reset(self) < 0)
790 return NULL;
791 do {
792 lineobj = PyIter_Next(self->input_iter);
793 if (lineobj == NULL) {
794 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700795 if (!PyErr_Occurred() && (self->field_len != 0 ||
796 self->state == IN_QUOTED_FIELD)) {
797 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700798 PyErr_SetString(_csvstate_global->error_obj,
799 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700800 else if (parse_save_field(self) >= 0)
801 break;
802 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000803 return NULL;
804 }
805 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200806 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 "iterator should return strings, "
808 "not %.200s "
809 "(did you open the file in text mode?)",
810 lineobj->ob_type->tp_name
811 );
812 Py_DECREF(lineobj);
813 return NULL;
814 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100815 if (PyUnicode_READY(lineobj) == -1) {
816 Py_DECREF(lineobj);
817 return NULL;
818 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000819 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200820 kind = PyUnicode_KIND(lineobj);
821 data = PyUnicode_DATA(lineobj);
822 pos = 0;
823 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000824 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200825 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000826 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000827 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200828 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000829 "line contains NULL byte");
830 goto err;
831 }
832 if (parse_process_char(self, c) < 0) {
833 Py_DECREF(lineobj);
834 goto err;
835 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200836 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 }
838 Py_DECREF(lineobj);
839 if (parse_process_char(self, 0) < 0)
840 goto err;
841 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000842
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000843 fields = self->fields;
844 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000845err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000847}
848
849static void
850Reader_dealloc(ReaderObj *self)
851{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000852 PyObject_GC_UnTrack(self);
853 Py_XDECREF(self->dialect);
854 Py_XDECREF(self->input_iter);
855 Py_XDECREF(self->fields);
856 if (self->field != NULL)
857 PyMem_Free(self->field);
858 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000859}
860
861static int
862Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
863{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000864 Py_VISIT(self->dialect);
865 Py_VISIT(self->input_iter);
866 Py_VISIT(self->fields);
867 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000868}
869
870static int
871Reader_clear(ReaderObj *self)
872{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000873 Py_CLEAR(self->dialect);
874 Py_CLEAR(self->input_iter);
875 Py_CLEAR(self->fields);
876 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000877}
878
879PyDoc_STRVAR(Reader_Type_doc,
880"CSV reader\n"
881"\n"
882"Reader objects are responsible for reading and parsing tabular data\n"
883"in CSV format.\n"
884);
885
886static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000887 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000888};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000889#define R_OFF(x) offsetof(ReaderObj, x)
890
891static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000892 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
893 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
894 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000895};
896
Skip Montanarob4a04172003-03-20 23:29:12 +0000897
898static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 PyVarObject_HEAD_INIT(NULL, 0)
900 "_csv.reader", /*tp_name*/
901 sizeof(ReaderObj), /*tp_basicsize*/
902 0, /*tp_itemsize*/
903 /* methods */
904 (destructor)Reader_dealloc, /*tp_dealloc*/
905 (printfunc)0, /*tp_print*/
906 (getattrfunc)0, /*tp_getattr*/
907 (setattrfunc)0, /*tp_setattr*/
908 0, /*tp_reserved*/
909 (reprfunc)0, /*tp_repr*/
910 0, /*tp_as_number*/
911 0, /*tp_as_sequence*/
912 0, /*tp_as_mapping*/
913 (hashfunc)0, /*tp_hash*/
914 (ternaryfunc)0, /*tp_call*/
915 (reprfunc)0, /*tp_str*/
916 0, /*tp_getattro*/
917 0, /*tp_setattro*/
918 0, /*tp_as_buffer*/
919 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
920 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
921 Reader_Type_doc, /*tp_doc*/
922 (traverseproc)Reader_traverse, /*tp_traverse*/
923 (inquiry)Reader_clear, /*tp_clear*/
924 0, /*tp_richcompare*/
925 0, /*tp_weaklistoffset*/
926 PyObject_SelfIter, /*tp_iter*/
927 (getiterfunc)Reader_iternext, /*tp_iternext*/
928 Reader_methods, /*tp_methods*/
929 Reader_memberlist, /*tp_members*/
930 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000931
932};
933
934static PyObject *
935csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
936{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000937 PyObject * iterator, * dialect = NULL;
938 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000939
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000940 if (!self)
941 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000942
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000943 self->dialect = NULL;
944 self->fields = NULL;
945 self->input_iter = NULL;
946 self->field = NULL;
947 self->field_size = 0;
948 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000949
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000950 if (parse_reset(self) < 0) {
951 Py_DECREF(self);
952 return NULL;
953 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000954
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
956 Py_DECREF(self);
957 return NULL;
958 }
959 self->input_iter = PyObject_GetIter(iterator);
960 if (self->input_iter == NULL) {
961 PyErr_SetString(PyExc_TypeError,
962 "argument 1 must be an iterator");
963 Py_DECREF(self);
964 return NULL;
965 }
966 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
967 if (self->dialect == NULL) {
968 Py_DECREF(self);
969 return NULL;
970 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000971
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000972 PyObject_GC_Track(self);
973 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000974}
975
976/*
977 * WRITER
978 */
979/* ---------------------------------------------------------------- */
980static void
981join_reset(WriterObj *self)
982{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000983 self->rec_len = 0;
984 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000985}
986
987#define MEM_INCR 32768
988
989/* Calculate new record length or append field to record. Return new
990 * record length.
991 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000992static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200993join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +0300994 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200995 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000996{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000997 DialectObj *dialect = self->dialect;
998 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000999 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001000
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001001#define INCLEN \
1002 do {\
1003 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1004 goto overflow; \
1005 } \
1006 rec_len++; \
1007 } while(0)
1008
1009#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 do {\
1011 if (copy_phase) \
1012 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001013 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 /* If this is not the first field we need a field separator */
1019 if (self->num_fields > 0)
1020 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 /* Handle preceding quote */
1023 if (copy_phase && *quoted)
1024 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* Copy/count field data */
1027 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001028 for (i = 0; field_data && (i < field_len); i++) {
1029 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 if (c == dialect->delimiter ||
1033 c == dialect->escapechar ||
1034 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001035 PyUnicode_FindChar(
1036 dialect->lineterminator, c, 0,
1037 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (dialect->quoting == QUOTE_NONE)
1039 want_escape = 1;
1040 else {
1041 if (c == dialect->quotechar) {
1042 if (dialect->doublequote)
1043 ADDCH(dialect->quotechar);
1044 else
1045 want_escape = 1;
1046 }
1047 if (!want_escape)
1048 *quoted = 1;
1049 }
1050 if (want_escape) {
1051 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001052 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 "need to escape, but no escapechar set");
1054 return -1;
1055 }
1056 ADDCH(dialect->escapechar);
1057 }
1058 }
1059 /* Copy field character into record buffer.
1060 */
1061 ADDCH(c);
1062 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 if (*quoted) {
1065 if (copy_phase)
1066 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001067 else {
1068 INCLEN; /* starting quote */
1069 INCLEN; /* ending quote */
1070 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001071 }
1072 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001073
1074 overflow:
1075 PyErr_NoMemory();
1076 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001077#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001078#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001079}
1080
1081static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001082join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001083{
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001084 assert(rec_len >= 0);
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001085
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001086 if (rec_len > self->rec_size) {
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001087 size_t rec_size_new = (size_t)(rec_len / MEM_INCR + 1) * MEM_INCR;
1088 Py_UCS4 *rec_new = self->rec;
1089 PyMem_Resize(rec_new, Py_UCS4, rec_size_new);
1090 if (rec_new == NULL) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 PyErr_NoMemory();
1092 return 0;
1093 }
Sergey Fedoseev67b9cc82018-08-16 09:27:50 +05001094 self->rec = rec_new;
1095 self->rec_size = (Py_ssize_t)rec_size_new;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001096 }
1097 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001098}
1099
1100static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001101join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001102{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103 unsigned int field_kind = -1;
1104 void *field_data = NULL;
1105 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001106 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001108 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001109 if (PyUnicode_READY(field) == -1)
1110 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001111 field_kind = PyUnicode_KIND(field);
1112 field_data = PyUnicode_DATA(field);
1113 field_len = PyUnicode_GET_LENGTH(field);
1114 }
1115 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001116 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001117 if (rec_len < 0)
1118 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001119
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001120 /* grow record buffer if necessary */
1121 if (!join_check_rec_size(self, rec_len))
1122 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001123
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001124 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001125 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001126 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001128 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129}
1130
1131static int
1132join_append_lineterminator(WriterObj *self)
1133{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001134 Py_ssize_t terminator_len, i;
1135 unsigned int term_kind;
1136 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001138 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001139 if (terminator_len == -1)
1140 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001142 /* grow record buffer if necessary */
1143 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1144 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001145
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001146 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1147 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1148 for (i = 0; i < terminator_len; i++)
1149 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001150 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001151
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001152 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001153}
1154
1155PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001156"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001157"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001158"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001159"elements will be converted to string.");
1160
1161static PyObject *
1162csv_writerow(WriterObj *self, PyObject *seq)
1163{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001164 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001165 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001167 iter = PyObject_GetIter(seq);
1168 if (iter == NULL)
1169 return PyErr_Format(_csvstate_global->error_obj,
1170 "iterable expected, not %.200s",
1171 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 /* Join all fields in internal buffer.
1174 */
1175 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001176 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001177 int append_ok;
1178 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001179
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001180 switch (dialect->quoting) {
1181 case QUOTE_NONNUMERIC:
1182 quoted = !PyNumber_Check(field);
1183 break;
1184 case QUOTE_ALL:
1185 quoted = 1;
1186 break;
1187 default:
1188 quoted = 0;
1189 break;
1190 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001191
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001192 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001193 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 Py_DECREF(field);
1195 }
1196 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001197 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 Py_DECREF(field);
1199 }
1200 else {
1201 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001202
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001203 str = PyObject_Str(field);
1204 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001205 if (str == NULL) {
1206 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001208 }
1209 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001210 Py_DECREF(str);
1211 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001212 if (!append_ok) {
1213 Py_DECREF(iter);
1214 return NULL;
1215 }
1216 }
1217 Py_DECREF(iter);
1218 if (PyErr_Occurred())
1219 return NULL;
1220
Licht Takeuchi20019002017-12-12 18:57:06 +09001221 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001222 if (dialect->quoting == QUOTE_NONE) {
1223 PyErr_Format(_csvstate_global->error_obj,
1224 "single empty field record must be quoted");
1225 return NULL;
1226 }
1227 self->num_fields--;
1228 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001229 return NULL;
1230 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001231
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001232 /* Add line terminator.
1233 */
1234 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001235 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001236
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001237 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1238 (void *) self->rec, self->rec_len);
1239 if (line == NULL)
1240 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001241 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001242 Py_DECREF(line);
1243 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001244}
1245
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001246PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001247"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001248"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001249"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001250"elements will be converted to string.");
1251
Skip Montanarob4a04172003-03-20 23:29:12 +00001252static PyObject *
1253csv_writerows(WriterObj *self, PyObject *seqseq)
1254{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001255 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001257 row_iter = PyObject_GetIter(seqseq);
1258 if (row_iter == NULL) {
1259 PyErr_SetString(PyExc_TypeError,
1260 "writerows() argument must be iterable");
1261 return NULL;
1262 }
1263 while ((row_obj = PyIter_Next(row_iter))) {
1264 result = csv_writerow(self, row_obj);
1265 Py_DECREF(row_obj);
1266 if (!result) {
1267 Py_DECREF(row_iter);
1268 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001269 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001270 else
1271 Py_DECREF(result);
1272 }
1273 Py_DECREF(row_iter);
1274 if (PyErr_Occurred())
1275 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001276 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001277}
1278
1279static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1281 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1282 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001283};
1284
1285#define W_OFF(x) offsetof(WriterObj, x)
1286
1287static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1289 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001290};
1291
1292static void
1293Writer_dealloc(WriterObj *self)
1294{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001295 PyObject_GC_UnTrack(self);
1296 Py_XDECREF(self->dialect);
1297 Py_XDECREF(self->writeline);
1298 if (self->rec != NULL)
1299 PyMem_Free(self->rec);
1300 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001301}
1302
1303static int
1304Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1305{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001306 Py_VISIT(self->dialect);
1307 Py_VISIT(self->writeline);
1308 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001309}
1310
1311static int
1312Writer_clear(WriterObj *self)
1313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 Py_CLEAR(self->dialect);
1315 Py_CLEAR(self->writeline);
1316 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001317}
1318
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001319PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001320"CSV writer\n"
1321"\n"
1322"Writer objects are responsible for generating tabular data\n"
1323"in CSV format from sequence input.\n"
1324);
1325
1326static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 PyVarObject_HEAD_INIT(NULL, 0)
1328 "_csv.writer", /*tp_name*/
1329 sizeof(WriterObj), /*tp_basicsize*/
1330 0, /*tp_itemsize*/
1331 /* methods */
1332 (destructor)Writer_dealloc, /*tp_dealloc*/
1333 (printfunc)0, /*tp_print*/
1334 (getattrfunc)0, /*tp_getattr*/
1335 (setattrfunc)0, /*tp_setattr*/
1336 0, /*tp_reserved*/
1337 (reprfunc)0, /*tp_repr*/
1338 0, /*tp_as_number*/
1339 0, /*tp_as_sequence*/
1340 0, /*tp_as_mapping*/
1341 (hashfunc)0, /*tp_hash*/
1342 (ternaryfunc)0, /*tp_call*/
1343 (reprfunc)0, /*tp_str*/
1344 0, /*tp_getattro*/
1345 0, /*tp_setattro*/
1346 0, /*tp_as_buffer*/
1347 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1348 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1349 Writer_Type_doc,
1350 (traverseproc)Writer_traverse, /*tp_traverse*/
1351 (inquiry)Writer_clear, /*tp_clear*/
1352 0, /*tp_richcompare*/
1353 0, /*tp_weaklistoffset*/
1354 (getiterfunc)0, /*tp_iter*/
1355 (getiterfunc)0, /*tp_iternext*/
1356 Writer_methods, /*tp_methods*/
1357 Writer_memberlist, /*tp_members*/
1358 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001359};
1360
1361static PyObject *
1362csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1363{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001364 PyObject * output_file, * dialect = NULL;
1365 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001366 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001367
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001368 if (!self)
1369 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001370
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001371 self->dialect = NULL;
1372 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001373
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001374 self->rec = NULL;
1375 self->rec_size = 0;
1376 self->rec_len = 0;
1377 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1380 Py_DECREF(self);
1381 return NULL;
1382 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001383 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001384 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1385 PyErr_SetString(PyExc_TypeError,
1386 "argument 1 must have a \"write\" method");
1387 Py_DECREF(self);
1388 return NULL;
1389 }
1390 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1391 if (self->dialect == NULL) {
1392 Py_DECREF(self);
1393 return NULL;
1394 }
1395 PyObject_GC_Track(self);
1396 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001397}
1398
1399/*
1400 * DIALECT REGISTRY
1401 */
1402static PyObject *
1403csv_list_dialects(PyObject *module, PyObject *args)
1404{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001405 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001406}
1407
1408static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001409csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001410{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001411 PyObject *name_obj, *dialect_obj = NULL;
1412 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001414 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1415 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001416 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001417 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001418 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 return NULL;
1420 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001421 if (PyUnicode_READY(name_obj) == -1)
1422 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001423 dialect = _call_dialect(dialect_obj, kwargs);
1424 if (dialect == NULL)
1425 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001426 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 Py_DECREF(dialect);
1428 return NULL;
1429 }
1430 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001431 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001432}
1433
1434static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001435csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001436{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001437 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1438 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001439 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001440}
1441
1442static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001443csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001444{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001446}
1447
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001448static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001449csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001450{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001451 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001452 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001453
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001454 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1455 return NULL;
1456 if (new_limit != NULL) {
1457 if (!PyLong_CheckExact(new_limit)) {
1458 PyErr_Format(PyExc_TypeError,
1459 "limit must be an integer");
1460 return NULL;
1461 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001462 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1463 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1464 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001465 return NULL;
1466 }
1467 }
1468 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001469}
1470
Skip Montanarob4a04172003-03-20 23:29:12 +00001471/*
1472 * MODULE
1473 */
1474
1475PyDoc_STRVAR(csv_module_doc,
1476"CSV parsing and writing.\n"
1477"\n"
1478"This module provides classes that assist in the reading and writing\n"
1479"of Comma Separated Value (CSV) files, and implements the interface\n"
1480"described by PEP 305. Although many CSV files are simple to parse,\n"
1481"the format is not formally defined by a stable specification and\n"
1482"is subtle enough that parsing lines of a CSV file with something\n"
1483"like line.split(\",\") is bound to fail. The module supports three\n"
1484"basic APIs: reading, writing, and registration of dialects.\n"
1485"\n"
1486"\n"
1487"DIALECT REGISTRATION:\n"
1488"\n"
1489"Readers and writers support a dialect argument, which is a convenient\n"
1490"handle on a group of settings. When the dialect argument is a string,\n"
1491"it identifies one of the dialects previously registered with the module.\n"
1492"If it is a class or instance, the attributes of the argument are used as\n"
1493"the settings for the reader or writer:\n"
1494"\n"
1495" class excel:\n"
1496" delimiter = ','\n"
1497" quotechar = '\"'\n"
1498" escapechar = None\n"
1499" doublequote = True\n"
1500" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001501" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001502" quoting = QUOTE_MINIMAL\n"
1503"\n"
1504"SETTINGS:\n"
1505"\n"
oldkaa0735f2018-02-02 16:52:55 +08001506" * quotechar - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001507" quoting character. It defaults to '\"'.\n"
oldkaa0735f2018-02-02 16:52:55 +08001508" * delimiter - specifies a one-character string to use as the\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001509" field separator. It defaults to ','.\n"
1510" * skipinitialspace - specifies how to interpret whitespace which\n"
1511" immediately follows a delimiter. It defaults to False, which\n"
1512" means that whitespace immediately following a delimiter is part\n"
1513" of the following field.\n"
oldkaa0735f2018-02-02 16:52:55 +08001514" * lineterminator - specifies the character sequence which should\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001515" terminate rows.\n"
1516" * quoting - controls when quotes should be generated by the writer.\n"
1517" It can take on any of the following module constants:\n"
1518"\n"
1519" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1520" field contains either the quotechar or the delimiter\n"
1521" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1522" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001523" fields which do not parse as integers or floating point\n"
1524" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001525" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
oldkaa0735f2018-02-02 16:52:55 +08001526" * escapechar - specifies a one-character string used to escape\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001527" the delimiter when quoting is set to QUOTE_NONE.\n"
1528" * doublequote - controls the handling of quotes inside fields. When\n"
1529" True, two consecutive quotes are interpreted as one during read,\n"
1530" and when writing, each quote character embedded in the data is\n"
1531" written as two quotes\n");
1532
/* Docstring for the module-level "reader" function; it is attached to
 * the function through the "reader" entry of csv_methods[]. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001547
/* Docstring for the module-level "writer" function; it is attached to
 * the function through the "writer" entry of csv_methods[].  It shows
 * the two usage forms: one writerow() call per row, or a single
 * writerows() call. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1561
1562PyDoc_STRVAR(csv_list_dialects_doc,
1563"Return a list of all know dialect names.\n"
1564" names = csv.list_dialects()");
1565
/* Docstring for the module-level "get_dialect" function; it is
 * attached to the function through csv_methods[]. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1569
/* Docstring for the module-level "register_dialect" function; it is
 * attached to the function through csv_methods[].
 * NOTE(review): the usage line shows an assigned result ("dialect = ...");
 * confirm against csv_register_dialect() that a value is really returned. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001573
/* Docstring for the module-level "unregister_dialect" function; it is
 * attached to the function through csv_methods[]. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1577
/* Docstring for the module-level "field_size_limit" function; it is
 * attached to the function through csv_methods[].  The limit argument
 * is optional, as the bracketed signature in the text indicates. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1584
Skip Montanarob4a04172003-03-20 23:29:12 +00001585static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001586 { "reader", (PyCFunction)csv_reader,
1587 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1588 { "writer", (PyCFunction)csv_writer,
1589 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1590 { "list_dialects", (PyCFunction)csv_list_dialects,
1591 METH_NOARGS, csv_list_dialects_doc},
1592 { "register_dialect", (PyCFunction)csv_register_dialect,
1593 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1594 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1595 METH_O, csv_unregister_dialect_doc},
1596 { "get_dialect", (PyCFunction)csv_get_dialect,
1597 METH_O, csv_get_dialect_doc},
1598 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1599 METH_VARARGS, csv_field_size_limit_doc},
1600 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001601};
1602
Martin v. Löwis1a214512008-06-11 05:26:20 +00001603static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001604 PyModuleDef_HEAD_INIT,
1605 "_csv",
1606 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001607 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001608 csv_methods,
1609 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001610 _csv_traverse,
1611 _csv_clear,
1612 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001613};
1614
Skip Montanarob4a04172003-03-20 23:29:12 +00001615PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001616PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001617{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001619 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001620
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001621 if (PyType_Ready(&Dialect_Type) < 0)
1622 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001623
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001624 if (PyType_Ready(&Reader_Type) < 0)
1625 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001626
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001627 if (PyType_Ready(&Writer_Type) < 0)
1628 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001629
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001630 /* Create the module and add the functions */
1631 module = PyModule_Create(&_csvmodule);
1632 if (module == NULL)
1633 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001634
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001635 /* Add version to the module. */
1636 if (PyModule_AddStringConstant(module, "__version__",
1637 MODULE_VERSION) == -1)
1638 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001639
Antoine Pitroue7672d32012-05-16 11:33:08 +02001640 /* Set the field limit */
1641 _csvstate(module)->field_limit = 128 * 1024;
1642 /* Do I still need to add this var to the Module Dict? */
1643
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001644 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001645 _csvstate(module)->dialects = PyDict_New();
1646 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001647 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001648 Py_INCREF(_csvstate(module)->dialects);
1649 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001650 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001651
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001652 /* Add quote styles into dictionary */
1653 for (style = quote_styles; style->name; style++) {
1654 if (PyModule_AddIntConstant(module, style->name,
1655 style->style) == -1)
1656 return NULL;
1657 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 /* Add the Dialect type */
1660 Py_INCREF(&Dialect_Type);
1661 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1662 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001663
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001664 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001665 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1666 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001667 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001668 Py_INCREF(_csvstate(module)->error_obj);
1669 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001670 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001671}