blob: f58538c42729797f6f39e7506822b3a7a4c697fc [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
Serhiy Storchaka323748a2018-07-26 13:21:09 +030015#include <stdbool.h>
Skip Montanarob4a04172003-03-20 23:29:12 +000016
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000017
/* Per-module state; obtained from a module object with PyModule_GetState(). */
typedef struct {
    PyObject *error_obj;   /* CSV exception */
    PyObject *dialects;    /* Dialect registry (queried as a dict by
                              get_dialect_from_registry) */
    long field_limit;      /* max parsed field size */
} _csvstate;

/* Return the _csvstate stored on module object `o`. */
#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
25
26static int
27_csv_clear(PyObject *m)
28{
29 Py_CLEAR(_csvstate(m)->error_obj);
30 Py_CLEAR(_csvstate(m)->dialects);
31 return 0;
32}
33
34static int
35_csv_traverse(PyObject *m, visitproc visit, void *arg)
36{
37 Py_VISIT(_csvstate(m)->error_obj);
38 Py_VISIT(_csvstate(m)->dialects);
39 return 0;
40}
41
42static void
43_csv_free(void *m)
44{
45 _csv_clear((PyObject *)m);
46}
47
static struct PyModuleDef _csvmodule;

/* State of the _csv module instance located through PyState_FindModule();
   re-evaluated on every use. */
#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000051
/* States of the reader's character-driven parser (see parse_process_char). */
typedef enum {
    START_RECORD,            /* nothing of the current record consumed yet */
    START_FIELD,             /* expecting the first character of a field */
    ESCAPED_CHAR,            /* previous char was the escapechar (unquoted) */
    IN_FIELD,                /* inside an unquoted field */
    IN_QUOTED_FIELD,         /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,  /* previous char was the escapechar (quoted) */
    QUOTE_IN_QUOTED_FIELD,   /* quotechar seen inside a quoted field */
    EAT_CRNL,                /* record finished; skipping '\r' / '\n' */
    AFTER_ESCAPED_CRNL       /* an escaped '\r' or '\n' was just stored */
} ParserState;
57
/*
 * Quoting modes stored in DialectObj.quoting.  What this file shows about
 * them on the reader side:
 *   QUOTE_MINIMAL    - default used by dialect_new when "quoting" is absent
 *   QUOTE_NONNUMERIC - unquoted fields are converted with PyNumber_Float()
 *   QUOTE_NONE       - the quotechar is not treated specially
 * NOTE(review): writer-side meaning of each mode is outside this view.
 */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
61
/* Pairs a QuoteStyle value with its symbolic name. */
typedef struct {
    QuoteStyle style;    /* enum value */
    const char *name;    /* symbolic name; NULL marks the end of a table */
} StyleDesc;
66
Serhiy Storchaka2d06e842015-12-25 19:53:18 +020067static const StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000068 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
69 { QUOTE_ALL, "QUOTE_ALL" },
70 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
71 { QUOTE_NONE, "QUOTE_NONE" },
72 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000073};
74
/*
 * Instance layout of _csv.Dialect: the parsing/formatting options after
 * validation by dialect_new.  The three char flags are exposed as T_BOOL
 * members; the remaining fields are exposed through getters.
 */
typedef struct {
    PyObject_HEAD

    char doublequote;           /* is " represented by ""? */
    char skipinitialspace;      /* ignore spaces following delimiter? */
    char strict;                /* raise exception on bad CSV */
    int quoting;                /* style of quoting to write (a QuoteStyle) */
    Py_UCS4 delimiter;          /* field separator */
    Py_UCS4 quotechar;          /* quote character; 0 means "not set" */
    Py_UCS4 escapechar;         /* escape character; 0 means "not set" */
    PyObject *lineterminator;   /* string to write between records */

} DialectObj;

static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000090
/* Instance layout of the reader: input iterator plus parser state. */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    Py_UCS4 *field;         /* temporary buffer (grown by parse_grow_buff) */
    Py_ssize_t field_size;  /* size of allocated buffer, in Py_UCS4 units */
    Py_ssize_t field_len;   /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number */
} ReaderObj;

static PyTypeObject Reader_Type;

/* True when `v` is exactly a Reader_Type instance (subclasses excluded). */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000110
/* Instance layout of the writer: output callable plus record buffer. */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    Py_UCS4 *rec;           /* buffer for parser.join */
    Py_ssize_t rec_size;    /* size of allocated record */
    Py_ssize_t rec_len;     /* length of record */
    int num_fields;         /* number of fields in record */
} WriterObj;

static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000125
/*
 * DIALECT class
 */
129
130static PyObject *
131get_dialect_from_registry(PyObject * name_obj)
132{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000133 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitroue7672d32012-05-16 11:33:08 +0200135 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000136 if (dialect_obj == NULL) {
137 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200138 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000139 }
140 else
141 Py_INCREF(dialect_obj);
142 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000143}
144
Skip Montanarob4a04172003-03-20 23:29:12 +0000145static PyObject *
146get_string(PyObject *str)
147{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000148 Py_XINCREF(str);
149 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000150}
151
Skip Montanarob4a04172003-03-20 23:29:12 +0000152static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200153get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 if (c == '\0') {
Serhiy Storchaka228b12e2017-01-23 09:47:21 +0200156 Py_RETURN_NONE;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300193_set_bool(const char *name, char *target, PyObject *src, bool dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300201 *target = (char)b;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200212 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200218 value = _PyLong_AsInt(src);
219 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000223 }
224 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000225}
226
227static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200228_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 if (src == NULL)
231 *target = dflt;
232 else {
233 *target = '\0';
234 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200236 if (!PyUnicode_Check(src)) {
237 PyErr_Format(PyExc_TypeError,
238 "\"%s\" must be string, not %.200s", name,
239 src->ob_type->tp_name);
240 return -1;
241 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100242 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200243 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300245 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 name);
247 return -1;
248 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100249 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200251 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 }
253 }
254 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000255}
256
257static int
258_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 if (src == NULL)
261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262 else {
263 if (src == Py_None)
264 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100265 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be a string", name);
268 return -1;
269 }
270 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 if (PyUnicode_READY(src) == -1)
272 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300274 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 }
276 }
277 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000278}
279
280static int
281dialect_check_quoting(int quoting)
282{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200283 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200286 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 return 0;
288 }
289 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291}
Skip Montanarob4a04172003-03-20 23:29:12 +0000292
293#define D_OFF(x) offsetof(DialectObj, x)
294
295static struct PyMemberDef Dialect_memberlist[] = {
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300296 { "skipinitialspace", T_BOOL, D_OFF(skipinitialspace), READONLY },
297 { "doublequote", T_BOOL, D_OFF(doublequote), READONLY },
298 { "strict", T_BOOL, D_OFF(strict), READONLY },
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000299 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000300};
301
302static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "delimiter", (getter)Dialect_get_delimiter},
304 { "escapechar", (getter)Dialect_get_escapechar},
305 { "lineterminator", (getter)Dialect_get_lineterminator},
306 { "quotechar", (getter)Dialect_get_quotechar},
307 { "quoting", (getter)Dialect_get_quoting},
308 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000309};
310
311static void
312Dialect_dealloc(DialectObj *self)
313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 Py_XDECREF(self->lineterminator);
315 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000316}
317
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000318static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 "dialect",
320 "delimiter",
321 "doublequote",
322 "escapechar",
323 "lineterminator",
324 "quotechar",
325 "quoting",
326 "skipinitialspace",
327 "strict",
328 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000329};
330
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000331static PyObject *
332dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 DialectObj *self;
335 PyObject *ret = NULL;
336 PyObject *dialect = NULL;
337 PyObject *delimiter = NULL;
338 PyObject *doublequote = NULL;
339 PyObject *escapechar = NULL;
340 PyObject *lineterminator = NULL;
341 PyObject *quotechar = NULL;
342 PyObject *quoting = NULL;
343 PyObject *skipinitialspace = NULL;
344 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347 "|OOOOOOOOO", dialect_kws,
348 &dialect,
349 &delimiter,
350 &doublequote,
351 &escapechar,
352 &lineterminator,
353 &quotechar,
354 &quoting,
355 &skipinitialspace,
356 &strict))
357 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100360 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 dialect = get_dialect_from_registry(dialect);
362 if (dialect == NULL)
363 return NULL;
364 }
365 else
366 Py_INCREF(dialect);
367 /* Can we reuse this instance? */
368 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
Serhiy Storchaka0b3ec192017-03-23 17:53:47 +0200369 delimiter == NULL &&
370 doublequote == NULL &&
371 escapechar == NULL &&
372 lineterminator == NULL &&
373 quotechar == NULL &&
374 quoting == NULL &&
375 skipinitialspace == NULL &&
376 strict == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000377 return dialect;
378 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 self = (DialectObj *)type->tp_alloc(type, 0);
381 if (self == NULL) {
382 Py_XDECREF(dialect);
383 return NULL;
384 }
385 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 Py_XINCREF(delimiter);
388 Py_XINCREF(doublequote);
389 Py_XINCREF(escapechar);
390 Py_XINCREF(lineterminator);
391 Py_XINCREF(quotechar);
392 Py_XINCREF(quoting);
393 Py_XINCREF(skipinitialspace);
394 Py_XINCREF(strict);
395 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000396#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 if (v == NULL) \
398 v = PyObject_GetAttrString(dialect, n)
399 DIALECT_GETATTR(delimiter, "delimiter");
400 DIALECT_GETATTR(doublequote, "doublequote");
401 DIALECT_GETATTR(escapechar, "escapechar");
402 DIALECT_GETATTR(lineterminator, "lineterminator");
403 DIALECT_GETATTR(quotechar, "quotechar");
404 DIALECT_GETATTR(quoting, "quoting");
405 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406 DIALECT_GETATTR(strict, "strict");
407 PyErr_Clear();
408 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000411#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 if (meth(name, target, src, dflt)) \
413 goto err
414 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300415 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, true);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000416 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
Serhiy Storchaka323748a2018-07-26 13:21:09 +0300420 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, false);
421 DIASET(_set_bool, "strict", &self->strict, strict, false);
Skip Montanarob4a04172003-03-20 23:29:12 +0000422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 /* validate options */
424 if (dialect_check_quoting(self->quoting))
425 goto err;
426 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200427 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300428 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 goto err;
430 }
431 if (quotechar == Py_None && quoting == NULL)
432 self->quoting = QUOTE_NONE;
433 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434 PyErr_SetString(PyExc_TypeError,
435 "quotechar must be set if quoting enabled");
436 goto err;
437 }
438 if (self->lineterminator == 0) {
439 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440 goto err;
441 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 ret = (PyObject *)self;
444 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000445err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 Py_XDECREF(self);
447 Py_XDECREF(dialect);
448 Py_XDECREF(delimiter);
449 Py_XDECREF(doublequote);
450 Py_XDECREF(escapechar);
451 Py_XDECREF(lineterminator);
452 Py_XDECREF(quotechar);
453 Py_XDECREF(quoting);
454 Py_XDECREF(skipinitialspace);
455 Py_XDECREF(strict);
456 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000457}
458
459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000461"CSV dialect\n"
462"\n"
463"The Dialect type records CSV parsing and generation options.\n");
464
465static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 PyVarObject_HEAD_INIT(NULL, 0)
467 "_csv.Dialect", /* tp_name */
468 sizeof(DialectObj), /* tp_basicsize */
469 0, /* tp_itemsize */
470 /* methods */
471 (destructor)Dialect_dealloc, /* tp_dealloc */
472 (printfunc)0, /* tp_print */
473 (getattrfunc)0, /* tp_getattr */
474 (setattrfunc)0, /* tp_setattr */
475 0, /* tp_reserved */
476 (reprfunc)0, /* tp_repr */
477 0, /* tp_as_number */
478 0, /* tp_as_sequence */
479 0, /* tp_as_mapping */
480 (hashfunc)0, /* tp_hash */
481 (ternaryfunc)0, /* tp_call */
482 (reprfunc)0, /* tp_str */
483 0, /* tp_getattro */
484 0, /* tp_setattro */
485 0, /* tp_as_buffer */
486 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487 Dialect_Type_doc, /* tp_doc */
488 0, /* tp_traverse */
489 0, /* tp_clear */
490 0, /* tp_richcompare */
491 0, /* tp_weaklistoffset */
492 0, /* tp_iter */
493 0, /* tp_iternext */
494 0, /* tp_methods */
495 Dialect_memberlist, /* tp_members */
496 Dialect_getsetlist, /* tp_getset */
497 0, /* tp_base */
498 0, /* tp_dict */
499 0, /* tp_descr_get */
500 0, /* tp_descr_set */
501 0, /* tp_dictoffset */
502 0, /* tp_init */
503 0, /* tp_alloc */
504 dialect_new, /* tp_new */
505 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000506};
507
Andrew McNamara91b97462005-01-11 01:07:23 +0000508/*
509 * Return an instance of the dialect type, given a Python instance or kwarg
510 * description of the dialect
511 */
512static PyObject *
513_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514{
Victor Stinner6412f492016-08-23 00:21:34 +0200515 PyObject *type = (PyObject *)&Dialect_Type;
516 if (dialect_inst) {
517 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
518 }
519 else {
520 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
521 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000522}
523
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000527static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000528parse_save_field(ReaderObj *self)
529{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000531
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200532 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
533 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 if (field == NULL)
535 return -1;
536 self->field_len = 0;
537 if (self->numeric_field) {
538 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 self->numeric_field = 0;
541 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200543 if (tmp == NULL)
544 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 field = tmp;
546 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100547 if (PyList_Append(self->fields, field) < 0) {
548 Py_DECREF(field);
549 return -1;
550 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 Py_DECREF(field);
552 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000553}
554
555static int
556parse_grow_buff(ReaderObj *self)
557{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 if (self->field_size == 0) {
559 self->field_size = 4096;
560 if (self->field != NULL)
561 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200562 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 }
564 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200565 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000566 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 PyErr_NoMemory();
568 return 0;
569 }
570 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200581parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000582{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200583 if (self->field_len >= _csvstate_global->field_limit) {
584 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
585 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 return -1;
587 }
588 if (self->field_len == self->field_size && !parse_grow_buff(self))
589 return -1;
590 self->field[self->field_len++] = c;
591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
/*
 * Advance the reader's state machine by one character.
 *
 * `c` is the next input code point.  A value of '\0' is handled as
 * "end of the current line" in every state.
 * NOTE(review): Reader_iternext rejects real NUL characters before calling
 * this function, so '\0' is presumably a sentinel injected by the caller
 * after each line - confirm against the rest of Reader_iternext.
 *
 * Completed fields are appended to self->fields via parse_save_field();
 * field characters are accumulated via parse_add_char().
 *
 * Returns 0 on success, -1 with an exception set on failure.
 */
static int
parse_process_char(ReaderObj *self, Py_UCS4 c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            /* under QUOTE_NONNUMERIC an unquoted field is later converted
               to float by parse_save_field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* character following the escapechar in an unquoted field */
        if (c == '\n' || c=='\r') {
            /* escaped line break - keep it as field data */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = AFTER_ESCAPED_CRNL;
            break;
        }
        /* escape at end of line - store a newline instead */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case AFTER_ESCAPED_CRNL:
        /* end of line right after an escaped line break: stay in this
           state so the field continues with the next input */
        if (c == '\0')
            break;
        /*fallthru*/

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes - the field stays open */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* character following the escapechar inside quotes; an escape at
           end of line stores a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the stray character and carry on as an
               unquoted field */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* the record is complete: only line-break characters or the end
           of the line may follow */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
774
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775static int
776parse_reset(ReaderObj *self)
777{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300778 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 if (self->fields == NULL)
780 return -1;
781 self->field_len = 0;
782 self->state = START_RECORD;
783 self->numeric_field = 0;
784 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000785}
Skip Montanarob4a04172003-03-20 23:29:12 +0000786
787static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000788Reader_iternext(ReaderObj *self)
789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200791 Py_UCS4 c;
792 Py_ssize_t pos, linelen;
793 unsigned int kind;
794 void *data;
795 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 if (parse_reset(self) < 0)
798 return NULL;
799 do {
800 lineobj = PyIter_Next(self->input_iter);
801 if (lineobj == NULL) {
802 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700803 if (!PyErr_Occurred() && (self->field_len != 0 ||
804 self->state == IN_QUOTED_FIELD)) {
805 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700806 PyErr_SetString(_csvstate_global->error_obj,
807 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700808 else if (parse_save_field(self) >= 0)
809 break;
810 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 return NULL;
812 }
813 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200814 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 "iterator should return strings, "
816 "not %.200s "
817 "(did you open the file in text mode?)",
818 lineobj->ob_type->tp_name
819 );
820 Py_DECREF(lineobj);
821 return NULL;
822 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100823 if (PyUnicode_READY(lineobj) == -1) {
824 Py_DECREF(lineobj);
825 return NULL;
826 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200828 kind = PyUnicode_KIND(lineobj);
829 data = PyUnicode_DATA(lineobj);
830 pos = 0;
831 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200833 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000835 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200836 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 "line contains NULL byte");
838 goto err;
839 }
840 if (parse_process_char(self, c) < 0) {
841 Py_DECREF(lineobj);
842 goto err;
843 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200844 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 }
846 Py_DECREF(lineobj);
847 if (parse_process_char(self, 0) < 0)
848 goto err;
849 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 fields = self->fields;
852 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000853err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000855}
856
857static void
858Reader_dealloc(ReaderObj *self)
859{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 PyObject_GC_UnTrack(self);
861 Py_XDECREF(self->dialect);
862 Py_XDECREF(self->input_iter);
863 Py_XDECREF(self->fields);
864 if (self->field != NULL)
865 PyMem_Free(self->field);
866 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000867}
868
869static int
870Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 Py_VISIT(self->dialect);
873 Py_VISIT(self->input_iter);
874 Py_VISIT(self->fields);
875 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000876}
877
878static int
879Reader_clear(ReaderObj *self)
880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 Py_CLEAR(self->dialect);
882 Py_CLEAR(self->input_iter);
883 Py_CLEAR(self->fields);
884 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000885}
886
/* Docstring installed as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
893
894static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000896};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000897#define R_OFF(x) offsetof(ReaderObj, x)
898
899static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
901 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
902 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000903};
904
Skip Montanarob4a04172003-03-20 23:29:12 +0000905
906static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 PyVarObject_HEAD_INIT(NULL, 0)
908 "_csv.reader", /*tp_name*/
909 sizeof(ReaderObj), /*tp_basicsize*/
910 0, /*tp_itemsize*/
911 /* methods */
912 (destructor)Reader_dealloc, /*tp_dealloc*/
913 (printfunc)0, /*tp_print*/
914 (getattrfunc)0, /*tp_getattr*/
915 (setattrfunc)0, /*tp_setattr*/
916 0, /*tp_reserved*/
917 (reprfunc)0, /*tp_repr*/
918 0, /*tp_as_number*/
919 0, /*tp_as_sequence*/
920 0, /*tp_as_mapping*/
921 (hashfunc)0, /*tp_hash*/
922 (ternaryfunc)0, /*tp_call*/
923 (reprfunc)0, /*tp_str*/
924 0, /*tp_getattro*/
925 0, /*tp_setattro*/
926 0, /*tp_as_buffer*/
927 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
928 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
929 Reader_Type_doc, /*tp_doc*/
930 (traverseproc)Reader_traverse, /*tp_traverse*/
931 (inquiry)Reader_clear, /*tp_clear*/
932 0, /*tp_richcompare*/
933 0, /*tp_weaklistoffset*/
934 PyObject_SelfIter, /*tp_iter*/
935 (getiterfunc)Reader_iternext, /*tp_iternext*/
936 Reader_methods, /*tp_methods*/
937 Reader_memberlist, /*tp_members*/
938 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000939
940};
941
942static PyObject *
943csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
944{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 PyObject * iterator, * dialect = NULL;
946 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 if (!self)
949 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 self->dialect = NULL;
952 self->fields = NULL;
953 self->input_iter = NULL;
954 self->field = NULL;
955 self->field_size = 0;
956 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 if (parse_reset(self) < 0) {
959 Py_DECREF(self);
960 return NULL;
961 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
964 Py_DECREF(self);
965 return NULL;
966 }
967 self->input_iter = PyObject_GetIter(iterator);
968 if (self->input_iter == NULL) {
969 PyErr_SetString(PyExc_TypeError,
970 "argument 1 must be an iterator");
971 Py_DECREF(self);
972 return NULL;
973 }
974 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
975 if (self->dialect == NULL) {
976 Py_DECREF(self);
977 return NULL;
978 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 PyObject_GC_Track(self);
981 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000982}
983
984/*
985 * WRITER
986 */
987/* ---------------------------------------------------------------- */
988static void
989join_reset(WriterObj *self)
990{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 self->rec_len = 0;
992 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000993}
994
995#define MEM_INCR 32768
996
997/* Calculate new record length or append field to record. Return new
998 * record length.
999 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001000static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001001join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001002 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001003 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001004{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 DialectObj *dialect = self->dialect;
1006 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001007 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001008
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001009#define INCLEN \
1010 do {\
1011 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1012 goto overflow; \
1013 } \
1014 rec_len++; \
1015 } while(0)
1016
1017#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 do {\
1019 if (copy_phase) \
1020 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001021 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* If this is not the first field we need a field separator */
1027 if (self->num_fields > 0)
1028 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 /* Handle preceding quote */
1031 if (copy_phase && *quoted)
1032 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 /* Copy/count field data */
1035 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001036 for (i = 0; field_data && (i < field_len); i++) {
1037 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 if (c == dialect->delimiter ||
1041 c == dialect->escapechar ||
1042 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001043 PyUnicode_FindChar(
1044 dialect->lineterminator, c, 0,
1045 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 if (dialect->quoting == QUOTE_NONE)
1047 want_escape = 1;
1048 else {
1049 if (c == dialect->quotechar) {
1050 if (dialect->doublequote)
1051 ADDCH(dialect->quotechar);
1052 else
1053 want_escape = 1;
1054 }
1055 if (!want_escape)
1056 *quoted = 1;
1057 }
1058 if (want_escape) {
1059 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001060 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 "need to escape, but no escapechar set");
1062 return -1;
1063 }
1064 ADDCH(dialect->escapechar);
1065 }
1066 }
1067 /* Copy field character into record buffer.
1068 */
1069 ADDCH(c);
1070 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 if (*quoted) {
1073 if (copy_phase)
1074 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001075 else {
1076 INCLEN; /* starting quote */
1077 INCLEN; /* ending quote */
1078 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 }
1080 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001081
1082 overflow:
1083 PyErr_NoMemory();
1084 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001085#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001086#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001087}
1088
1089static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001090join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001091{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001092
Antoine Pitrou40455752010-08-15 18:51:10 +00001093 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 PyErr_NoMemory();
1095 return 0;
1096 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 if (rec_len > self->rec_size) {
1099 if (self->rec_size == 0) {
1100 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1101 if (self->rec != NULL)
1102 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 }
1105 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001106 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 if (self->rec == NULL)
1111 PyMem_Free(old_rec);
1112 }
1113 if (self->rec == NULL) {
1114 PyErr_NoMemory();
1115 return 0;
1116 }
1117 }
1118 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001119}
1120
1121static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001122join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001123{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001124 unsigned int field_kind = -1;
1125 void *field_data = NULL;
1126 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001127 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001129 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001130 if (PyUnicode_READY(field) == -1)
1131 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001132 field_kind = PyUnicode_KIND(field);
1133 field_data = PyUnicode_DATA(field);
1134 field_len = PyUnicode_GET_LENGTH(field);
1135 }
1136 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001137 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 if (rec_len < 0)
1139 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, rec_len))
1143 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001146 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150}
1151
1152static int
1153join_append_lineterminator(WriterObj *self)
1154{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001155 Py_ssize_t terminator_len, i;
1156 unsigned int term_kind;
1157 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001159 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 if (terminator_len == -1)
1161 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 /* grow record buffer if necessary */
1164 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1165 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001167 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1168 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1169 for (i = 0; i < terminator_len; i++)
1170 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174}
1175
/* Docstring attached to the "writerow" entry of Writer_methods. */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(iterable)\n"
"\n"
"Construct and write a CSV record from an iterable of fields. Non-string\n"
"elements will be converted to string.");
1181
1182static PyObject *
1183csv_writerow(WriterObj *self, PyObject *seq)
1184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001186 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001187
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001188 iter = PyObject_GetIter(seq);
1189 if (iter == NULL)
1190 return PyErr_Format(_csvstate_global->error_obj,
1191 "iterable expected, not %.200s",
1192 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 /* Join all fields in internal buffer.
1195 */
1196 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001197 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 int append_ok;
1199 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 switch (dialect->quoting) {
1202 case QUOTE_NONNUMERIC:
1203 quoted = !PyNumber_Check(field);
1204 break;
1205 case QUOTE_ALL:
1206 quoted = 1;
1207 break;
1208 default:
1209 quoted = 0;
1210 break;
1211 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001214 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 Py_DECREF(field);
1216 }
1217 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001218 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 Py_DECREF(field);
1220 }
1221 else {
1222 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 str = PyObject_Str(field);
1225 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001226 if (str == NULL) {
1227 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001229 }
1230 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 Py_DECREF(str);
1232 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001233 if (!append_ok) {
1234 Py_DECREF(iter);
1235 return NULL;
1236 }
1237 }
1238 Py_DECREF(iter);
1239 if (PyErr_Occurred())
1240 return NULL;
1241
Licht Takeuchi20019002017-12-12 18:57:06 +09001242 if (self->num_fields > 0 && self->rec_len == 0) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001243 if (dialect->quoting == QUOTE_NONE) {
1244 PyErr_Format(_csvstate_global->error_obj,
1245 "single empty field record must be quoted");
1246 return NULL;
1247 }
1248 self->num_fields--;
1249 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 return NULL;
1251 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 /* Add line terminator.
1254 */
1255 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001256 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001257
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001258 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1259 (void *) self->rec, self->rec_len);
1260 if (line == NULL)
1261 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001262 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001263 Py_DECREF(line);
1264 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001265}
1266
/* Docstring attached to the "writerows" entry of Writer_methods. */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(iterable of iterables)\n"
"\n"
"Construct and write a series of iterables to a csv file. Non-string\n"
"elements will be converted to string.");
1272
Skip Montanarob4a04172003-03-20 23:29:12 +00001273static PyObject *
1274csv_writerows(WriterObj *self, PyObject *seqseq)
1275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 row_iter = PyObject_GetIter(seqseq);
1279 if (row_iter == NULL) {
1280 PyErr_SetString(PyExc_TypeError,
1281 "writerows() argument must be iterable");
1282 return NULL;
1283 }
1284 while ((row_obj = PyIter_Next(row_iter))) {
1285 result = csv_writerow(self, row_obj);
1286 Py_DECREF(row_obj);
1287 if (!result) {
1288 Py_DECREF(row_iter);
1289 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001290 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 else
1292 Py_DECREF(result);
1293 }
1294 Py_DECREF(row_iter);
1295 if (PyErr_Occurred())
1296 return NULL;
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001297 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001298}
1299
1300static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001301 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1302 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1303 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001304};
1305
1306#define W_OFF(x) offsetof(WriterObj, x)
1307
1308static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001309 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1310 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001311};
1312
1313static void
1314Writer_dealloc(WriterObj *self)
1315{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001316 PyObject_GC_UnTrack(self);
1317 Py_XDECREF(self->dialect);
1318 Py_XDECREF(self->writeline);
1319 if (self->rec != NULL)
1320 PyMem_Free(self->rec);
1321 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001322}
1323
1324static int
1325Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1326{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327 Py_VISIT(self->dialect);
1328 Py_VISIT(self->writeline);
1329 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001330}
1331
1332static int
1333Writer_clear(WriterObj *self)
1334{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 Py_CLEAR(self->dialect);
1336 Py_CLEAR(self->writeline);
1337 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001338}
1339
/* Docstring installed in the tp_doc slot of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1346
1347static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001348 PyVarObject_HEAD_INIT(NULL, 0)
1349 "_csv.writer", /*tp_name*/
1350 sizeof(WriterObj), /*tp_basicsize*/
1351 0, /*tp_itemsize*/
1352 /* methods */
1353 (destructor)Writer_dealloc, /*tp_dealloc*/
1354 (printfunc)0, /*tp_print*/
1355 (getattrfunc)0, /*tp_getattr*/
1356 (setattrfunc)0, /*tp_setattr*/
1357 0, /*tp_reserved*/
1358 (reprfunc)0, /*tp_repr*/
1359 0, /*tp_as_number*/
1360 0, /*tp_as_sequence*/
1361 0, /*tp_as_mapping*/
1362 (hashfunc)0, /*tp_hash*/
1363 (ternaryfunc)0, /*tp_call*/
1364 (reprfunc)0, /*tp_str*/
1365 0, /*tp_getattro*/
1366 0, /*tp_setattro*/
1367 0, /*tp_as_buffer*/
1368 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1369 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1370 Writer_Type_doc,
1371 (traverseproc)Writer_traverse, /*tp_traverse*/
1372 (inquiry)Writer_clear, /*tp_clear*/
1373 0, /*tp_richcompare*/
1374 0, /*tp_weaklistoffset*/
1375 (getiterfunc)0, /*tp_iter*/
1376 (getiterfunc)0, /*tp_iternext*/
1377 Writer_methods, /*tp_methods*/
1378 Writer_memberlist, /*tp_members*/
1379 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001380};
1381
1382static PyObject *
1383csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1384{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001385 PyObject * output_file, * dialect = NULL;
1386 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001387 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001388
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001389 if (!self)
1390 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001391
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 self->dialect = NULL;
1393 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001395 self->rec = NULL;
1396 self->rec_size = 0;
1397 self->rec_len = 0;
1398 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001399
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001400 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1401 Py_DECREF(self);
1402 return NULL;
1403 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001404 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001405 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1406 PyErr_SetString(PyExc_TypeError,
1407 "argument 1 must have a \"write\" method");
1408 Py_DECREF(self);
1409 return NULL;
1410 }
1411 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1412 if (self->dialect == NULL) {
1413 Py_DECREF(self);
1414 return NULL;
1415 }
1416 PyObject_GC_Track(self);
1417 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001418}
1419
1420/*
1421 * DIALECT REGISTRY
1422 */
1423static PyObject *
1424csv_list_dialects(PyObject *module, PyObject *args)
1425{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001426 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001427}
1428
1429static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001430csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001431{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001432 PyObject *name_obj, *dialect_obj = NULL;
1433 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001434
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1436 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001437 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001439 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001440 return NULL;
1441 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001442 if (PyUnicode_READY(name_obj) == -1)
1443 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001444 dialect = _call_dialect(dialect_obj, kwargs);
1445 if (dialect == NULL)
1446 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001447 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 Py_DECREF(dialect);
1449 return NULL;
1450 }
1451 Py_DECREF(dialect);
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001452 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001453}
1454
1455static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001456csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001457{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001458 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1459 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Serhiy Storchaka228b12e2017-01-23 09:47:21 +02001460 Py_RETURN_NONE;
Skip Montanarob4a04172003-03-20 23:29:12 +00001461}
1462
1463static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001464csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001467}
1468
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001469static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001470csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001471{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001473 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1476 return NULL;
1477 if (new_limit != NULL) {
1478 if (!PyLong_CheckExact(new_limit)) {
1479 PyErr_Format(PyExc_TypeError,
1480 "limit must be an integer");
1481 return NULL;
1482 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001483 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1484 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1485 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 return NULL;
1487 }
1488 }
1489 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001490}
1491
Skip Montanarob4a04172003-03-20 23:29:12 +00001492/*
1493 * MODULE
1494 */
1495
/* Long-form description of the module and of the dialect settings.
 * NOTE(review): presumably installed as the module docstring; the place
 * where it is used is outside this part of the file. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the\n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the\n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should\n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape\n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1553
1554PyDoc_STRVAR(csv_reader_doc,
1555" csv_reader = reader(iterable [, dialect='excel']\n"
1556" [optional keyword args])\n"
1557" for row in csv_reader:\n"
1558" process(row)\n"
1559"\n"
1560"The \"iterable\" argument can be any object that returns a line\n"
1561"of input for each iteration, such as a file object or a list. The\n"
1562"optional \"dialect\" parameter is discussed below. The function\n"
1563"also accepts optional keyword arguments which override settings\n"
1564"provided by the dialect.\n"
1565"\n"
1566"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksage2382c52015-10-02 19:25:32 +03001567"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001568
1569PyDoc_STRVAR(csv_writer_doc,
1570" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1571" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001572" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001573" csv_writer.writerow(row)\n"
1574"\n"
1575" [or]\n"
1576"\n"
1577" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1578" [optional keyword args])\n"
1579" csv_writer.writerows(rows)\n"
1580"\n"
1581"The \"fileobj\" argument can be any object that supports the file API.\n");
1582
1583PyDoc_STRVAR(csv_list_dialects_doc,
1584"Return a list of all know dialect names.\n"
1585" names = csv.list_dialects()");
1586
1587PyDoc_STRVAR(csv_get_dialect_doc,
1588"Return the dialect instance associated with name.\n"
1589" dialect = csv.get_dialect(name)");
1590
1591PyDoc_STRVAR(csv_register_dialect_doc,
1592"Create a mapping from a string name to a dialect class.\n"
Berker Peksag12b50ce2015-06-05 15:17:51 +03001593" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001594
1595PyDoc_STRVAR(csv_unregister_dialect_doc,
1596"Delete the name/dialect mapping associated with a string name.\n"
1597" csv.unregister_dialect(name)");
1598
Andrew McNamara31d88962005-01-12 03:45:10 +00001599PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001600"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001601" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001602"\n"
1603"Returns old limit. If limit is not given, no new limit is set and\n"
1604"the old limit is returned");
1605
Skip Montanarob4a04172003-03-20 23:29:12 +00001606static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 { "reader", (PyCFunction)csv_reader,
1608 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1609 { "writer", (PyCFunction)csv_writer,
1610 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1611 { "list_dialects", (PyCFunction)csv_list_dialects,
1612 METH_NOARGS, csv_list_dialects_doc},
1613 { "register_dialect", (PyCFunction)csv_register_dialect,
1614 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1615 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1616 METH_O, csv_unregister_dialect_doc},
1617 { "get_dialect", (PyCFunction)csv_get_dialect,
1618 METH_O, csv_get_dialect_doc},
1619 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1620 METH_VARARGS, csv_field_size_limit_doc},
1621 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001622};
1623
Martin v. Löwis1a214512008-06-11 05:26:20 +00001624static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 PyModuleDef_HEAD_INIT,
1626 "_csv",
1627 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001628 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 csv_methods,
1630 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001631 _csv_traverse,
1632 _csv_clear,
1633 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001634};
1635
Skip Montanarob4a04172003-03-20 23:29:12 +00001636PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001637PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001638{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001640 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 if (PyType_Ready(&Dialect_Type) < 0)
1643 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 if (PyType_Ready(&Reader_Type) < 0)
1646 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 if (PyType_Ready(&Writer_Type) < 0)
1649 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 /* Create the module and add the functions */
1652 module = PyModule_Create(&_csvmodule);
1653 if (module == NULL)
1654 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 /* Add version to the module. */
1657 if (PyModule_AddStringConstant(module, "__version__",
1658 MODULE_VERSION) == -1)
1659 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001660
Antoine Pitroue7672d32012-05-16 11:33:08 +02001661 /* Set the field limit */
1662 _csvstate(module)->field_limit = 128 * 1024;
1663 /* Do I still need to add this var to the Module Dict? */
1664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001666 _csvstate(module)->dialects = PyDict_New();
1667 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001669 Py_INCREF(_csvstate(module)->dialects);
1670 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 /* Add quote styles into dictionary */
1674 for (style = quote_styles; style->name; style++) {
1675 if (PyModule_AddIntConstant(module, style->name,
1676 style->style) == -1)
1677 return NULL;
1678 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 /* Add the Dialect type */
1681 Py_INCREF(&Dialect_Type);
1682 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1683 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001686 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1687 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001689 Py_INCREF(_csvstate(module)->error_obj);
1690 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001692}