blob: e5324ae91a52143c2dcd6ceb0a49afc23b3cd18e [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's per-character state machine. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,
    AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    const char *name;
} StyleDesc;

/* Table of known quoting policies; terminated by an entry whose name is NULL. */
static const StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200212 int value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200218 value = _PyLong_AsInt(src);
219 if (value == -1 && PyErr_Occurred()) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000220 return -1;
221 }
Victor Stinner7a6dbb72016-10-19 16:00:37 +0200222 *target = value;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000223 }
224 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000225}
226
227static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200228_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000230 if (src == NULL)
231 *target = dflt;
232 else {
233 *target = '\0';
234 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000235 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200236 if (!PyUnicode_Check(src)) {
237 PyErr_Format(PyExc_TypeError,
238 "\"%s\" must be string, not %.200s", name,
239 src->ob_type->tp_name);
240 return -1;
241 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100242 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200243 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 PyErr_Format(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300245 "\"%s\" must be a 1-character string",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000246 name);
247 return -1;
248 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100249 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200251 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 }
253 }
254 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000255}
256
257static int
258_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 if (src == NULL)
261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262 else {
263 if (src == Py_None)
264 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100265 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be a string", name);
268 return -1;
269 }
270 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 if (PyUnicode_READY(src) == -1)
272 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 Py_INCREF(src);
Serhiy Storchaka48842712016-04-06 09:45:48 +0300274 Py_XSETREF(*target, src);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 }
276 }
277 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000278}
279
280static int
281dialect_check_quoting(int quoting)
282{
Serhiy Storchaka2d06e842015-12-25 19:53:18 +0200283 const StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000285 for (qs = quote_styles; qs->name; qs++) {
Victor Stinner706768c2014-08-16 01:03:39 +0200286 if ((int)qs->style == quoting)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000287 return 0;
288 }
289 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
290 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291}
Skip Montanarob4a04172003-03-20 23:29:12 +0000292
293#define D_OFF(x) offsetof(DialectObj, x)
294
295static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000296 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
297 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
298 { "strict", T_INT, D_OFF(strict), READONLY },
299 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000300};
301
302static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "delimiter", (getter)Dialect_get_delimiter},
304 { "escapechar", (getter)Dialect_get_escapechar},
305 { "lineterminator", (getter)Dialect_get_lineterminator},
306 { "quotechar", (getter)Dialect_get_quotechar},
307 { "quoting", (getter)Dialect_get_quoting},
308 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000309};
310
311static void
312Dialect_dealloc(DialectObj *self)
313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000314 Py_XDECREF(self->lineterminator);
315 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000316}
317
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000318static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000319 "dialect",
320 "delimiter",
321 "doublequote",
322 "escapechar",
323 "lineterminator",
324 "quotechar",
325 "quoting",
326 "skipinitialspace",
327 "strict",
328 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000329};
330
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000331static PyObject *
332dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000333{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000334 DialectObj *self;
335 PyObject *ret = NULL;
336 PyObject *dialect = NULL;
337 PyObject *delimiter = NULL;
338 PyObject *doublequote = NULL;
339 PyObject *escapechar = NULL;
340 PyObject *lineterminator = NULL;
341 PyObject *quotechar = NULL;
342 PyObject *quoting = NULL;
343 PyObject *skipinitialspace = NULL;
344 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000345
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000346 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
347 "|OOOOOOOOO", dialect_kws,
348 &dialect,
349 &delimiter,
350 &doublequote,
351 &escapechar,
352 &lineterminator,
353 &quotechar,
354 &quoting,
355 &skipinitialspace,
356 &strict))
357 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100360 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000361 dialect = get_dialect_from_registry(dialect);
362 if (dialect == NULL)
363 return NULL;
364 }
365 else
366 Py_INCREF(dialect);
367 /* Can we reuse this instance? */
368 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
369 delimiter == 0 &&
370 doublequote == 0 &&
371 escapechar == 0 &&
372 lineterminator == 0 &&
373 quotechar == 0 &&
374 quoting == 0 &&
375 skipinitialspace == 0 &&
376 strict == 0)
377 return dialect;
378 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000379
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000380 self = (DialectObj *)type->tp_alloc(type, 0);
381 if (self == NULL) {
382 Py_XDECREF(dialect);
383 return NULL;
384 }
385 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 Py_XINCREF(delimiter);
388 Py_XINCREF(doublequote);
389 Py_XINCREF(escapechar);
390 Py_XINCREF(lineterminator);
391 Py_XINCREF(quotechar);
392 Py_XINCREF(quoting);
393 Py_XINCREF(skipinitialspace);
394 Py_XINCREF(strict);
395 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000396#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000397 if (v == NULL) \
398 v = PyObject_GetAttrString(dialect, n)
399 DIALECT_GETATTR(delimiter, "delimiter");
400 DIALECT_GETATTR(doublequote, "doublequote");
401 DIALECT_GETATTR(escapechar, "escapechar");
402 DIALECT_GETATTR(lineterminator, "lineterminator");
403 DIALECT_GETATTR(quotechar, "quotechar");
404 DIALECT_GETATTR(quoting, "quoting");
405 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
406 DIALECT_GETATTR(strict, "strict");
407 PyErr_Clear();
408 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000409
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000410 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000411#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000412 if (meth(name, target, src, dflt)) \
413 goto err
414 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
415 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
416 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
417 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
418 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
419 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
420 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
421 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000422
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000423 /* validate options */
424 if (dialect_check_quoting(self->quoting))
425 goto err;
426 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200427 PyErr_SetString(PyExc_TypeError,
Berker Peksag0f41acb2014-07-27 23:22:34 +0300428 "\"delimiter\" must be a 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000429 goto err;
430 }
431 if (quotechar == Py_None && quoting == NULL)
432 self->quoting = QUOTE_NONE;
433 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434 PyErr_SetString(PyExc_TypeError,
435 "quotechar must be set if quoting enabled");
436 goto err;
437 }
438 if (self->lineterminator == 0) {
439 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440 goto err;
441 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 ret = (PyObject *)self;
444 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000445err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 Py_XDECREF(self);
447 Py_XDECREF(dialect);
448 Py_XDECREF(delimiter);
449 Py_XDECREF(doublequote);
450 Py_XDECREF(escapechar);
451 Py_XDECREF(lineterminator);
452 Py_XDECREF(quotechar);
453 Py_XDECREF(quoting);
454 Py_XDECREF(skipinitialspace);
455 Py_XDECREF(strict);
456 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000457}
458
459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000461"CSV dialect\n"
462"\n"
463"The Dialect type records CSV parsing and generation options.\n");
464
465static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 PyVarObject_HEAD_INIT(NULL, 0)
467 "_csv.Dialect", /* tp_name */
468 sizeof(DialectObj), /* tp_basicsize */
469 0, /* tp_itemsize */
470 /* methods */
471 (destructor)Dialect_dealloc, /* tp_dealloc */
472 (printfunc)0, /* tp_print */
473 (getattrfunc)0, /* tp_getattr */
474 (setattrfunc)0, /* tp_setattr */
475 0, /* tp_reserved */
476 (reprfunc)0, /* tp_repr */
477 0, /* tp_as_number */
478 0, /* tp_as_sequence */
479 0, /* tp_as_mapping */
480 (hashfunc)0, /* tp_hash */
481 (ternaryfunc)0, /* tp_call */
482 (reprfunc)0, /* tp_str */
483 0, /* tp_getattro */
484 0, /* tp_setattro */
485 0, /* tp_as_buffer */
486 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487 Dialect_Type_doc, /* tp_doc */
488 0, /* tp_traverse */
489 0, /* tp_clear */
490 0, /* tp_richcompare */
491 0, /* tp_weaklistoffset */
492 0, /* tp_iter */
493 0, /* tp_iternext */
494 0, /* tp_methods */
495 Dialect_memberlist, /* tp_members */
496 Dialect_getsetlist, /* tp_getset */
497 0, /* tp_base */
498 0, /* tp_dict */
499 0, /* tp_descr_get */
500 0, /* tp_descr_set */
501 0, /* tp_dictoffset */
502 0, /* tp_init */
503 0, /* tp_alloc */
504 dialect_new, /* tp_new */
505 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000506};
507
Andrew McNamara91b97462005-01-11 01:07:23 +0000508/*
509 * Return an instance of the dialect type, given a Python instance or kwarg
510 * description of the dialect
511 */
512static PyObject *
513_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514{
Victor Stinner6412f492016-08-23 00:21:34 +0200515 PyObject *type = (PyObject *)&Dialect_Type;
516 if (dialect_inst) {
517 return _PyObject_FastCallDict(type, &dialect_inst, 1, kwargs);
518 }
519 else {
520 return _PyObject_FastCallDict(type, NULL, 0, kwargs);
521 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000522}
523
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000527static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000528parse_save_field(ReaderObj *self)
529{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000530 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000531
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200532 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
533 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000534 if (field == NULL)
535 return -1;
536 self->field_len = 0;
537 if (self->numeric_field) {
538 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000540 self->numeric_field = 0;
541 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200543 if (tmp == NULL)
544 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000545 field = tmp;
546 }
Victor Stinnerb80b3782013-11-14 21:29:34 +0100547 if (PyList_Append(self->fields, field) < 0) {
548 Py_DECREF(field);
549 return -1;
550 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 Py_DECREF(field);
552 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000553}
554
555static int
556parse_grow_buff(ReaderObj *self)
557{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000558 if (self->field_size == 0) {
559 self->field_size = 4096;
560 if (self->field != NULL)
561 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200562 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000563 }
564 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200565 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000566 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000567 PyErr_NoMemory();
568 return 0;
569 }
570 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200581parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000582{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200583 if (self->field_len >= _csvstate_global->field_limit) {
584 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
585 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000586 return -1;
587 }
588 if (self->field_len == self->field_size && !parse_grow_buff(self))
589 return -1;
590 self->field[self->field_len++] = c;
591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000594static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200595parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000596{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000597 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000598
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000599 switch (self->state) {
600 case START_RECORD:
601 /* start of record */
602 if (c == '\0')
603 /* empty line - return [] */
604 break;
605 else if (c == '\n' || c == '\r') {
606 self->state = EAT_CRNL;
607 break;
608 }
609 /* normal character - handle as START_FIELD */
610 self->state = START_FIELD;
611 /* fallthru */
612 case START_FIELD:
613 /* expecting field */
614 if (c == '\n' || c == '\r' || c == '\0') {
615 /* save empty field - return [fields] */
616 if (parse_save_field(self) < 0)
617 return -1;
618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
619 }
620 else if (c == dialect->quotechar &&
621 dialect->quoting != QUOTE_NONE) {
622 /* start quoted field */
623 self->state = IN_QUOTED_FIELD;
624 }
625 else if (c == dialect->escapechar) {
626 /* possible escaped character */
627 self->state = ESCAPED_CHAR;
628 }
629 else if (c == ' ' && dialect->skipinitialspace)
630 /* ignore space at start of field */
631 ;
632 else if (c == dialect->delimiter) {
633 /* save empty field */
634 if (parse_save_field(self) < 0)
635 return -1;
636 }
637 else {
638 /* begin new unquoted field */
639 if (dialect->quoting == QUOTE_NONNUMERIC)
640 self->numeric_field = 1;
641 if (parse_add_char(self, c) < 0)
642 return -1;
643 self->state = IN_FIELD;
644 }
645 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000647 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400648 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400649 if (parse_add_char(self, c) < 0)
650 return -1;
651 self->state = AFTER_ESCAPED_CRNL;
652 break;
653 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000654 if (c == '\0')
655 c = '\n';
656 if (parse_add_char(self, c) < 0)
657 return -1;
658 self->state = IN_FIELD;
659 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000660
R David Murrayc7c42ef2013-03-19 22:41:47 -0400661 case AFTER_ESCAPED_CRNL:
662 if (c == '\0')
663 break;
664 /*fallthru*/
665
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000666 case IN_FIELD:
667 /* in unquoted field */
668 if (c == '\n' || c == '\r' || c == '\0') {
669 /* end of line - return [fields] */
670 if (parse_save_field(self) < 0)
671 return -1;
672 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
673 }
674 else if (c == dialect->escapechar) {
675 /* possible escaped character */
676 self->state = ESCAPED_CHAR;
677 }
678 else if (c == dialect->delimiter) {
679 /* save field - wait for new field */
680 if (parse_save_field(self) < 0)
681 return -1;
682 self->state = START_FIELD;
683 }
684 else {
685 /* normal character - save in field */
686 if (parse_add_char(self, c) < 0)
687 return -1;
688 }
689 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000690
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000691 case IN_QUOTED_FIELD:
692 /* in quoted field */
693 if (c == '\0')
694 ;
695 else if (c == dialect->escapechar) {
696 /* Possible escape character */
697 self->state = ESCAPE_IN_QUOTED_FIELD;
698 }
699 else if (c == dialect->quotechar &&
700 dialect->quoting != QUOTE_NONE) {
701 if (dialect->doublequote) {
702 /* doublequote; " represented by "" */
703 self->state = QUOTE_IN_QUOTED_FIELD;
704 }
705 else {
706 /* end of quote part of field */
707 self->state = IN_FIELD;
708 }
709 }
710 else {
711 /* normal character - save in field */
712 if (parse_add_char(self, c) < 0)
713 return -1;
714 }
715 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000716
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000717 case ESCAPE_IN_QUOTED_FIELD:
718 if (c == '\0')
719 c = '\n';
720 if (parse_add_char(self, c) < 0)
721 return -1;
722 self->state = IN_QUOTED_FIELD;
723 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000724
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000725 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka6a7b3a72016-04-17 08:32:47 +0300726 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000727 if (dialect->quoting != QUOTE_NONE &&
728 c == dialect->quotechar) {
729 /* save "" as " */
730 if (parse_add_char(self, c) < 0)
731 return -1;
732 self->state = IN_QUOTED_FIELD;
733 }
734 else if (c == dialect->delimiter) {
735 /* save field - wait for new field */
736 if (parse_save_field(self) < 0)
737 return -1;
738 self->state = START_FIELD;
739 }
740 else if (c == '\n' || c == '\r' || c == '\0') {
741 /* end of line - return [fields] */
742 if (parse_save_field(self) < 0)
743 return -1;
744 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
745 }
746 else if (!dialect->strict) {
747 if (parse_add_char(self, c) < 0)
748 return -1;
749 self->state = IN_FIELD;
750 }
751 else {
752 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200753 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000754 dialect->delimiter,
755 dialect->quotechar);
756 return -1;
757 }
758 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000759
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000760 case EAT_CRNL:
761 if (c == '\n' || c == '\r')
762 ;
763 else if (c == '\0')
764 self->state = START_RECORD;
765 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200766 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000767 return -1;
768 }
769 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000770
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000771 }
772 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000773}
774
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775static int
776parse_reset(ReaderObj *self)
777{
Serhiy Storchaka48842712016-04-06 09:45:48 +0300778 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000779 if (self->fields == NULL)
780 return -1;
781 self->field_len = 0;
782 self->state = START_RECORD;
783 self->numeric_field = 0;
784 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000785}
Skip Montanarob4a04172003-03-20 23:29:12 +0000786
787static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000788Reader_iternext(ReaderObj *self)
789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200791 Py_UCS4 c;
792 Py_ssize_t pos, linelen;
793 unsigned int kind;
794 void *data;
795 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 if (parse_reset(self) < 0)
798 return NULL;
799 do {
800 lineobj = PyIter_Next(self->input_iter);
801 if (lineobj == NULL) {
802 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700803 if (!PyErr_Occurred() && (self->field_len != 0 ||
804 self->state == IN_QUOTED_FIELD)) {
805 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700806 PyErr_SetString(_csvstate_global->error_obj,
807 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700808 else if (parse_save_field(self) >= 0)
809 break;
810 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 return NULL;
812 }
813 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200814 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 "iterator should return strings, "
816 "not %.200s "
817 "(did you open the file in text mode?)",
818 lineobj->ob_type->tp_name
819 );
820 Py_DECREF(lineobj);
821 return NULL;
822 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100823 if (PyUnicode_READY(lineobj) == -1) {
824 Py_DECREF(lineobj);
825 return NULL;
826 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200828 kind = PyUnicode_KIND(lineobj);
829 data = PyUnicode_DATA(lineobj);
830 pos = 0;
831 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200833 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000835 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200836 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 "line contains NULL byte");
838 goto err;
839 }
840 if (parse_process_char(self, c) < 0) {
841 Py_DECREF(lineobj);
842 goto err;
843 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200844 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 }
846 Py_DECREF(lineobj);
847 if (parse_process_char(self, 0) < 0)
848 goto err;
849 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 fields = self->fields;
852 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000853err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000855}
856
857static void
858Reader_dealloc(ReaderObj *self)
859{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 PyObject_GC_UnTrack(self);
861 Py_XDECREF(self->dialect);
862 Py_XDECREF(self->input_iter);
863 Py_XDECREF(self->fields);
864 if (self->field != NULL)
865 PyMem_Free(self->field);
866 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000867}
868
869static int
870Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
871{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000872 Py_VISIT(self->dialect);
873 Py_VISIT(self->input_iter);
874 Py_VISIT(self->fields);
875 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000876}
877
878static int
879Reader_clear(ReaderObj *self)
880{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000881 Py_CLEAR(self->dialect);
882 Py_CLEAR(self->input_iter);
883 Py_CLEAR(self->fields);
884 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000885}
886
887PyDoc_STRVAR(Reader_Type_doc,
888"CSV reader\n"
889"\n"
890"Reader objects are responsible for reading and parsing tabular data\n"
891"in CSV format.\n"
892);
893
894static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000896};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000897#define R_OFF(x) offsetof(ReaderObj, x)
898
899static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
901 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
902 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000903};
904
Skip Montanarob4a04172003-03-20 23:29:12 +0000905
906static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 PyVarObject_HEAD_INIT(NULL, 0)
908 "_csv.reader", /*tp_name*/
909 sizeof(ReaderObj), /*tp_basicsize*/
910 0, /*tp_itemsize*/
911 /* methods */
912 (destructor)Reader_dealloc, /*tp_dealloc*/
913 (printfunc)0, /*tp_print*/
914 (getattrfunc)0, /*tp_getattr*/
915 (setattrfunc)0, /*tp_setattr*/
916 0, /*tp_reserved*/
917 (reprfunc)0, /*tp_repr*/
918 0, /*tp_as_number*/
919 0, /*tp_as_sequence*/
920 0, /*tp_as_mapping*/
921 (hashfunc)0, /*tp_hash*/
922 (ternaryfunc)0, /*tp_call*/
923 (reprfunc)0, /*tp_str*/
924 0, /*tp_getattro*/
925 0, /*tp_setattro*/
926 0, /*tp_as_buffer*/
927 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
928 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
929 Reader_Type_doc, /*tp_doc*/
930 (traverseproc)Reader_traverse, /*tp_traverse*/
931 (inquiry)Reader_clear, /*tp_clear*/
932 0, /*tp_richcompare*/
933 0, /*tp_weaklistoffset*/
934 PyObject_SelfIter, /*tp_iter*/
935 (getiterfunc)Reader_iternext, /*tp_iternext*/
936 Reader_methods, /*tp_methods*/
937 Reader_memberlist, /*tp_members*/
938 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000939
940};
941
942static PyObject *
943csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
944{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 PyObject * iterator, * dialect = NULL;
946 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 if (!self)
949 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 self->dialect = NULL;
952 self->fields = NULL;
953 self->input_iter = NULL;
954 self->field = NULL;
955 self->field_size = 0;
956 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 if (parse_reset(self) < 0) {
959 Py_DECREF(self);
960 return NULL;
961 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
964 Py_DECREF(self);
965 return NULL;
966 }
967 self->input_iter = PyObject_GetIter(iterator);
968 if (self->input_iter == NULL) {
969 PyErr_SetString(PyExc_TypeError,
970 "argument 1 must be an iterator");
971 Py_DECREF(self);
972 return NULL;
973 }
974 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
975 if (self->dialect == NULL) {
976 Py_DECREF(self);
977 return NULL;
978 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 PyObject_GC_Track(self);
981 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000982}
983
984/*
985 * WRITER
986 */
987/* ---------------------------------------------------------------- */
988static void
989join_reset(WriterObj *self)
990{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 self->rec_len = 0;
992 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000993}
994
995#define MEM_INCR 32768
996
997/* Calculate new record length or append field to record. Return new
998 * record length.
999 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001000static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001001join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001002 Py_ssize_t field_len, int *quoted,
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001003 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001004{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 DialectObj *dialect = self->dialect;
1006 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001007 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001008
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001009#define INCLEN \
1010 do {\
1011 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1012 goto overflow; \
1013 } \
1014 rec_len++; \
1015 } while(0)
1016
1017#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 do {\
1019 if (copy_phase) \
1020 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001021 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001023
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001024 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* If this is not the first field we need a field separator */
1027 if (self->num_fields > 0)
1028 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 /* Handle preceding quote */
1031 if (copy_phase && *quoted)
1032 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001033
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 /* Copy/count field data */
1035 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001036 for (i = 0; field_data && (i < field_len); i++) {
1037 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001039
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001040 if (c == dialect->delimiter ||
1041 c == dialect->escapechar ||
1042 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001043 PyUnicode_FindChar(
1044 dialect->lineterminator, c, 0,
1045 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001046 if (dialect->quoting == QUOTE_NONE)
1047 want_escape = 1;
1048 else {
1049 if (c == dialect->quotechar) {
1050 if (dialect->doublequote)
1051 ADDCH(dialect->quotechar);
1052 else
1053 want_escape = 1;
1054 }
1055 if (!want_escape)
1056 *quoted = 1;
1057 }
1058 if (want_escape) {
1059 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001060 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 "need to escape, but no escapechar set");
1062 return -1;
1063 }
1064 ADDCH(dialect->escapechar);
1065 }
1066 }
1067 /* Copy field character into record buffer.
1068 */
1069 ADDCH(c);
1070 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001071
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001072 if (*quoted) {
1073 if (copy_phase)
1074 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001075 else {
1076 INCLEN; /* starting quote */
1077 INCLEN; /* ending quote */
1078 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001079 }
1080 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001081
1082 overflow:
1083 PyErr_NoMemory();
1084 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001085#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001086#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001087}
1088
1089static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001090join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001091{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001092
Antoine Pitrou40455752010-08-15 18:51:10 +00001093 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001094 PyErr_NoMemory();
1095 return 0;
1096 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001097
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 if (rec_len > self->rec_size) {
1099 if (self->rec_size == 0) {
1100 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1101 if (self->rec != NULL)
1102 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 }
1105 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001106 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001109 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001110 if (self->rec == NULL)
1111 PyMem_Free(old_rec);
1112 }
1113 if (self->rec == NULL) {
1114 PyErr_NoMemory();
1115 return 0;
1116 }
1117 }
1118 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001119}
1120
1121static int
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001122join_append(WriterObj *self, PyObject *field, int quoted)
Skip Montanarob4a04172003-03-20 23:29:12 +00001123{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001124 unsigned int field_kind = -1;
1125 void *field_data = NULL;
1126 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001127 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001129 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001130 if (PyUnicode_READY(field) == -1)
1131 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001132 field_kind = PyUnicode_KIND(field);
1133 field_data = PyUnicode_DATA(field);
1134 field_len = PyUnicode_GET_LENGTH(field);
1135 }
1136 rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001137 &quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 if (rec_len < 0)
1139 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, rec_len))
1143 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001145 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001146 &quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001147 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150}
1151
1152static int
1153join_append_lineterminator(WriterObj *self)
1154{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001155 Py_ssize_t terminator_len, i;
1156 unsigned int term_kind;
1157 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001159 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 if (terminator_len == -1)
1161 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001162
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001163 /* grow record buffer if necessary */
1164 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1165 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001167 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1168 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1169 for (i = 0; i < terminator_len; i++)
1170 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001173 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174}
1175
1176PyDoc_STRVAR(csv_writerow_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001177"writerow(iterable)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001178"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001179"Construct and write a CSV record from an iterable of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001180"elements will be converted to string.");
1181
1182static PyObject *
1183csv_writerow(WriterObj *self, PyObject *seq)
1184{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001185 DialectObj *dialect = self->dialect;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001186 PyObject *iter, *field, *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001187
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001188 iter = PyObject_GetIter(seq);
1189 if (iter == NULL)
1190 return PyErr_Format(_csvstate_global->error_obj,
1191 "iterable expected, not %.200s",
1192 seq->ob_type->tp_name);
Skip Montanarob4a04172003-03-20 23:29:12 +00001193
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001194 /* Join all fields in internal buffer.
1195 */
1196 join_reset(self);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001197 while ((field = PyIter_Next(iter))) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001198 int append_ok;
1199 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 switch (dialect->quoting) {
1202 case QUOTE_NONNUMERIC:
1203 quoted = !PyNumber_Check(field);
1204 break;
1205 case QUOTE_ALL:
1206 quoted = 1;
1207 break;
1208 default:
1209 quoted = 0;
1210 break;
1211 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001212
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001213 if (PyUnicode_Check(field)) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001214 append_ok = join_append(self, field, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001215 Py_DECREF(field);
1216 }
1217 else if (field == Py_None) {
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001218 append_ok = join_append(self, NULL, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 Py_DECREF(field);
1220 }
1221 else {
1222 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001223
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001224 str = PyObject_Str(field);
1225 Py_DECREF(field);
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001226 if (str == NULL) {
1227 Py_DECREF(iter);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 return NULL;
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001229 }
1230 append_ok = join_append(self, str, quoted);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001231 Py_DECREF(str);
1232 }
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001233 if (!append_ok) {
1234 Py_DECREF(iter);
1235 return NULL;
1236 }
1237 }
1238 Py_DECREF(iter);
1239 if (PyErr_Occurred())
1240 return NULL;
1241
1242 if (self->num_fields > 0 && self->rec_size == 0) {
1243 if (dialect->quoting == QUOTE_NONE) {
1244 PyErr_Format(_csvstate_global->error_obj,
1245 "single empty field record must be quoted");
1246 return NULL;
1247 }
1248 self->num_fields--;
1249 if (!join_append(self, NULL, 1))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 return NULL;
1251 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001252
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001253 /* Add line terminator.
1254 */
1255 if (!join_append_lineterminator(self))
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001256 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001257
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001258 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1259 (void *) self->rec, self->rec_len);
1260 if (line == NULL)
1261 return NULL;
Victor Stinnerde4ae3d2016-12-04 22:59:09 +01001262 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001263 Py_DECREF(line);
1264 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001265}
1266
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001267PyDoc_STRVAR(csv_writerows_doc,
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001268"writerows(iterable of iterables)\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001269"\n"
Serhiy Storchaka7901b482015-03-30 09:09:54 +03001270"Construct and write a series of iterables to a csv file. Non-string\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001271"elements will be converted to string.");
1272
Skip Montanarob4a04172003-03-20 23:29:12 +00001273static PyObject *
1274csv_writerows(WriterObj *self, PyObject *seqseq)
1275{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001276 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001277
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001278 row_iter = PyObject_GetIter(seqseq);
1279 if (row_iter == NULL) {
1280 PyErr_SetString(PyExc_TypeError,
1281 "writerows() argument must be iterable");
1282 return NULL;
1283 }
1284 while ((row_obj = PyIter_Next(row_iter))) {
1285 result = csv_writerow(self, row_obj);
1286 Py_DECREF(row_obj);
1287 if (!result) {
1288 Py_DECREF(row_iter);
1289 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001290 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001291 else
1292 Py_DECREF(result);
1293 }
1294 Py_DECREF(row_iter);
1295 if (PyErr_Occurred())
1296 return NULL;
1297 Py_INCREF(Py_None);
1298 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001299}
1300
1301static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001302 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1303 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1304 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001305};
1306
1307#define W_OFF(x) offsetof(WriterObj, x)
1308
1309static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001310 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1311 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001312};
1313
1314static void
1315Writer_dealloc(WriterObj *self)
1316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 PyObject_GC_UnTrack(self);
1318 Py_XDECREF(self->dialect);
1319 Py_XDECREF(self->writeline);
1320 if (self->rec != NULL)
1321 PyMem_Free(self->rec);
1322 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001323}
1324
1325static int
1326Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1327{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001328 Py_VISIT(self->dialect);
1329 Py_VISIT(self->writeline);
1330 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001331}
1332
1333static int
1334Writer_clear(WriterObj *self)
1335{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001336 Py_CLEAR(self->dialect);
1337 Py_CLEAR(self->writeline);
1338 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001339}
1340
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001341PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001342"CSV writer\n"
1343"\n"
1344"Writer objects are responsible for generating tabular data\n"
1345"in CSV format from sequence input.\n"
1346);
1347
1348static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001349 PyVarObject_HEAD_INIT(NULL, 0)
1350 "_csv.writer", /*tp_name*/
1351 sizeof(WriterObj), /*tp_basicsize*/
1352 0, /*tp_itemsize*/
1353 /* methods */
1354 (destructor)Writer_dealloc, /*tp_dealloc*/
1355 (printfunc)0, /*tp_print*/
1356 (getattrfunc)0, /*tp_getattr*/
1357 (setattrfunc)0, /*tp_setattr*/
1358 0, /*tp_reserved*/
1359 (reprfunc)0, /*tp_repr*/
1360 0, /*tp_as_number*/
1361 0, /*tp_as_sequence*/
1362 0, /*tp_as_mapping*/
1363 (hashfunc)0, /*tp_hash*/
1364 (ternaryfunc)0, /*tp_call*/
1365 (reprfunc)0, /*tp_str*/
1366 0, /*tp_getattro*/
1367 0, /*tp_setattro*/
1368 0, /*tp_as_buffer*/
1369 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1370 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1371 Writer_Type_doc,
1372 (traverseproc)Writer_traverse, /*tp_traverse*/
1373 (inquiry)Writer_clear, /*tp_clear*/
1374 0, /*tp_richcompare*/
1375 0, /*tp_weaklistoffset*/
1376 (getiterfunc)0, /*tp_iter*/
1377 (getiterfunc)0, /*tp_iternext*/
1378 Writer_methods, /*tp_methods*/
1379 Writer_memberlist, /*tp_members*/
1380 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001381};
1382
1383static PyObject *
1384csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1385{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001386 PyObject * output_file, * dialect = NULL;
1387 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001388 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 if (!self)
1391 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 self->dialect = NULL;
1394 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001395
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001396 self->rec = NULL;
1397 self->rec_size = 0;
1398 self->rec_len = 0;
1399 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001400
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001401 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1402 Py_DECREF(self);
1403 return NULL;
1404 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001405 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1407 PyErr_SetString(PyExc_TypeError,
1408 "argument 1 must have a \"write\" method");
1409 Py_DECREF(self);
1410 return NULL;
1411 }
1412 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1413 if (self->dialect == NULL) {
1414 Py_DECREF(self);
1415 return NULL;
1416 }
1417 PyObject_GC_Track(self);
1418 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001419}
1420
1421/*
1422 * DIALECT REGISTRY
1423 */
1424static PyObject *
1425csv_list_dialects(PyObject *module, PyObject *args)
1426{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001427 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001428}
1429
1430static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001431csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001432{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 PyObject *name_obj, *dialect_obj = NULL;
1434 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001435
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1437 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001438 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001439 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001440 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001441 return NULL;
1442 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001443 if (PyUnicode_READY(name_obj) == -1)
1444 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001445 dialect = _call_dialect(dialect_obj, kwargs);
1446 if (dialect == NULL)
1447 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001448 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001449 Py_DECREF(dialect);
1450 return NULL;
1451 }
1452 Py_DECREF(dialect);
1453 Py_INCREF(Py_None);
1454 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001455}
1456
1457static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001458csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001459{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001460 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1461 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001462 Py_INCREF(Py_None);
1463 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001464}
1465
1466static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001467csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001468{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001469 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001470}
1471
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001472static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001473csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001474{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001476 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001477
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001478 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1479 return NULL;
1480 if (new_limit != NULL) {
1481 if (!PyLong_CheckExact(new_limit)) {
1482 PyErr_Format(PyExc_TypeError,
1483 "limit must be an integer");
1484 return NULL;
1485 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001486 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1487 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1488 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001489 return NULL;
1490 }
1491 }
1492 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001493}
1494
Skip Montanarob4a04172003-03-20 23:29:12 +00001495/*
1496 * MODULE
1497 */
1498
1499PyDoc_STRVAR(csv_module_doc,
1500"CSV parsing and writing.\n"
1501"\n"
1502"This module provides classes that assist in the reading and writing\n"
1503"of Comma Separated Value (CSV) files, and implements the interface\n"
1504"described by PEP 305. Although many CSV files are simple to parse,\n"
1505"the format is not formally defined by a stable specification and\n"
1506"is subtle enough that parsing lines of a CSV file with something\n"
1507"like line.split(\",\") is bound to fail. The module supports three\n"
1508"basic APIs: reading, writing, and registration of dialects.\n"
1509"\n"
1510"\n"
1511"DIALECT REGISTRATION:\n"
1512"\n"
1513"Readers and writers support a dialect argument, which is a convenient\n"
1514"handle on a group of settings. When the dialect argument is a string,\n"
1515"it identifies one of the dialects previously registered with the module.\n"
1516"If it is a class or instance, the attributes of the argument are used as\n"
1517"the settings for the reader or writer:\n"
1518"\n"
1519" class excel:\n"
1520" delimiter = ','\n"
1521" quotechar = '\"'\n"
1522" escapechar = None\n"
1523" doublequote = True\n"
1524" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001525" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001526" quoting = QUOTE_MINIMAL\n"
1527"\n"
1528"SETTINGS:\n"
1529"\n"
1530" * quotechar - specifies a one-character string to use as the \n"
1531" quoting character. It defaults to '\"'.\n"
1532" * delimiter - specifies a one-character string to use as the \n"
1533" field separator. It defaults to ','.\n"
1534" * skipinitialspace - specifies how to interpret whitespace which\n"
1535" immediately follows a delimiter. It defaults to False, which\n"
1536" means that whitespace immediately following a delimiter is part\n"
1537" of the following field.\n"
1538" * lineterminator - specifies the character sequence which should \n"
1539" terminate rows.\n"
1540" * quoting - controls when quotes should be generated by the writer.\n"
1541" It can take on any of the following module constants:\n"
1542"\n"
1543" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1544" field contains either the quotechar or the delimiter\n"
1545" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1546" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001547" fields which do not parse as integers or floating point\n"
1548" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001549" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1550" * escapechar - specifies a one-character string used to escape \n"
1551" the delimiter when quoting is set to QUOTE_NONE.\n"
1552" * doublequote - controls the handling of quotes inside fields. When\n"
1553" True, two consecutive quotes are interpreted as one during read,\n"
1554" and when writing, each quote character embedded in the data is\n"
1555" written as two quotes\n");
1556
/* Docstring for the module-level reader() function; it is attached to the
 * "reader" entry of the csv_methods table. */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001571
/* Docstring for the module-level writer() function; it is attached to the
 * "writer" entry of the csv_methods table.  Shows both the row-at-a-time
 * (writerow) and the bulk (writerows) usage. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1585
1586PyDoc_STRVAR(csv_list_dialects_doc,
1587"Return a list of all know dialect names.\n"
1588" names = csv.list_dialects()");
1589
/* Docstring for the module-level get_dialect() function; it is attached to
 * the "get_dialect" entry of the csv_methods table. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1593
/* Docstring for the module-level register_dialect() function; it is
 * attached to the "register_dialect" entry of the csv_methods table.
 * NOTE(review): the usage line assigns the call's result to "dialect";
 * confirm against csv_register_dialect() that a dialect is actually
 * returned. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name[, dialect[, **fmtparams]])");
Skip Montanarob4a04172003-03-20 23:29:12 +00001597
/* Docstring for the module-level unregister_dialect() function; it is
 * attached to the "unregister_dialect" entry of the csv_methods table. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1601
/* Docstring for the module-level field_size_limit() function; it is
 * attached to the "field_size_limit" entry of the csv_methods table. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1608
Skip Montanarob4a04172003-03-20 23:29:12 +00001609static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001610 { "reader", (PyCFunction)csv_reader,
1611 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1612 { "writer", (PyCFunction)csv_writer,
1613 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1614 { "list_dialects", (PyCFunction)csv_list_dialects,
1615 METH_NOARGS, csv_list_dialects_doc},
1616 { "register_dialect", (PyCFunction)csv_register_dialect,
1617 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1618 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1619 METH_O, csv_unregister_dialect_doc},
1620 { "get_dialect", (PyCFunction)csv_get_dialect,
1621 METH_O, csv_get_dialect_doc},
1622 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1623 METH_VARARGS, csv_field_size_limit_doc},
1624 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001625};
1626
Martin v. Löwis1a214512008-06-11 05:26:20 +00001627static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 PyModuleDef_HEAD_INIT,
1629 "_csv",
1630 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001631 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001632 csv_methods,
1633 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001634 _csv_traverse,
1635 _csv_clear,
1636 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001637};
1638
Skip Montanarob4a04172003-03-20 23:29:12 +00001639PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001640PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001641{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 PyObject *module;
Serhiy Storchaka2d06e842015-12-25 19:53:18 +02001643 const StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 if (PyType_Ready(&Dialect_Type) < 0)
1646 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 if (PyType_Ready(&Reader_Type) < 0)
1649 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 if (PyType_Ready(&Writer_Type) < 0)
1652 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 /* Create the module and add the functions */
1655 module = PyModule_Create(&_csvmodule);
1656 if (module == NULL)
1657 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001658
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001659 /* Add version to the module. */
1660 if (PyModule_AddStringConstant(module, "__version__",
1661 MODULE_VERSION) == -1)
1662 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001663
Antoine Pitroue7672d32012-05-16 11:33:08 +02001664 /* Set the field limit */
1665 _csvstate(module)->field_limit = 128 * 1024;
1666 /* Do I still need to add this var to the Module Dict? */
1667
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001669 _csvstate(module)->dialects = PyDict_New();
1670 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001672 Py_INCREF(_csvstate(module)->dialects);
1673 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001675
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001676 /* Add quote styles into dictionary */
1677 for (style = quote_styles; style->name; style++) {
1678 if (PyModule_AddIntConstant(module, style->name,
1679 style->style) == -1)
1680 return NULL;
1681 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001682
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001683 /* Add the Dialect type */
1684 Py_INCREF(&Dialect_Type);
1685 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1686 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001687
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001689 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1690 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001692 Py_INCREF(_csvstate(module)->error_obj);
1693 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001694 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001695}