blob: 88e3e9065842e80c410e3b0f06884dd4332e637c [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/* Docstring helpers, defined only when Python.h does not provide them. */
#ifndef PyDoc_STRVAR
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Return type / linkage of the module init function. */
#ifndef PyMODINIT_FUNC
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif

/* Drop a reference held in `op`, setting `op` to NULL *before* the
 * DECREF runs. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *clear_tmp_ = (PyObject *)(op);    \
            (op) = NULL;                                \
            Py_DECREF(clear_tmp_);                      \
        }                                               \
    } while (0)
#endif

/* Visit `op` from inside a tp_traverse function; relies on the enclosing
 * function having `visit` and `arg` in scope, and returns from it as soon
 * as the visitor yields a non-zero result. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int visit_rc_ = visit((PyObject *)(op), arg);       \
            if (visit_rc_)                                      \
                return visit_rc_;                               \
        }                                                       \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +000065 PyObject_TypeCheck(o, &PyBaseString_Type)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000068static PyObject *dialects; /* Dialect registry */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the per-character reader state machine (see
 * parse_process_char). */
typedef enum {
    START_RECORD,               /* nothing of the record consumed yet */
    START_FIELD,                /* expecting the first character of a field */
    ESCAPED_CHAR,               /* escapechar seen outside a quoted section */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escapechar seen inside a quoted field */
    QUOTE_IN_QUOTED_FIELD,      /* quotechar seen inside a quoted field */
    EAT_CRNL                    /* consuming line-ending characters */
} ParserState;
76
/* Legal values for a dialect's `quoting` option; the numeric values are
 * exposed to Python through Dialect_get_quoting(). */
typedef enum {
    QUOTE_MINIMAL,      /* default chosen by dialect_new() */
    QUOTE_ALL,
    QUOTE_NONNUMERIC,   /* reader converts unquoted fields to float */
    QUOTE_NONE          /* reader gives quotechar no special meaning */
} QuoteStyle;
80
81typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 QuoteStyle style;
83 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000084} StyleDesc;
85
86static StyleDesc quote_styles[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
88 { QUOTE_ALL, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE, "QUOTE_NONE" },
91 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000092};
93
94typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000095 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000113 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000136
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
Antoine Pitrouc5bef752012-08-15 23:16:51 +0200211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000218}
219
Andrew McNamara1196cf12005-01-07 04:42:45 +0000220static int
221_set_int(const char *name, int *target, PyObject *src, int dflt)
222{
Serhiy Storchaka994f04d2016-12-27 15:09:36 +0200223 int value;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 if (src == NULL)
225 *target = dflt;
226 else {
Serhiy Storchaka48c8bf22018-07-31 09:09:36 +0300227 if (!_PyAnyInt_Check(src)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000228 PyErr_Format(PyExc_TypeError,
229 "\"%s\" must be an integer", name);
230 return -1;
231 }
Serhiy Storchaka994f04d2016-12-27 15:09:36 +0200232 value = PyInt_AsLong(src);
233 if (value == -1 && PyErr_Occurred())
234 return -1;
235 *target = value;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000236 }
237 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000238}
239
240static int
241_set_char(const char *name, char *target, PyObject *src, char dflt)
242{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000243 if (src == NULL)
244 *target = dflt;
245 else {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200246 *target = '\0';
247 if (src != Py_None) {
248 Py_ssize_t len;
249 if (!PyString_Check(src)) {
250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be string, not %.200s", name,
252 src->ob_type->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000253 return -1;
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200254 }
255 len = PyString_GET_SIZE(src);
256 if (len > 1) {
257 PyErr_Format(PyExc_TypeError,
258 "\"%s\" must be an 1-character string",
259 name);
260 return -1;
261 }
262 if (len > 0)
263 *target = *PyString_AS_STRING(src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000264 }
265 }
266 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000267}
268
269static int
270_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
271{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000272 if (src == NULL)
273 *target = PyString_FromString(dflt);
274 else {
275 if (src == Py_None)
276 *target = NULL;
277 else if (!IS_BASESTRING(src)) {
278 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200279 "\"%s\" must be a string", name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000280 return -1;
281 }
282 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 Py_INCREF(src);
Serhiy Storchakabc62af12016-04-06 09:51:18 +0300284 Py_XSETREF(*target, src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 }
286 }
287 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288}
289
290static int
291dialect_check_quoting(int quoting)
292{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000293 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000294
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000295 for (qs = quote_styles; qs->name; qs++) {
296 if (qs->style == quoting)
297 return 0;
298 }
299 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
300 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301}
Skip Montanarob4a04172003-03-20 23:29:12 +0000302
303#define D_OFF(x) offsetof(DialectObj, x)
304
305static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
307 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
308 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
309 { "strict", T_INT, D_OFF(strict), READONLY },
310 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000311};
312
313static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000314 { "escapechar", (getter)Dialect_get_escapechar},
315 { "lineterminator", (getter)Dialect_get_lineterminator},
316 { "quotechar", (getter)Dialect_get_quotechar},
317 { "quoting", (getter)Dialect_get_quoting},
318 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000319};
320
321static void
322Dialect_dealloc(DialectObj *self)
323{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000324 Py_XDECREF(self->lineterminator);
325 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000326}
327
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000328static char *dialect_kws[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000329 "dialect",
330 "delimiter",
331 "doublequote",
332 "escapechar",
333 "lineterminator",
334 "quotechar",
335 "quoting",
336 "skipinitialspace",
337 "strict",
338 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000339};
340
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000341static PyObject *
342dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000343{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000344 DialectObj *self;
345 PyObject *ret = NULL;
346 PyObject *dialect = NULL;
347 PyObject *delimiter = NULL;
348 PyObject *doublequote = NULL;
349 PyObject *escapechar = NULL;
350 PyObject *lineterminator = NULL;
351 PyObject *quotechar = NULL;
352 PyObject *quoting = NULL;
353 PyObject *skipinitialspace = NULL;
354 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000355
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000356 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
357 "|OOOOOOOOO", dialect_kws,
358 &dialect,
359 &delimiter,
360 &doublequote,
361 &escapechar,
362 &lineterminator,
363 &quotechar,
364 &quoting,
365 &skipinitialspace,
366 &strict))
367 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000368
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000369 if (dialect != NULL) {
370 if (IS_BASESTRING(dialect)) {
371 dialect = get_dialect_from_registry(dialect);
372 if (dialect == NULL)
373 return NULL;
374 }
375 else
376 Py_INCREF(dialect);
377 /* Can we reuse this instance? */
378 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
379 delimiter == 0 &&
380 doublequote == 0 &&
381 escapechar == 0 &&
382 lineterminator == 0 &&
383 quotechar == 0 &&
384 quoting == 0 &&
385 skipinitialspace == 0 &&
386 strict == 0)
387 return dialect;
388 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000389
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000390 self = (DialectObj *)type->tp_alloc(type, 0);
391 if (self == NULL) {
392 Py_XDECREF(dialect);
393 return NULL;
394 }
395 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000396
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000397 Py_XINCREF(delimiter);
398 Py_XINCREF(doublequote);
399 Py_XINCREF(escapechar);
400 Py_XINCREF(lineterminator);
401 Py_XINCREF(quotechar);
402 Py_XINCREF(quoting);
403 Py_XINCREF(skipinitialspace);
404 Py_XINCREF(strict);
405 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000406#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000407 if (v == NULL) \
408 v = PyObject_GetAttrString(dialect, n)
409 DIALECT_GETATTR(delimiter, "delimiter");
410 DIALECT_GETATTR(doublequote, "doublequote");
411 DIALECT_GETATTR(escapechar, "escapechar");
412 DIALECT_GETATTR(lineterminator, "lineterminator");
413 DIALECT_GETATTR(quotechar, "quotechar");
414 DIALECT_GETATTR(quoting, "quoting");
415 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
416 DIALECT_GETATTR(strict, "strict");
417 PyErr_Clear();
418 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000419
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000420 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000421#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000422 if (meth(name, target, src, dflt)) \
423 goto err
424 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
425 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
426 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
427 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
428 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
429 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
430 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
431 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000432
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000433 /* validate options */
434 if (dialect_check_quoting(self->quoting))
435 goto err;
436 if (self->delimiter == 0) {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200437 PyErr_SetString(PyExc_TypeError,
438 "\"delimiter\" must be an 1-character string");
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000439 goto err;
440 }
441 if (quotechar == Py_None && quoting == NULL)
442 self->quoting = QUOTE_NONE;
443 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
444 PyErr_SetString(PyExc_TypeError,
445 "quotechar must be set if quoting enabled");
446 goto err;
447 }
448 if (self->lineterminator == 0) {
449 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
450 goto err;
451 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000453 ret = (PyObject *)self;
454 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000455err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000456 Py_XDECREF(self);
457 Py_XDECREF(dialect);
458 Py_XDECREF(delimiter);
459 Py_XDECREF(doublequote);
460 Py_XDECREF(escapechar);
461 Py_XDECREF(lineterminator);
462 Py_XDECREF(quotechar);
463 Py_XDECREF(quoting);
464 Py_XDECREF(skipinitialspace);
465 Py_XDECREF(strict);
466 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000467}
468
469
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000470PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000471"CSV dialect\n"
472"\n"
473"The Dialect type records CSV parsing and generation options.\n");
474
475static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000476 PyVarObject_HEAD_INIT(NULL, 0)
477 "_csv.Dialect", /* tp_name */
478 sizeof(DialectObj), /* tp_basicsize */
479 0, /* tp_itemsize */
480 /* methods */
481 (destructor)Dialect_dealloc, /* tp_dealloc */
482 (printfunc)0, /* tp_print */
483 (getattrfunc)0, /* tp_getattr */
484 (setattrfunc)0, /* tp_setattr */
485 (cmpfunc)0, /* tp_compare */
486 (reprfunc)0, /* tp_repr */
487 0, /* tp_as_number */
488 0, /* tp_as_sequence */
489 0, /* tp_as_mapping */
490 (hashfunc)0, /* tp_hash */
491 (ternaryfunc)0, /* tp_call */
492 (reprfunc)0, /* tp_str */
493 0, /* tp_getattro */
494 0, /* tp_setattro */
495 0, /* tp_as_buffer */
496 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
497 Dialect_Type_doc, /* tp_doc */
498 0, /* tp_traverse */
499 0, /* tp_clear */
500 0, /* tp_richcompare */
501 0, /* tp_weaklistoffset */
502 0, /* tp_iter */
503 0, /* tp_iternext */
504 0, /* tp_methods */
505 Dialect_memberlist, /* tp_members */
506 Dialect_getsetlist, /* tp_getset */
507 0, /* tp_base */
508 0, /* tp_dict */
509 0, /* tp_descr_get */
510 0, /* tp_descr_set */
511 0, /* tp_dictoffset */
512 0, /* tp_init */
513 0, /* tp_alloc */
514 dialect_new, /* tp_new */
515 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000516};
517
Andrew McNamara91b97462005-01-11 01:07:23 +0000518/*
519 * Return an instance of the dialect type, given a Python instance or kwarg
520 * description of the dialect
521 */
522static PyObject *
523_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
524{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000525 PyObject *ctor_args;
526 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000527
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000528 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
529 if (ctor_args == NULL)
530 return NULL;
531 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
532 Py_DECREF(ctor_args);
533 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000534}
535
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000536/*
537 * READER
538 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000540parse_save_field(ReaderObj *self)
541{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000542 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000543
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000544 field = PyString_FromStringAndSize(self->field, self->field_len);
545 if (field == NULL)
546 return -1;
547 self->field_len = 0;
548 if (self->numeric_field) {
549 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000550
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000551 self->numeric_field = 0;
552 tmp = PyNumber_Float(field);
553 if (tmp == NULL) {
554 Py_DECREF(field);
555 return -1;
556 }
557 Py_DECREF(field);
558 field = tmp;
559 }
560 PyList_Append(self->fields, field);
561 Py_DECREF(field);
562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
565static int
566parse_grow_buff(ReaderObj *self)
567{
Sergey Fedoseevae1f0122018-08-17 10:36:15 +0500568 unsigned field_size_new;
569 char *field_new;
570
571 assert((unsigned)self->field_size <= INT_MAX);
572
573 field_size_new = self->field_size ? 2 * (unsigned)self->field_size : 4096;
574 if (field_size_new > INT_MAX) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000575 PyErr_NoMemory();
576 return 0;
577 }
Sergey Fedoseevae1f0122018-08-17 10:36:15 +0500578 field_new = (char *)PyMem_Realloc(self->field, field_size_new);
579 if (field_new == NULL) {
580 PyErr_NoMemory();
581 return 0;
582 }
583 self->field = field_new;
584 self->field_size = (int)field_size_new;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000585 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000586}
587
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000588static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000589parse_add_char(ReaderObj *self, char c)
590{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000591 if (self->field_len >= field_limit) {
592 PyErr_Format(error_obj, "field larger than field limit (%ld)",
593 field_limit);
594 return -1;
595 }
596 if (self->field_len == self->field_size && !parse_grow_buff(self))
597 return -1;
598 self->field[self->field_len++] = c;
599 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000600}
601
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000602static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000603parse_process_char(ReaderObj *self, char c)
604{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000605 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000606
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000607 switch (self->state) {
608 case START_RECORD:
609 /* start of record */
610 if (c == '\0')
611 /* empty line - return [] */
612 break;
613 else if (c == '\n' || c == '\r') {
614 self->state = EAT_CRNL;
615 break;
616 }
617 /* normal character - handle as START_FIELD */
618 self->state = START_FIELD;
619 /* fallthru */
620 case START_FIELD:
621 /* expecting field */
622 if (c == '\n' || c == '\r' || c == '\0') {
623 /* save empty field - return [fields] */
624 if (parse_save_field(self) < 0)
625 return -1;
626 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
627 }
628 else if (c == dialect->quotechar &&
629 dialect->quoting != QUOTE_NONE) {
630 /* start quoted field */
631 self->state = IN_QUOTED_FIELD;
632 }
633 else if (c == dialect->escapechar) {
634 /* possible escaped character */
635 self->state = ESCAPED_CHAR;
636 }
637 else if (c == ' ' && dialect->skipinitialspace)
638 /* ignore space at start of field */
639 ;
640 else if (c == dialect->delimiter) {
641 /* save empty field */
642 if (parse_save_field(self) < 0)
643 return -1;
644 }
645 else {
646 /* begin new unquoted field */
647 if (dialect->quoting == QUOTE_NONNUMERIC)
648 self->numeric_field = 1;
649 if (parse_add_char(self, c) < 0)
650 return -1;
651 self->state = IN_FIELD;
652 }
653 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000654
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000655 case ESCAPED_CHAR:
656 if (c == '\0')
657 c = '\n';
658 if (parse_add_char(self, c) < 0)
659 return -1;
660 self->state = IN_FIELD;
661 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000662
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000663 case IN_FIELD:
664 /* in unquoted field */
665 if (c == '\n' || c == '\r' || c == '\0') {
666 /* end of line - return [fields] */
667 if (parse_save_field(self) < 0)
668 return -1;
669 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
670 }
671 else if (c == dialect->escapechar) {
672 /* possible escaped character */
673 self->state = ESCAPED_CHAR;
674 }
675 else if (c == dialect->delimiter) {
676 /* save field - wait for new field */
677 if (parse_save_field(self) < 0)
678 return -1;
679 self->state = START_FIELD;
680 }
681 else {
682 /* normal character - save in field */
683 if (parse_add_char(self, c) < 0)
684 return -1;
685 }
686 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000687
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000688 case IN_QUOTED_FIELD:
689 /* in quoted field */
690 if (c == '\0')
691 ;
692 else if (c == dialect->escapechar) {
693 /* Possible escape character */
694 self->state = ESCAPE_IN_QUOTED_FIELD;
695 }
696 else if (c == dialect->quotechar &&
697 dialect->quoting != QUOTE_NONE) {
698 if (dialect->doublequote) {
699 /* doublequote; " represented by "" */
700 self->state = QUOTE_IN_QUOTED_FIELD;
701 }
702 else {
703 /* end of quote part of field */
704 self->state = IN_FIELD;
705 }
706 }
707 else {
708 /* normal character - save in field */
709 if (parse_add_char(self, c) < 0)
710 return -1;
711 }
712 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000713
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000714 case ESCAPE_IN_QUOTED_FIELD:
715 if (c == '\0')
716 c = '\n';
717 if (parse_add_char(self, c) < 0)
718 return -1;
719 self->state = IN_QUOTED_FIELD;
720 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000721
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000722 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka9a118f12016-04-17 09:37:36 +0300723 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000724 if (dialect->quoting != QUOTE_NONE &&
725 c == dialect->quotechar) {
726 /* save "" as " */
727 if (parse_add_char(self, c) < 0)
728 return -1;
729 self->state = IN_QUOTED_FIELD;
730 }
731 else if (c == dialect->delimiter) {
732 /* save field - wait for new field */
733 if (parse_save_field(self) < 0)
734 return -1;
735 self->state = START_FIELD;
736 }
737 else if (c == '\n' || c == '\r' || c == '\0') {
738 /* end of line - return [fields] */
739 if (parse_save_field(self) < 0)
740 return -1;
741 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
742 }
743 else if (!dialect->strict) {
744 if (parse_add_char(self, c) < 0)
745 return -1;
746 self->state = IN_FIELD;
747 }
748 else {
749 /* illegal */
750 PyErr_Format(error_obj, "'%c' expected after '%c'",
751 dialect->delimiter,
752 dialect->quotechar);
753 return -1;
754 }
755 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000756
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000757 case EAT_CRNL:
758 if (c == '\n' || c == '\r')
759 ;
760 else if (c == '\0')
761 self->state = START_RECORD;
762 else {
763 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
764 return -1;
765 }
766 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000767
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000768 }
769 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000770}
771
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000772static int
773parse_reset(ReaderObj *self)
774{
Serhiy Storchakabc62af12016-04-06 09:51:18 +0300775 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000776 if (self->fields == NULL)
777 return -1;
778 self->field_len = 0;
779 self->state = START_RECORD;
780 self->numeric_field = 0;
781 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000782}
Skip Montanarob4a04172003-03-20 23:29:12 +0000783
784static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000785Reader_iternext(ReaderObj *self)
786{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000787 PyObject *lineobj;
788 PyObject *fields = NULL;
789 char *line, c;
790 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000791
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000792 if (parse_reset(self) < 0)
793 return NULL;
794 do {
795 lineobj = PyIter_Next(self->input_iter);
796 if (lineobj == NULL) {
797 /* End of input OR exception */
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700798 if (!PyErr_Occurred() && (self->field_len != 0 ||
799 self->state == IN_QUOTED_FIELD)) {
800 if (self->dialect->strict)
801 PyErr_SetString(error_obj, "unexpected end of data");
802 else if (parse_save_field(self) >= 0 )
803 break;
804 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000805 return NULL;
806 }
807 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000808
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000809 line = PyString_AsString(lineobj);
810 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000811
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000812 if (line == NULL || linelen < 0) {
813 Py_DECREF(lineobj);
814 return NULL;
815 }
816 while (linelen--) {
817 c = *line++;
818 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000819 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000820 PyErr_Format(error_obj,
821 "line contains NULL byte");
822 goto err;
823 }
824 if (parse_process_char(self, c) < 0) {
825 Py_DECREF(lineobj);
826 goto err;
827 }
828 }
829 Py_DECREF(lineobj);
830 if (parse_process_char(self, 0) < 0)
831 goto err;
832 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000833
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000834 fields = self->fields;
835 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000836err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000837 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000838}
839
840static void
841Reader_dealloc(ReaderObj *self)
842{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000843 PyObject_GC_UnTrack(self);
844 Py_XDECREF(self->dialect);
845 Py_XDECREF(self->input_iter);
846 Py_XDECREF(self->fields);
847 if (self->field != NULL)
848 PyMem_Free(self->field);
849 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000850}
851
852static int
853Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
854{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000855 Py_VISIT(self->dialect);
856 Py_VISIT(self->input_iter);
857 Py_VISIT(self->fields);
858 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000859}
860
861static int
862Reader_clear(ReaderObj *self)
863{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000864 Py_CLEAR(self->dialect);
865 Py_CLEAR(self->input_iter);
866 Py_CLEAR(self->fields);
867 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000868}
869
870PyDoc_STRVAR(Reader_Type_doc,
871"CSV reader\n"
872"\n"
873"Reader objects are responsible for reading and parsing tabular data\n"
874"in CSV format.\n"
875);
876
877static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000878 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000879};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000880#define R_OFF(x) offsetof(ReaderObj, x)
881
882static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 { "dialect", T_OBJECT, R_OFF(dialect), RO },
884 { "line_num", T_ULONG, R_OFF(line_num), RO },
885 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000886};
887
Skip Montanarob4a04172003-03-20 23:29:12 +0000888
889static PyTypeObject Reader_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000890 PyVarObject_HEAD_INIT(NULL, 0)
891 "_csv.reader", /*tp_name*/
892 sizeof(ReaderObj), /*tp_basicsize*/
893 0, /*tp_itemsize*/
894 /* methods */
895 (destructor)Reader_dealloc, /*tp_dealloc*/
896 (printfunc)0, /*tp_print*/
897 (getattrfunc)0, /*tp_getattr*/
898 (setattrfunc)0, /*tp_setattr*/
899 (cmpfunc)0, /*tp_compare*/
900 (reprfunc)0, /*tp_repr*/
901 0, /*tp_as_number*/
902 0, /*tp_as_sequence*/
903 0, /*tp_as_mapping*/
904 (hashfunc)0, /*tp_hash*/
905 (ternaryfunc)0, /*tp_call*/
906 (reprfunc)0, /*tp_str*/
907 0, /*tp_getattro*/
908 0, /*tp_setattro*/
909 0, /*tp_as_buffer*/
910 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
911 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
912 Reader_Type_doc, /*tp_doc*/
913 (traverseproc)Reader_traverse, /*tp_traverse*/
914 (inquiry)Reader_clear, /*tp_clear*/
915 0, /*tp_richcompare*/
916 0, /*tp_weaklistoffset*/
917 PyObject_SelfIter, /*tp_iter*/
918 (getiterfunc)Reader_iternext, /*tp_iternext*/
919 Reader_methods, /*tp_methods*/
920 Reader_memberlist, /*tp_members*/
921 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000922
923};
924
925static PyObject *
926csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
927{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 PyObject * iterator, * dialect = NULL;
929 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000931 if (!self)
932 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000933
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000934 self->dialect = NULL;
935 self->fields = NULL;
936 self->input_iter = NULL;
937 self->field = NULL;
938 self->field_size = 0;
939 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000940
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000941 if (parse_reset(self) < 0) {
942 Py_DECREF(self);
943 return NULL;
944 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000945
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000946 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
947 Py_DECREF(self);
948 return NULL;
949 }
950 self->input_iter = PyObject_GetIter(iterator);
951 if (self->input_iter == NULL) {
952 PyErr_SetString(PyExc_TypeError,
953 "argument 1 must be an iterator");
954 Py_DECREF(self);
955 return NULL;
956 }
957 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
958 if (self->dialect == NULL) {
959 Py_DECREF(self);
960 return NULL;
961 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000962
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000963 PyObject_GC_Track(self);
964 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000965}
966
967/*
968 * WRITER
969 */
970/* ---------------------------------------------------------------- */
971static void
972join_reset(WriterObj *self)
973{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000974 self->rec_len = 0;
975 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000976}
977
/* Granularity, in bytes, by which the writer's record buffer grows. */
#define MEM_INCR (32 * 1024)
979
980/* Calculate new record length or append field to record. Return new
981 * record length.
982 */
983static int
984join_append_data(WriterObj *self, char *field, int quote_empty,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000985 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000986{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000987 DialectObj *dialect = self->dialect;
988 int i, rec_len;
989 char *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000990
Benjamin Petersond81ad0d2016-08-13 17:17:06 -0700991#define INCLEN \
992 do {\
993 if (!copy_phase && rec_len == INT_MAX) { \
994 goto overflow; \
995 } \
996 rec_len++; \
997 } while(0)
998
999#define ADDCH(c) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001000 do {\
1001 if (copy_phase) \
1002 self->rec[rec_len] = c;\
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001003 INCLEN;\
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001004 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001005
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001006 lineterm = PyString_AsString(dialect->lineterminator);
1007 if (lineterm == NULL)
1008 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001009
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001010 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001011
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001012 /* If this is not the first field we need a field separator */
1013 if (self->num_fields > 0)
1014 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001016 /* Handle preceding quote */
1017 if (copy_phase && *quoted)
1018 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001019
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001020 /* Copy/count field data */
1021 for (i = 0;; i++) {
1022 char c = field[i];
1023 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001024
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001025 if (c == '\0')
1026 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001027
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001028 if (c == dialect->delimiter ||
1029 c == dialect->escapechar ||
1030 c == dialect->quotechar ||
1031 strchr(lineterm, c)) {
1032 if (dialect->quoting == QUOTE_NONE)
1033 want_escape = 1;
1034 else {
1035 if (c == dialect->quotechar) {
1036 if (dialect->doublequote)
1037 ADDCH(dialect->quotechar);
1038 else
1039 want_escape = 1;
1040 }
1041 if (!want_escape)
1042 *quoted = 1;
1043 }
1044 if (want_escape) {
1045 if (!dialect->escapechar) {
1046 PyErr_Format(error_obj,
1047 "need to escape, but no escapechar set");
1048 return -1;
1049 }
1050 ADDCH(dialect->escapechar);
1051 }
1052 }
1053 /* Copy field character into record buffer.
1054 */
1055 ADDCH(c);
1056 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001057
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001058 /* If field is empty check if it needs to be quoted.
1059 */
1060 if (i == 0 && quote_empty) {
1061 if (dialect->quoting == QUOTE_NONE) {
1062 PyErr_Format(error_obj,
1063 "single empty field record must be quoted");
1064 return -1;
1065 }
1066 else
1067 *quoted = 1;
1068 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001069
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001070 if (*quoted) {
1071 if (copy_phase)
1072 ADDCH(dialect->quotechar);
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001073 else {
1074 INCLEN; /* starting quote */
1075 INCLEN; /* ending quote */
1076 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001077 }
1078 return rec_len;
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001079
1080 overflow:
1081 PyErr_NoMemory();
1082 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001083#undef ADDCH
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001084#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001085}
1086
1087static int
1088join_check_rec_size(WriterObj *self, int rec_len)
1089{
Sergey Fedoseevae1f0122018-08-17 10:36:15 +05001090 unsigned rec_size_new;
1091 char *rec_new;
Gregory P. Smith9d534572008-06-11 07:41:16 +00001092
Sergey Fedoseevae1f0122018-08-17 10:36:15 +05001093 assert(rec_len >= 0);
Gregory P. Smith9d534572008-06-11 07:41:16 +00001094
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001095 if (rec_len > self->rec_size) {
Sergey Fedoseevae1f0122018-08-17 10:36:15 +05001096 rec_size_new = (unsigned)(rec_len / MEM_INCR + 1) * MEM_INCR;
1097 if (rec_size_new > INT_MAX) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001098 PyErr_NoMemory();
1099 return 0;
1100 }
Sergey Fedoseevae1f0122018-08-17 10:36:15 +05001101 rec_new = (char *)PyMem_Realloc(self->rec, rec_size_new);
1102 if (rec_new == NULL) {
1103 PyErr_NoMemory();
1104 return 0;
1105 }
1106 self->rec = rec_new;
1107 self->rec_size = (int)rec_size_new;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001108 }
1109 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001110}
1111
1112static int
1113join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1114{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001115 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001116
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001117 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1118 if (rec_len < 0)
1119 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001120
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001121 /* grow record buffer if necessary */
1122 if (!join_check_rec_size(self, rec_len))
1123 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001124
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001125 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1126 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001128 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129}
1130
1131static int
1132join_append_lineterminator(WriterObj *self)
1133{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001134 int terminator_len;
1135 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001137 terminator_len = PyString_Size(self->dialect->lineterminator);
1138 if (terminator_len == -1)
1139 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001140
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001141 /* grow record buffer if necessary */
1142 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1143 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001145 terminator = PyString_AsString(self->dialect->lineterminator);
1146 if (terminator == NULL)
1147 return 0;
1148 memmove(self->rec + self->rec_len, terminator, terminator_len);
1149 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001151 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152}
1153
1154PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001155"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001156"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001157"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001158"elements will be converted to string.");
1159
1160static PyObject *
1161csv_writerow(WriterObj *self, PyObject *seq)
1162{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001163 DialectObj *dialect = self->dialect;
1164 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001166 if (!PySequence_Check(seq))
1167 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001168
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001169 len = PySequence_Length(seq);
1170 if (len < 0)
1171 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001173 /* Join all fields in internal buffer.
1174 */
1175 join_reset(self);
1176 for (i = 0; i < len; i++) {
1177 PyObject *field;
1178 int append_ok;
1179 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 field = PySequence_GetItem(seq, i);
1182 if (field == NULL)
1183 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001184
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001185 switch (dialect->quoting) {
1186 case QUOTE_NONNUMERIC:
1187 quoted = !PyNumber_Check(field);
1188 break;
1189 case QUOTE_ALL:
1190 quoted = 1;
1191 break;
1192 default:
1193 quoted = 0;
1194 break;
1195 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001196
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001197 if (PyString_Check(field)) {
1198 append_ok = join_append(self,
1199 PyString_AS_STRING(field),
1200 &quoted, len == 1);
1201 Py_DECREF(field);
1202 }
1203 else if (field == Py_None) {
1204 append_ok = join_append(self, "", &quoted, len == 1);
1205 Py_DECREF(field);
1206 }
1207 else {
1208 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001209
Raymond Hettingerf5377022011-12-11 22:31:09 -08001210 if (PyFloat_Check(field)) {
1211 str = PyObject_Repr(field);
1212 } else {
1213 str = PyObject_Str(field);
1214 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001215 Py_DECREF(field);
1216 if (str == NULL)
1217 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001218
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001219 append_ok = join_append(self, PyString_AS_STRING(str),
1220 &quoted, len == 1);
1221 Py_DECREF(str);
1222 }
1223 if (!append_ok)
1224 return NULL;
1225 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001226
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001227 /* Add line terminator.
1228 */
1229 if (!join_append_lineterminator(self))
1230 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001231
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001232 return PyObject_CallFunction(self->writeline,
1233 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001234}
1235
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001236PyDoc_STRVAR(csv_writerows_doc,
1237"writerows(sequence of sequences)\n"
1238"\n"
1239"Construct and write a series of sequences to a csv file. Non-string\n"
1240"elements will be converted to string.");
1241
Skip Montanarob4a04172003-03-20 23:29:12 +00001242static PyObject *
1243csv_writerows(WriterObj *self, PyObject *seqseq)
1244{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001245 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001246
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001247 row_iter = PyObject_GetIter(seqseq);
1248 if (row_iter == NULL) {
1249 PyErr_SetString(PyExc_TypeError,
1250 "writerows() argument must be iterable");
1251 return NULL;
1252 }
1253 while ((row_obj = PyIter_Next(row_iter))) {
1254 result = csv_writerow(self, row_obj);
1255 Py_DECREF(row_obj);
1256 if (!result) {
1257 Py_DECREF(row_iter);
1258 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001259 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001260 else
1261 Py_DECREF(result);
1262 }
1263 Py_DECREF(row_iter);
1264 if (PyErr_Occurred())
1265 return NULL;
1266 Py_INCREF(Py_None);
1267 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001268}
1269
1270static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001271 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1272 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1273 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001274};
1275
1276#define W_OFF(x) offsetof(WriterObj, x)
1277
1278static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001279 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1280 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001281};
1282
1283static void
1284Writer_dealloc(WriterObj *self)
1285{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 PyObject_GC_UnTrack(self);
1287 Py_XDECREF(self->dialect);
1288 Py_XDECREF(self->writeline);
1289 if (self->rec != NULL)
1290 PyMem_Free(self->rec);
1291 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001292}
1293
1294static int
1295Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1296{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001297 Py_VISIT(self->dialect);
1298 Py_VISIT(self->writeline);
1299 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001300}
1301
1302static int
1303Writer_clear(WriterObj *self)
1304{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001305 Py_CLEAR(self->dialect);
1306 Py_CLEAR(self->writeline);
1307 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001308}
1309
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001310PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001311"CSV writer\n"
1312"\n"
1313"Writer objects are responsible for generating tabular data\n"
1314"in CSV format from sequence input.\n"
1315);
1316
1317static PyTypeObject Writer_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001318 PyVarObject_HEAD_INIT(NULL, 0)
1319 "_csv.writer", /*tp_name*/
1320 sizeof(WriterObj), /*tp_basicsize*/
1321 0, /*tp_itemsize*/
1322 /* methods */
1323 (destructor)Writer_dealloc, /*tp_dealloc*/
1324 (printfunc)0, /*tp_print*/
1325 (getattrfunc)0, /*tp_getattr*/
1326 (setattrfunc)0, /*tp_setattr*/
1327 (cmpfunc)0, /*tp_compare*/
1328 (reprfunc)0, /*tp_repr*/
1329 0, /*tp_as_number*/
1330 0, /*tp_as_sequence*/
1331 0, /*tp_as_mapping*/
1332 (hashfunc)0, /*tp_hash*/
1333 (ternaryfunc)0, /*tp_call*/
1334 (reprfunc)0, /*tp_str*/
1335 0, /*tp_getattro*/
1336 0, /*tp_setattro*/
1337 0, /*tp_as_buffer*/
1338 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1339 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1340 Writer_Type_doc,
1341 (traverseproc)Writer_traverse, /*tp_traverse*/
1342 (inquiry)Writer_clear, /*tp_clear*/
1343 0, /*tp_richcompare*/
1344 0, /*tp_weaklistoffset*/
1345 (getiterfunc)0, /*tp_iter*/
1346 (getiterfunc)0, /*tp_iternext*/
1347 Writer_methods, /*tp_methods*/
1348 Writer_memberlist, /*tp_members*/
1349 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001350};
1351
1352static PyObject *
1353csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1354{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001355 PyObject * output_file, * dialect = NULL;
1356 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001357
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001358 if (!self)
1359 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001360
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001361 self->dialect = NULL;
1362 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001363
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001364 self->rec = NULL;
1365 self->rec_size = 0;
1366 self->rec_len = 0;
1367 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001368
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001369 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1370 Py_DECREF(self);
1371 return NULL;
1372 }
1373 self->writeline = PyObject_GetAttrString(output_file, "write");
1374 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1375 PyErr_SetString(PyExc_TypeError,
1376 "argument 1 must have a \"write\" method");
1377 Py_DECREF(self);
1378 return NULL;
1379 }
1380 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1381 if (self->dialect == NULL) {
1382 Py_DECREF(self);
1383 return NULL;
1384 }
1385 PyObject_GC_Track(self);
1386 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001387}
1388
1389/*
1390 * DIALECT REGISTRY
1391 */
1392static PyObject *
1393csv_list_dialects(PyObject *module, PyObject *args)
1394{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001395 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001396}
1397
1398static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001399csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001400{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001401 PyObject *name_obj, *dialect_obj = NULL;
1402 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001403
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001404 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1405 return NULL;
1406 if (!IS_BASESTRING(name_obj)) {
1407 PyErr_SetString(PyExc_TypeError,
1408 "dialect name must be a string or unicode");
1409 return NULL;
1410 }
1411 dialect = _call_dialect(dialect_obj, kwargs);
1412 if (dialect == NULL)
1413 return NULL;
1414 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1415 Py_DECREF(dialect);
1416 return NULL;
1417 }
1418 Py_DECREF(dialect);
1419 Py_INCREF(Py_None);
1420 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001421}
1422
1423static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001424csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001425{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001426 if (PyDict_DelItem(dialects, name_obj) < 0)
1427 return PyErr_Format(error_obj, "unknown dialect");
1428 Py_INCREF(Py_None);
1429 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001430}
1431
1432static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001433csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001434{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001435 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001436}
1437
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001438static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001439csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001440{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001441 PyObject *new_limit = NULL;
Serhiy Storchaka994f04d2016-12-27 15:09:36 +02001442 long old_limit = field_limit, limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001443
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001444 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1445 return NULL;
1446 if (new_limit != NULL) {
Serhiy Storchaka48c8bf22018-07-31 09:09:36 +03001447 if (!_PyAnyInt_Check(new_limit)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001448 PyErr_Format(PyExc_TypeError,
1449 "limit must be an integer");
1450 return NULL;
1451 }
Serhiy Storchaka994f04d2016-12-27 15:09:36 +02001452 limit = PyInt_AsLong(new_limit);
1453 if (limit == -1 && PyErr_Occurred())
1454 return NULL;
1455 field_limit = limit;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001456 }
1457 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001458}
1459
Skip Montanarob4a04172003-03-20 23:29:12 +00001460/*
1461 * MODULE
1462 */
1463
1464PyDoc_STRVAR(csv_module_doc,
1465"CSV parsing and writing.\n"
1466"\n"
1467"This module provides classes that assist in the reading and writing\n"
1468"of Comma Separated Value (CSV) files, and implements the interface\n"
1469"described by PEP 305. Although many CSV files are simple to parse,\n"
1470"the format is not formally defined by a stable specification and\n"
1471"is subtle enough that parsing lines of a CSV file with something\n"
1472"like line.split(\",\") is bound to fail. The module supports three\n"
1473"basic APIs: reading, writing, and registration of dialects.\n"
1474"\n"
1475"\n"
1476"DIALECT REGISTRATION:\n"
1477"\n"
1478"Readers and writers support a dialect argument, which is a convenient\n"
1479"handle on a group of settings. When the dialect argument is a string,\n"
1480"it identifies one of the dialects previously registered with the module.\n"
1481"If it is a class or instance, the attributes of the argument are used as\n"
1482"the settings for the reader or writer:\n"
1483"\n"
1484" class excel:\n"
1485" delimiter = ','\n"
1486" quotechar = '\"'\n"
1487" escapechar = None\n"
1488" doublequote = True\n"
1489" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001490" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001491" quoting = QUOTE_MINIMAL\n"
1492"\n"
1493"SETTINGS:\n"
1494"\n"
1495" * quotechar - specifies a one-character string to use as the \n"
1496" quoting character. It defaults to '\"'.\n"
1497" * delimiter - specifies a one-character string to use as the \n"
1498" field separator. It defaults to ','.\n"
1499" * skipinitialspace - specifies how to interpret whitespace which\n"
1500" immediately follows a delimiter. It defaults to False, which\n"
1501" means that whitespace immediately following a delimiter is part\n"
1502" of the following field.\n"
1503" * lineterminator - specifies the character sequence which should \n"
1504" terminate rows.\n"
1505" * quoting - controls when quotes should be generated by the writer.\n"
1506" It can take on any of the following module constants:\n"
1507"\n"
1508" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1509" field contains either the quotechar or the delimiter\n"
1510" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1511" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001512" fields which do not parse as integers or floating point\n"
1513" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001514" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1515" * escapechar - specifies a one-character string used to escape \n"
1516" the delimiter when quoting is set to QUOTE_NONE.\n"
1517" * doublequote - controls the handling of quotes inside fields. When\n"
1518" True, two consecutive quotes are interpreted as one during read,\n"
1519" and when writing, each quote character embedded in the data is\n"
1520" written as two quotes\n");
1521
1522PyDoc_STRVAR(csv_reader_doc,
1523" csv_reader = reader(iterable [, dialect='excel']\n"
1524" [optional keyword args])\n"
1525" for row in csv_reader:\n"
1526" process(row)\n"
1527"\n"
1528"The \"iterable\" argument can be any object that returns a line\n"
1529"of input for each iteration, such as a file object or a list. The\n"
1530"optional \"dialect\" parameter is discussed below. The function\n"
1531"also accepts optional keyword arguments which override settings\n"
1532"provided by the dialect.\n"
1533"\n"
1534"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksag4e294772015-10-02 19:30:21 +03001535"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001536
1537PyDoc_STRVAR(csv_writer_doc,
1538" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1539" [optional keyword args])\n"
Fredrik Lundh4aaaa492006-04-04 16:51:13 +00001540" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001541" csv_writer.writerow(row)\n"
1542"\n"
1543" [or]\n"
1544"\n"
1545" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1546" [optional keyword args])\n"
1547" csv_writer.writerows(rows)\n"
1548"\n"
1549"The \"fileobj\" argument can be any object that supports the file API.\n");
1550
1551PyDoc_STRVAR(csv_list_dialects_doc,
1552"Return a list of all know dialect names.\n"
1553" names = csv.list_dialects()");
1554
1555PyDoc_STRVAR(csv_get_dialect_doc,
1556"Return the dialect instance associated with name.\n"
1557" dialect = csv.get_dialect(name)");
1558
1559PyDoc_STRVAR(csv_register_dialect_doc,
1560"Create a mapping from a string name to a dialect class.\n"
1561" dialect = csv.register_dialect(name, dialect)");
1562
1563PyDoc_STRVAR(csv_unregister_dialect_doc,
1564"Delete the name/dialect mapping associated with a string name.\n"
1565" csv.unregister_dialect(name)");
1566
Andrew McNamara31d88962005-01-12 03:45:10 +00001567PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001568"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001569" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001570"\n"
1571"Returns old limit. If limit is not given, no new limit is set and\n"
1572"the old limit is returned");
1573
Skip Montanarob4a04172003-03-20 23:29:12 +00001574static struct PyMethodDef csv_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001575 { "reader", (PyCFunction)csv_reader,
1576 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1577 { "writer", (PyCFunction)csv_writer,
1578 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1579 { "list_dialects", (PyCFunction)csv_list_dialects,
1580 METH_NOARGS, csv_list_dialects_doc},
1581 { "register_dialect", (PyCFunction)csv_register_dialect,
1582 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1583 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1584 METH_O, csv_unregister_dialect_doc},
1585 { "get_dialect", (PyCFunction)csv_get_dialect,
1586 METH_O, csv_get_dialect_doc},
1587 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1588 METH_VARARGS, csv_field_size_limit_doc},
1589 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001590};
1591
1592PyMODINIT_FUNC
1593init_csv(void)
1594{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001595 PyObject *module;
1596 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001597
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001598 if (PyType_Ready(&Dialect_Type) < 0)
1599 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001600
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001601 if (PyType_Ready(&Reader_Type) < 0)
1602 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001603
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001604 if (PyType_Ready(&Writer_Type) < 0)
1605 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001606
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001607 /* Create the module and add the functions */
1608 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1609 if (module == NULL)
1610 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001612 /* Add version to the module. */
1613 if (PyModule_AddStringConstant(module, "__version__",
1614 MODULE_VERSION) == -1)
1615 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001616
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001617 /* Add _dialects dictionary */
1618 dialects = PyDict_New();
1619 if (dialects == NULL)
1620 return;
1621 if (PyModule_AddObject(module, "_dialects", dialects))
1622 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001624 /* Add quote styles into dictionary */
1625 for (style = quote_styles; style->name; style++) {
1626 if (PyModule_AddIntConstant(module, style->name,
1627 style->style) == -1)
1628 return;
1629 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001630
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001631 /* Add the Dialect type */
1632 Py_INCREF(&Dialect_Type);
1633 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1634 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001635
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001636 /* Add the CSV exception object to the module. */
1637 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1638 if (error_obj == NULL)
1639 return;
1640 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001641}