blob: c39c0f10c3cc66d2cef954211e0058d81c2e8ba4 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/*
 * Fallback Py_CLEAR: set the slot to NULL *before* dropping the reference,
 * so code run by the decref never sees a dangling pointer.  The temporary
 * uses a reserved-looking name so that Py_CLEAR(tmp) in a caller does not
 * collide with it.
 */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *_py_tmp = (PyObject *)(op);               \
            (op) = NULL;                                        \
            Py_DECREF(_py_tmp);                                 \
        }                                                       \
    } while (0)
#endif

/*
 * Fallback Py_VISIT: for use inside a tp_traverse function whose parameters
 * are named `visit` and `arg`.  Returns from the enclosing function as soon
 * as the visitor reports a non-zero result.
 */
#ifndef Py_VISIT
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int _py_vret = visit((PyObject *)(op), arg);        \
            if (_py_vret)                                       \
                return _py_vret;                                \
        }                                                       \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
/* Non-zero when o is an instance of PyBaseString_Type or of a subtype. */
#define IS_BASESTRING(o) \
    (PyObject_TypeCheck((o), &PyBaseString_Type))
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000068static PyObject *dialects; /* Dialect registry */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser (parse_process_char). */
typedef enum {
    START_RECORD,               /* nothing of the record consumed yet */
    START_FIELD,                /* expecting the first character of a field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen inside quotes */
    EAT_CRNL                    /* swallowing line-ending characters */
} ParserState;
76
/* Values accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
80
81typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 QuoteStyle style;
83 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000084} StyleDesc;
85
86static StyleDesc quote_styles[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
88 { QUOTE_ALL, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE, "QUOTE_NONE" },
91 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000092};
93
94typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000095 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000113 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000136
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
Antoine Pitrouc5bef752012-08-15 23:16:51 +0200211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000218}
219
Andrew McNamara1196cf12005-01-07 04:42:45 +0000220static int
221_set_int(const char *name, int *target, PyObject *src, int dflt)
222{
Serhiy Storchaka994f04d2016-12-27 15:09:36 +0200223 int value;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000224 if (src == NULL)
225 *target = dflt;
226 else {
Serhiy Storchaka994f04d2016-12-27 15:09:36 +0200227 if (!PyInt_Check(src) && !PyLong_Check(src)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000228 PyErr_Format(PyExc_TypeError,
229 "\"%s\" must be an integer", name);
230 return -1;
231 }
Serhiy Storchaka994f04d2016-12-27 15:09:36 +0200232 value = PyInt_AsLong(src);
233 if (value == -1 && PyErr_Occurred())
234 return -1;
235 *target = value;
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000236 }
237 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000238}
239
240static int
241_set_char(const char *name, char *target, PyObject *src, char dflt)
242{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000243 if (src == NULL)
244 *target = dflt;
245 else {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200246 *target = '\0';
247 if (src != Py_None) {
248 Py_ssize_t len;
249 if (!PyString_Check(src)) {
250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be string, not %.200s", name,
252 src->ob_type->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000253 return -1;
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200254 }
255 len = PyString_GET_SIZE(src);
256 if (len > 1) {
257 PyErr_Format(PyExc_TypeError,
258 "\"%s\" must be an 1-character string",
259 name);
260 return -1;
261 }
262 if (len > 0)
263 *target = *PyString_AS_STRING(src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000264 }
265 }
266 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000267}
268
269static int
270_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
271{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000272 if (src == NULL)
273 *target = PyString_FromString(dflt);
274 else {
275 if (src == Py_None)
276 *target = NULL;
277 else if (!IS_BASESTRING(src)) {
278 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200279 "\"%s\" must be a string", name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000280 return -1;
281 }
282 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 Py_INCREF(src);
Serhiy Storchakabc62af12016-04-06 09:51:18 +0300284 Py_XSETREF(*target, src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 }
286 }
287 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000288}
289
290static int
291dialect_check_quoting(int quoting)
292{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000293 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000294
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000295 for (qs = quote_styles; qs->name; qs++) {
296 if (qs->style == quoting)
297 return 0;
298 }
299 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
300 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301}
Skip Montanarob4a04172003-03-20 23:29:12 +0000302
303#define D_OFF(x) offsetof(DialectObj, x)
304
305static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
307 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
308 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
309 { "strict", T_INT, D_OFF(strict), READONLY },
310 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000311};
312
313static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000314 { "escapechar", (getter)Dialect_get_escapechar},
315 { "lineterminator", (getter)Dialect_get_lineterminator},
316 { "quotechar", (getter)Dialect_get_quotechar},
317 { "quoting", (getter)Dialect_get_quoting},
318 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000319};
320
321static void
322Dialect_dealloc(DialectObj *self)
323{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000324 Py_XDECREF(self->lineterminator);
325 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000326}
327
/*
 * Keyword names accepted by dialect_new, in the order its
 * PyArg_ParseTupleAndKeywords call stores them.
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL    /* sentinel */
};
340
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000341static PyObject *
342dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000343{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000344 DialectObj *self;
345 PyObject *ret = NULL;
346 PyObject *dialect = NULL;
347 PyObject *delimiter = NULL;
348 PyObject *doublequote = NULL;
349 PyObject *escapechar = NULL;
350 PyObject *lineterminator = NULL;
351 PyObject *quotechar = NULL;
352 PyObject *quoting = NULL;
353 PyObject *skipinitialspace = NULL;
354 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000355
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000356 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
357 "|OOOOOOOOO", dialect_kws,
358 &dialect,
359 &delimiter,
360 &doublequote,
361 &escapechar,
362 &lineterminator,
363 &quotechar,
364 &quoting,
365 &skipinitialspace,
366 &strict))
367 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000368
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000369 if (dialect != NULL) {
370 if (IS_BASESTRING(dialect)) {
371 dialect = get_dialect_from_registry(dialect);
372 if (dialect == NULL)
373 return NULL;
374 }
375 else
376 Py_INCREF(dialect);
377 /* Can we reuse this instance? */
378 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
379 delimiter == 0 &&
380 doublequote == 0 &&
381 escapechar == 0 &&
382 lineterminator == 0 &&
383 quotechar == 0 &&
384 quoting == 0 &&
385 skipinitialspace == 0 &&
386 strict == 0)
387 return dialect;
388 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000389
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000390 self = (DialectObj *)type->tp_alloc(type, 0);
391 if (self == NULL) {
392 Py_XDECREF(dialect);
393 return NULL;
394 }
395 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000396
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000397 Py_XINCREF(delimiter);
398 Py_XINCREF(doublequote);
399 Py_XINCREF(escapechar);
400 Py_XINCREF(lineterminator);
401 Py_XINCREF(quotechar);
402 Py_XINCREF(quoting);
403 Py_XINCREF(skipinitialspace);
404 Py_XINCREF(strict);
405 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000406#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000407 if (v == NULL) \
408 v = PyObject_GetAttrString(dialect, n)
409 DIALECT_GETATTR(delimiter, "delimiter");
410 DIALECT_GETATTR(doublequote, "doublequote");
411 DIALECT_GETATTR(escapechar, "escapechar");
412 DIALECT_GETATTR(lineterminator, "lineterminator");
413 DIALECT_GETATTR(quotechar, "quotechar");
414 DIALECT_GETATTR(quoting, "quoting");
415 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
416 DIALECT_GETATTR(strict, "strict");
417 PyErr_Clear();
418 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000419
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000420 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000421#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000422 if (meth(name, target, src, dflt)) \
423 goto err
424 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
425 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
426 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
427 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
428 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
429 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
430 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
431 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000432
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000433 /* validate options */
434 if (dialect_check_quoting(self->quoting))
435 goto err;
436 if (self->delimiter == 0) {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200437 PyErr_SetString(PyExc_TypeError,
438 "\"delimiter\" must be an 1-character string");
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000439 goto err;
440 }
441 if (quotechar == Py_None && quoting == NULL)
442 self->quoting = QUOTE_NONE;
443 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
444 PyErr_SetString(PyExc_TypeError,
445 "quotechar must be set if quoting enabled");
446 goto err;
447 }
448 if (self->lineterminator == 0) {
449 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
450 goto err;
451 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000453 ret = (PyObject *)self;
454 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000455err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000456 Py_XDECREF(self);
457 Py_XDECREF(dialect);
458 Py_XDECREF(delimiter);
459 Py_XDECREF(doublequote);
460 Py_XDECREF(escapechar);
461 Py_XDECREF(lineterminator);
462 Py_XDECREF(quotechar);
463 Py_XDECREF(quoting);
464 Py_XDECREF(skipinitialspace);
465 Py_XDECREF(strict);
466 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000467}
468
469
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000470PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000471"CSV dialect\n"
472"\n"
473"The Dialect type records CSV parsing and generation options.\n");
474
475static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000476 PyVarObject_HEAD_INIT(NULL, 0)
477 "_csv.Dialect", /* tp_name */
478 sizeof(DialectObj), /* tp_basicsize */
479 0, /* tp_itemsize */
480 /* methods */
481 (destructor)Dialect_dealloc, /* tp_dealloc */
482 (printfunc)0, /* tp_print */
483 (getattrfunc)0, /* tp_getattr */
484 (setattrfunc)0, /* tp_setattr */
485 (cmpfunc)0, /* tp_compare */
486 (reprfunc)0, /* tp_repr */
487 0, /* tp_as_number */
488 0, /* tp_as_sequence */
489 0, /* tp_as_mapping */
490 (hashfunc)0, /* tp_hash */
491 (ternaryfunc)0, /* tp_call */
492 (reprfunc)0, /* tp_str */
493 0, /* tp_getattro */
494 0, /* tp_setattro */
495 0, /* tp_as_buffer */
496 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
497 Dialect_Type_doc, /* tp_doc */
498 0, /* tp_traverse */
499 0, /* tp_clear */
500 0, /* tp_richcompare */
501 0, /* tp_weaklistoffset */
502 0, /* tp_iter */
503 0, /* tp_iternext */
504 0, /* tp_methods */
505 Dialect_memberlist, /* tp_members */
506 Dialect_getsetlist, /* tp_getset */
507 0, /* tp_base */
508 0, /* tp_dict */
509 0, /* tp_descr_get */
510 0, /* tp_descr_set */
511 0, /* tp_dictoffset */
512 0, /* tp_init */
513 0, /* tp_alloc */
514 dialect_new, /* tp_new */
515 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000516};
517
Andrew McNamara91b97462005-01-11 01:07:23 +0000518/*
519 * Return an instance of the dialect type, given a Python instance or kwarg
520 * description of the dialect
521 */
522static PyObject *
523_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
524{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000525 PyObject *ctor_args;
526 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000527
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000528 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
529 if (ctor_args == NULL)
530 return NULL;
531 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
532 Py_DECREF(ctor_args);
533 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000534}
535
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000536/*
537 * READER
538 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000540parse_save_field(ReaderObj *self)
541{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000542 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000543
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000544 field = PyString_FromStringAndSize(self->field, self->field_len);
545 if (field == NULL)
546 return -1;
547 self->field_len = 0;
548 if (self->numeric_field) {
549 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000550
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000551 self->numeric_field = 0;
552 tmp = PyNumber_Float(field);
553 if (tmp == NULL) {
554 Py_DECREF(field);
555 return -1;
556 }
557 Py_DECREF(field);
558 field = tmp;
559 }
560 PyList_Append(self->fields, field);
561 Py_DECREF(field);
562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
565static int
566parse_grow_buff(ReaderObj *self)
567{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000568 if (self->field_size == 0) {
569 self->field_size = 4096;
570 if (self->field != NULL)
571 PyMem_Free(self->field);
572 self->field = PyMem_Malloc(self->field_size);
573 }
574 else {
575 if (self->field_size > INT_MAX / 2) {
576 PyErr_NoMemory();
577 return 0;
578 }
579 self->field_size *= 2;
580 self->field = PyMem_Realloc(self->field, self->field_size);
581 }
582 if (self->field == NULL) {
583 PyErr_NoMemory();
584 return 0;
585 }
586 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000587}
588
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000589static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000590parse_add_char(ReaderObj *self, char c)
591{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000592 if (self->field_len >= field_limit) {
593 PyErr_Format(error_obj, "field larger than field limit (%ld)",
594 field_limit);
595 return -1;
596 }
597 if (self->field_len == self->field_size && !parse_grow_buff(self))
598 return -1;
599 self->field[self->field_len++] = c;
600 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000601}
602
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000603static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000604parse_process_char(ReaderObj *self, char c)
605{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000606 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000607
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000608 switch (self->state) {
609 case START_RECORD:
610 /* start of record */
611 if (c == '\0')
612 /* empty line - return [] */
613 break;
614 else if (c == '\n' || c == '\r') {
615 self->state = EAT_CRNL;
616 break;
617 }
618 /* normal character - handle as START_FIELD */
619 self->state = START_FIELD;
620 /* fallthru */
621 case START_FIELD:
622 /* expecting field */
623 if (c == '\n' || c == '\r' || c == '\0') {
624 /* save empty field - return [fields] */
625 if (parse_save_field(self) < 0)
626 return -1;
627 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
628 }
629 else if (c == dialect->quotechar &&
630 dialect->quoting != QUOTE_NONE) {
631 /* start quoted field */
632 self->state = IN_QUOTED_FIELD;
633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == ' ' && dialect->skipinitialspace)
639 /* ignore space at start of field */
640 ;
641 else if (c == dialect->delimiter) {
642 /* save empty field */
643 if (parse_save_field(self) < 0)
644 return -1;
645 }
646 else {
647 /* begin new unquoted field */
648 if (dialect->quoting == QUOTE_NONNUMERIC)
649 self->numeric_field = 1;
650 if (parse_add_char(self, c) < 0)
651 return -1;
652 self->state = IN_FIELD;
653 }
654 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000655
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000656 case ESCAPED_CHAR:
657 if (c == '\0')
658 c = '\n';
659 if (parse_add_char(self, c) < 0)
660 return -1;
661 self->state = IN_FIELD;
662 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000663
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000664 case IN_FIELD:
665 /* in unquoted field */
666 if (c == '\n' || c == '\r' || c == '\0') {
667 /* end of line - return [fields] */
668 if (parse_save_field(self) < 0)
669 return -1;
670 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
671 }
672 else if (c == dialect->escapechar) {
673 /* possible escaped character */
674 self->state = ESCAPED_CHAR;
675 }
676 else if (c == dialect->delimiter) {
677 /* save field - wait for new field */
678 if (parse_save_field(self) < 0)
679 return -1;
680 self->state = START_FIELD;
681 }
682 else {
683 /* normal character - save in field */
684 if (parse_add_char(self, c) < 0)
685 return -1;
686 }
687 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000688
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000689 case IN_QUOTED_FIELD:
690 /* in quoted field */
691 if (c == '\0')
692 ;
693 else if (c == dialect->escapechar) {
694 /* Possible escape character */
695 self->state = ESCAPE_IN_QUOTED_FIELD;
696 }
697 else if (c == dialect->quotechar &&
698 dialect->quoting != QUOTE_NONE) {
699 if (dialect->doublequote) {
700 /* doublequote; " represented by "" */
701 self->state = QUOTE_IN_QUOTED_FIELD;
702 }
703 else {
704 /* end of quote part of field */
705 self->state = IN_FIELD;
706 }
707 }
708 else {
709 /* normal character - save in field */
710 if (parse_add_char(self, c) < 0)
711 return -1;
712 }
713 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000714
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000715 case ESCAPE_IN_QUOTED_FIELD:
716 if (c == '\0')
717 c = '\n';
718 if (parse_add_char(self, c) < 0)
719 return -1;
720 self->state = IN_QUOTED_FIELD;
721 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000722
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000723 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka9a118f12016-04-17 09:37:36 +0300724 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000725 if (dialect->quoting != QUOTE_NONE &&
726 c == dialect->quotechar) {
727 /* save "" as " */
728 if (parse_add_char(self, c) < 0)
729 return -1;
730 self->state = IN_QUOTED_FIELD;
731 }
732 else if (c == dialect->delimiter) {
733 /* save field - wait for new field */
734 if (parse_save_field(self) < 0)
735 return -1;
736 self->state = START_FIELD;
737 }
738 else if (c == '\n' || c == '\r' || c == '\0') {
739 /* end of line - return [fields] */
740 if (parse_save_field(self) < 0)
741 return -1;
742 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
743 }
744 else if (!dialect->strict) {
745 if (parse_add_char(self, c) < 0)
746 return -1;
747 self->state = IN_FIELD;
748 }
749 else {
750 /* illegal */
751 PyErr_Format(error_obj, "'%c' expected after '%c'",
752 dialect->delimiter,
753 dialect->quotechar);
754 return -1;
755 }
756 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000757
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000758 case EAT_CRNL:
759 if (c == '\n' || c == '\r')
760 ;
761 else if (c == '\0')
762 self->state = START_RECORD;
763 else {
764 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
765 return -1;
766 }
767 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000768
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000769 }
770 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000771}
772
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000773static int
774parse_reset(ReaderObj *self)
775{
Serhiy Storchakabc62af12016-04-06 09:51:18 +0300776 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000777 if (self->fields == NULL)
778 return -1;
779 self->field_len = 0;
780 self->state = START_RECORD;
781 self->numeric_field = 0;
782 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000783}
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
785static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000786Reader_iternext(ReaderObj *self)
787{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000788 PyObject *lineobj;
789 PyObject *fields = NULL;
790 char *line, c;
791 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000792
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000793 if (parse_reset(self) < 0)
794 return NULL;
795 do {
796 lineobj = PyIter_Next(self->input_iter);
797 if (lineobj == NULL) {
798 /* End of input OR exception */
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700799 if (!PyErr_Occurred() && (self->field_len != 0 ||
800 self->state == IN_QUOTED_FIELD)) {
801 if (self->dialect->strict)
802 PyErr_SetString(error_obj, "unexpected end of data");
803 else if (parse_save_field(self) >= 0 )
804 break;
805 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 return NULL;
807 }
808 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000809
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000810 line = PyString_AsString(lineobj);
811 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000812
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 if (line == NULL || linelen < 0) {
814 Py_DECREF(lineobj);
815 return NULL;
816 }
817 while (linelen--) {
818 c = *line++;
819 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000820 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000821 PyErr_Format(error_obj,
822 "line contains NULL byte");
823 goto err;
824 }
825 if (parse_process_char(self, c) < 0) {
826 Py_DECREF(lineobj);
827 goto err;
828 }
829 }
830 Py_DECREF(lineobj);
831 if (parse_process_char(self, 0) < 0)
832 goto err;
833 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000834
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000835 fields = self->fields;
836 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000837err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000838 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000839}
840
841static void
842Reader_dealloc(ReaderObj *self)
843{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000844 PyObject_GC_UnTrack(self);
845 Py_XDECREF(self->dialect);
846 Py_XDECREF(self->input_iter);
847 Py_XDECREF(self->fields);
848 if (self->field != NULL)
849 PyMem_Free(self->field);
850 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000851}
852
853static int
854Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
855{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000856 Py_VISIT(self->dialect);
857 Py_VISIT(self->input_iter);
858 Py_VISIT(self->fields);
859 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000860}
861
862static int
863Reader_clear(ReaderObj *self)
864{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000865 Py_CLEAR(self->dialect);
866 Py_CLEAR(self->input_iter);
867 Py_CLEAR(self->fields);
868 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000869}
870
871PyDoc_STRVAR(Reader_Type_doc,
872"CSV reader\n"
873"\n"
874"Reader objects are responsible for reading and parsing tabular data\n"
875"in CSV format.\n"
876);
877
878static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000879 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000880};
/* Byte offset of member x inside ReaderObj, for the member table below. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes exposed on reader objects:
 *   dialect  - the dialect object the reader was created with
 *   line_num - the reader's line counter, exposed as an unsigned long
 * The table ends with a NULL-name sentinel entry.
 */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), RO },
    { "line_num", T_ULONG, R_OFF(line_num), RO },
    { NULL }
};
888
Skip Montanarob4a04172003-03-20 23:29:12 +0000889
890static PyTypeObject Reader_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000891 PyVarObject_HEAD_INIT(NULL, 0)
892 "_csv.reader", /*tp_name*/
893 sizeof(ReaderObj), /*tp_basicsize*/
894 0, /*tp_itemsize*/
895 /* methods */
896 (destructor)Reader_dealloc, /*tp_dealloc*/
897 (printfunc)0, /*tp_print*/
898 (getattrfunc)0, /*tp_getattr*/
899 (setattrfunc)0, /*tp_setattr*/
900 (cmpfunc)0, /*tp_compare*/
901 (reprfunc)0, /*tp_repr*/
902 0, /*tp_as_number*/
903 0, /*tp_as_sequence*/
904 0, /*tp_as_mapping*/
905 (hashfunc)0, /*tp_hash*/
906 (ternaryfunc)0, /*tp_call*/
907 (reprfunc)0, /*tp_str*/
908 0, /*tp_getattro*/
909 0, /*tp_setattro*/
910 0, /*tp_as_buffer*/
911 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
912 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
913 Reader_Type_doc, /*tp_doc*/
914 (traverseproc)Reader_traverse, /*tp_traverse*/
915 (inquiry)Reader_clear, /*tp_clear*/
916 0, /*tp_richcompare*/
917 0, /*tp_weaklistoffset*/
918 PyObject_SelfIter, /*tp_iter*/
919 (getiterfunc)Reader_iternext, /*tp_iternext*/
920 Reader_methods, /*tp_methods*/
921 Reader_memberlist, /*tp_members*/
922 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000923
924};
925
926static PyObject *
927csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
928{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000929 PyObject * iterator, * dialect = NULL;
930 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000931
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000932 if (!self)
933 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000934
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000935 self->dialect = NULL;
936 self->fields = NULL;
937 self->input_iter = NULL;
938 self->field = NULL;
939 self->field_size = 0;
940 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000941
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000942 if (parse_reset(self) < 0) {
943 Py_DECREF(self);
944 return NULL;
945 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000947 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
948 Py_DECREF(self);
949 return NULL;
950 }
951 self->input_iter = PyObject_GetIter(iterator);
952 if (self->input_iter == NULL) {
953 PyErr_SetString(PyExc_TypeError,
954 "argument 1 must be an iterator");
955 Py_DECREF(self);
956 return NULL;
957 }
958 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
959 if (self->dialect == NULL) {
960 Py_DECREF(self);
961 return NULL;
962 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000963
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000964 PyObject_GC_Track(self);
965 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000966}
967
968/*
969 * WRITER
970 */
971/* ---------------------------------------------------------------- */
972static void
973join_reset(WriterObj *self)
974{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000975 self->rec_len = 0;
976 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000977}
978
/* Allocation granularity of the writer's record buffer:
 * join_check_rec_size() rounds the buffer size up to a multiple of this. */
#define MEM_INCR 32768
980
981/* Calculate new record length or append field to record. Return new
982 * record length.
983 */
984static int
985join_append_data(WriterObj *self, char *field, int quote_empty,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000987{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000988 DialectObj *dialect = self->dialect;
989 int i, rec_len;
990 char *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000991
Benjamin Petersond81ad0d2016-08-13 17:17:06 -0700992#define INCLEN \
993 do {\
994 if (!copy_phase && rec_len == INT_MAX) { \
995 goto overflow; \
996 } \
997 rec_len++; \
998 } while(0)
999
1000#define ADDCH(c) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001001 do {\
1002 if (copy_phase) \
1003 self->rec[rec_len] = c;\
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001004 INCLEN;\
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001005 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001006
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 lineterm = PyString_AsString(dialect->lineterminator);
1008 if (lineterm == NULL)
1009 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001010
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001011 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001012
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 /* If this is not the first field we need a field separator */
1014 if (self->num_fields > 0)
1015 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001016
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 /* Handle preceding quote */
1018 if (copy_phase && *quoted)
1019 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001020
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001021 /* Copy/count field data */
1022 for (i = 0;; i++) {
1023 char c = field[i];
1024 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001025
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001026 if (c == '\0')
1027 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001028
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001029 if (c == dialect->delimiter ||
1030 c == dialect->escapechar ||
1031 c == dialect->quotechar ||
1032 strchr(lineterm, c)) {
1033 if (dialect->quoting == QUOTE_NONE)
1034 want_escape = 1;
1035 else {
1036 if (c == dialect->quotechar) {
1037 if (dialect->doublequote)
1038 ADDCH(dialect->quotechar);
1039 else
1040 want_escape = 1;
1041 }
1042 if (!want_escape)
1043 *quoted = 1;
1044 }
1045 if (want_escape) {
1046 if (!dialect->escapechar) {
1047 PyErr_Format(error_obj,
1048 "need to escape, but no escapechar set");
1049 return -1;
1050 }
1051 ADDCH(dialect->escapechar);
1052 }
1053 }
1054 /* Copy field character into record buffer.
1055 */
1056 ADDCH(c);
1057 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001058
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001059 /* If field is empty check if it needs to be quoted.
1060 */
1061 if (i == 0 && quote_empty) {
1062 if (dialect->quoting == QUOTE_NONE) {
1063 PyErr_Format(error_obj,
1064 "single empty field record must be quoted");
1065 return -1;
1066 }
1067 else
1068 *quoted = 1;
1069 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001070
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001071 if (*quoted) {
1072 if (copy_phase)
1073 ADDCH(dialect->quotechar);
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001074 else {
1075 INCLEN; /* starting quote */
1076 INCLEN; /* ending quote */
1077 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001078 }
1079 return rec_len;
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001080
1081 overflow:
1082 PyErr_NoMemory();
1083 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001084#undef ADDCH
Benjamin Petersond81ad0d2016-08-13 17:17:06 -07001085#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001086}
1087
1088static int
1089join_check_rec_size(WriterObj *self, int rec_len)
1090{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001091
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001092 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1093 PyErr_NoMemory();
1094 return 0;
1095 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001096
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001097 if (rec_len > self->rec_size) {
1098 if (self->rec_size == 0) {
1099 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1100 if (self->rec != NULL)
1101 PyMem_Free(self->rec);
1102 self->rec = PyMem_Malloc(self->rec_size);
1103 }
1104 else {
1105 char *old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001106
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001107 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1108 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1109 if (self->rec == NULL)
1110 PyMem_Free(old_rec);
1111 }
1112 if (self->rec == NULL) {
1113 PyErr_NoMemory();
1114 return 0;
1115 }
1116 }
1117 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118}
1119
1120static int
1121join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1122{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001123 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001124
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001125 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1126 if (rec_len < 0)
1127 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001128
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001129 /* grow record buffer if necessary */
1130 if (!join_check_rec_size(self, rec_len))
1131 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001132
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001133 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1134 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001136 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137}
1138
1139static int
1140join_append_lineterminator(WriterObj *self)
1141{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001142 int terminator_len;
1143 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001145 terminator_len = PyString_Size(self->dialect->lineterminator);
1146 if (terminator_len == -1)
1147 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001149 /* grow record buffer if necessary */
1150 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1151 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001153 terminator = PyString_AsString(self->dialect->lineterminator);
1154 if (terminator == NULL)
1155 return 0;
1156 memmove(self->rec + self->rec_len, terminator, terminator_len);
1157 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001159 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001160}
1161
/* Docstring of the writer's writerow() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerow_doc,
"writerow(sequence)\n"
"\n"
"Construct and write a CSV record from a sequence of fields. Non-string\n"
"elements will be converted to string.");
1167
1168static PyObject *
1169csv_writerow(WriterObj *self, PyObject *seq)
1170{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001171 DialectObj *dialect = self->dialect;
1172 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001173
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001174 if (!PySequence_Check(seq))
1175 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001176
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001177 len = PySequence_Length(seq);
1178 if (len < 0)
1179 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001181 /* Join all fields in internal buffer.
1182 */
1183 join_reset(self);
1184 for (i = 0; i < len; i++) {
1185 PyObject *field;
1186 int append_ok;
1187 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001188
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001189 field = PySequence_GetItem(seq, i);
1190 if (field == NULL)
1191 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001192
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001193 switch (dialect->quoting) {
1194 case QUOTE_NONNUMERIC:
1195 quoted = !PyNumber_Check(field);
1196 break;
1197 case QUOTE_ALL:
1198 quoted = 1;
1199 break;
1200 default:
1201 quoted = 0;
1202 break;
1203 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001204
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001205 if (PyString_Check(field)) {
1206 append_ok = join_append(self,
1207 PyString_AS_STRING(field),
1208 &quoted, len == 1);
1209 Py_DECREF(field);
1210 }
1211 else if (field == Py_None) {
1212 append_ok = join_append(self, "", &quoted, len == 1);
1213 Py_DECREF(field);
1214 }
1215 else {
1216 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001217
Raymond Hettingerf5377022011-12-11 22:31:09 -08001218 if (PyFloat_Check(field)) {
1219 str = PyObject_Repr(field);
1220 } else {
1221 str = PyObject_Str(field);
1222 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001223 Py_DECREF(field);
1224 if (str == NULL)
1225 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001226
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001227 append_ok = join_append(self, PyString_AS_STRING(str),
1228 &quoted, len == 1);
1229 Py_DECREF(str);
1230 }
1231 if (!append_ok)
1232 return NULL;
1233 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001234
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001235 /* Add line terminator.
1236 */
1237 if (!join_append_lineterminator(self))
1238 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001239
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001240 return PyObject_CallFunction(self->writeline,
1241 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001242}
1243
/* Docstring of the writer's writerows() method (see Writer_methods). */
PyDoc_STRVAR(csv_writerows_doc,
"writerows(sequence of sequences)\n"
"\n"
"Construct and write a series of sequences to a csv file. Non-string\n"
"elements will be converted to string.");
1249
Skip Montanarob4a04172003-03-20 23:29:12 +00001250static PyObject *
1251csv_writerows(WriterObj *self, PyObject *seqseq)
1252{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001253 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001254
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001255 row_iter = PyObject_GetIter(seqseq);
1256 if (row_iter == NULL) {
1257 PyErr_SetString(PyExc_TypeError,
1258 "writerows() argument must be iterable");
1259 return NULL;
1260 }
1261 while ((row_obj = PyIter_Next(row_iter))) {
1262 result = csv_writerow(self, row_obj);
1263 Py_DECREF(row_obj);
1264 if (!result) {
1265 Py_DECREF(row_iter);
1266 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001267 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001268 else
1269 Py_DECREF(result);
1270 }
1271 Py_DECREF(row_iter);
1272 if (PyErr_Occurred())
1273 return NULL;
1274 Py_INCREF(Py_None);
1275 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001276}
1277
/* Methods of writer objects.  Both take exactly one argument (METH_O). */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1283
/* Byte offset of member x inside WriterObj, for the member table below. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Read-only attributes exposed on writer objects: only `dialect`.
 * The table ends with a NULL-name sentinel entry. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }
};
1290
1291static void
1292Writer_dealloc(WriterObj *self)
1293{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001294 PyObject_GC_UnTrack(self);
1295 Py_XDECREF(self->dialect);
1296 Py_XDECREF(self->writeline);
1297 if (self->rec != NULL)
1298 PyMem_Free(self->rec);
1299 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001300}
1301
/* tp_traverse handler for writer objects.
 *
 * Reports the PyObject references held by the writer (dialect and
 * writeline) to the garbage collector through Py_VISIT, which relies on the
 * parameters being named `visit` and `arg`.  Returns 0 once both members
 * have been visited.
 */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1309
/* tp_clear handler for writer objects.
 *
 * Drops the writer's references to dialect and writeline.  Py_CLEAR sets
 * each member to NULL before releasing it.  Always returns 0.
 */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1317
/* Docstring of the _csv.writer type; installed in Writer_Type's tp_doc. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);
1324
1325static PyTypeObject Writer_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001326 PyVarObject_HEAD_INIT(NULL, 0)
1327 "_csv.writer", /*tp_name*/
1328 sizeof(WriterObj), /*tp_basicsize*/
1329 0, /*tp_itemsize*/
1330 /* methods */
1331 (destructor)Writer_dealloc, /*tp_dealloc*/
1332 (printfunc)0, /*tp_print*/
1333 (getattrfunc)0, /*tp_getattr*/
1334 (setattrfunc)0, /*tp_setattr*/
1335 (cmpfunc)0, /*tp_compare*/
1336 (reprfunc)0, /*tp_repr*/
1337 0, /*tp_as_number*/
1338 0, /*tp_as_sequence*/
1339 0, /*tp_as_mapping*/
1340 (hashfunc)0, /*tp_hash*/
1341 (ternaryfunc)0, /*tp_call*/
1342 (reprfunc)0, /*tp_str*/
1343 0, /*tp_getattro*/
1344 0, /*tp_setattro*/
1345 0, /*tp_as_buffer*/
1346 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1347 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1348 Writer_Type_doc,
1349 (traverseproc)Writer_traverse, /*tp_traverse*/
1350 (inquiry)Writer_clear, /*tp_clear*/
1351 0, /*tp_richcompare*/
1352 0, /*tp_weaklistoffset*/
1353 (getiterfunc)0, /*tp_iter*/
1354 (getiterfunc)0, /*tp_iternext*/
1355 Writer_methods, /*tp_methods*/
1356 Writer_memberlist, /*tp_members*/
1357 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001358};
1359
1360static PyObject *
1361csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1362{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001363 PyObject * output_file, * dialect = NULL;
1364 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001365
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001366 if (!self)
1367 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001368
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001369 self->dialect = NULL;
1370 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001371
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001372 self->rec = NULL;
1373 self->rec_size = 0;
1374 self->rec_len = 0;
1375 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001376
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001377 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1378 Py_DECREF(self);
1379 return NULL;
1380 }
1381 self->writeline = PyObject_GetAttrString(output_file, "write");
1382 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1383 PyErr_SetString(PyExc_TypeError,
1384 "argument 1 must have a \"write\" method");
1385 Py_DECREF(self);
1386 return NULL;
1387 }
1388 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1389 if (self->dialect == NULL) {
1390 Py_DECREF(self);
1391 return NULL;
1392 }
1393 PyObject_GC_Track(self);
1394 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001395}
1396
1397/*
1398 * DIALECT REGISTRY
1399 */
/* _csv.list_dialects(): return a new list holding the keys of the
 * module-level `dialects` registry dict.  `args` is unused. */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
    return PyDict_Keys(dialects);
}
1405
1406static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001407csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001408{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001409 PyObject *name_obj, *dialect_obj = NULL;
1410 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001411
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001412 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1413 return NULL;
1414 if (!IS_BASESTRING(name_obj)) {
1415 PyErr_SetString(PyExc_TypeError,
1416 "dialect name must be a string or unicode");
1417 return NULL;
1418 }
1419 dialect = _call_dialect(dialect_obj, kwargs);
1420 if (dialect == NULL)
1421 return NULL;
1422 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1423 Py_DECREF(dialect);
1424 return NULL;
1425 }
1426 Py_DECREF(dialect);
1427 Py_INCREF(Py_None);
1428 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001429}
1430
1431static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001432csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001433{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001434 if (PyDict_DelItem(dialects, name_obj) < 0)
1435 return PyErr_Format(error_obj, "unknown dialect");
1436 Py_INCREF(Py_None);
1437 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001438}
1439
/* _csv.get_dialect(name): thin wrapper that forwards the name to
 * get_dialect_from_registry() and returns its result unchanged. */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1445
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001446static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001447csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001448{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001449 PyObject *new_limit = NULL;
Serhiy Storchaka994f04d2016-12-27 15:09:36 +02001450 long old_limit = field_limit, limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001451
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001452 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1453 return NULL;
1454 if (new_limit != NULL) {
Serhiy Storchaka994f04d2016-12-27 15:09:36 +02001455 if (!PyInt_Check(new_limit) && !PyLong_Check(new_limit)) {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001456 PyErr_Format(PyExc_TypeError,
1457 "limit must be an integer");
1458 return NULL;
1459 }
Serhiy Storchaka994f04d2016-12-27 15:09:36 +02001460 limit = PyInt_AsLong(new_limit);
1461 if (limit == -1 && PyErr_Occurred())
1462 return NULL;
1463 field_limit = limit;
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001464 }
1465 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001466}
1467
Skip Montanarob4a04172003-03-20 23:29:12 +00001468/*
1469 * MODULE
1470 */
1471
/* Module docstring; passed to Py_InitModule3() in init_csv(). */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1529
/* Docstring of the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001544
/* Docstring of the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1558
1559PyDoc_STRVAR(csv_list_dialects_doc,
1560"Return a list of all know dialect names.\n"
1561" names = csv.list_dialects()");
1562
/* Docstring of the module-level get_dialect() function. */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");
1566
1567PyDoc_STRVAR(csv_register_dialect_doc,
1568"Create a mapping from a string name to a dialect class.\n"
1569" dialect = csv.register_dialect(name, dialect)");
1570
/* Docstring of the module-level unregister_dialect() function. */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");
1574
/* Docstring of the module-level field_size_limit() function. */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1581
/* Module-level function table handed to Py_InitModule3() in init_csv().
 * reader, writer and register_dialect accept keyword arguments;
 * unregister_dialect and get_dialect take exactly one argument (METH_O);
 * list_dialects takes none.  The table ends with a NULL sentinel. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }
};
1599
1600PyMODINIT_FUNC
1601init_csv(void)
1602{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001603 PyObject *module;
1604 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001605
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001606 if (PyType_Ready(&Dialect_Type) < 0)
1607 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001608
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001609 if (PyType_Ready(&Reader_Type) < 0)
1610 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001612 if (PyType_Ready(&Writer_Type) < 0)
1613 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001614
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001615 /* Create the module and add the functions */
1616 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1617 if (module == NULL)
1618 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001619
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001620 /* Add version to the module. */
1621 if (PyModule_AddStringConstant(module, "__version__",
1622 MODULE_VERSION) == -1)
1623 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001624
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001625 /* Add _dialects dictionary */
1626 dialects = PyDict_New();
1627 if (dialects == NULL)
1628 return;
1629 if (PyModule_AddObject(module, "_dialects", dialects))
1630 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001631
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001632 /* Add quote styles into dictionary */
1633 for (style = quote_styles; style->name; style++) {
1634 if (PyModule_AddIntConstant(module, style->name,
1635 style->style) == -1)
1636 return;
1637 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001638
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001639 /* Add the Dialect type */
1640 Py_INCREF(&Dialect_Type);
1641 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1642 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001643
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001644 /* Add the CSV exception object to the module. */
1645 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1646 if (error_obj == NULL)
1647 return;
1648 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001649}