blob: af4665897ac106e7d7e91f52c6b975dd990970dd [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/* Docstring helpers; only supplied here when the Python headers in use
   do not already define them. */
#ifndef PyDoc_STRVAR
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Declaration prefix for the module init function. */
#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/* Set `op` to NULL first, then release the reference it used to hold. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *csv_clear_tmp = (PyObject *)(op); \
            (op) = NULL;                                \
            Py_DECREF(csv_clear_tmp);                   \
        }                                               \
    } while (0)
#endif

/* For use inside a traverse function that has `visit` and `arg` in scope:
   call visit on a non-NULL member and return early on a non-zero result. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int csv_visit_rc = visit((PyObject *)(op), arg);    \
            if (csv_visit_rc)                                   \
                return csv_visit_rc;                            \
        }                                                       \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +000065 PyObject_TypeCheck(o, &PyBaseString_Type)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000068static PyObject *dialects; /* Dialect registry */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser
   (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
76
/* Quoting styles a dialect can select; the values are the ones stored in
   DialectObj.quoting and accepted by dialect_check_quoting(). */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
80
81typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000082 QuoteStyle style;
83 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000084} StyleDesc;
85
86static StyleDesc quote_styles[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000087 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
88 { QUOTE_ALL, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE, "QUOTE_NONE" },
91 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000092};
93
94typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000095 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000113 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000136
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
Antoine Pitrouc5bef752012-08-15 23:16:51 +0200211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000218}
219
Andrew McNamara1196cf12005-01-07 04:42:45 +0000220static int
221_set_int(const char *name, int *target, PyObject *src, int dflt)
222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 if (src == NULL)
224 *target = dflt;
225 else {
226 if (!PyInt_Check(src)) {
227 PyErr_Format(PyExc_TypeError,
228 "\"%s\" must be an integer", name);
229 return -1;
230 }
231 *target = PyInt_AsLong(src);
232 }
233 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000234}
235
236static int
237_set_char(const char *name, char *target, PyObject *src, char dflt)
238{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 if (src == NULL)
240 *target = dflt;
241 else {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200242 *target = '\0';
243 if (src != Py_None) {
244 Py_ssize_t len;
245 if (!PyString_Check(src)) {
246 PyErr_Format(PyExc_TypeError,
247 "\"%s\" must be string, not %.200s", name,
248 src->ob_type->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000249 return -1;
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200250 }
251 len = PyString_GET_SIZE(src);
252 if (len > 1) {
253 PyErr_Format(PyExc_TypeError,
254 "\"%s\" must be an 1-character string",
255 name);
256 return -1;
257 }
258 if (len > 0)
259 *target = *PyString_AS_STRING(src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000260 }
261 }
262 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000263}
264
265static int
266_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
267{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000268 if (src == NULL)
269 *target = PyString_FromString(dflt);
270 else {
271 if (src == Py_None)
272 *target = NULL;
273 else if (!IS_BASESTRING(src)) {
274 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200275 "\"%s\" must be a string", name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000276 return -1;
277 }
278 else {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000279 Py_INCREF(src);
Serhiy Storchakabc62af12016-04-06 09:51:18 +0300280 Py_XSETREF(*target, src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 }
282 }
283 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284}
285
286static int
287dialect_check_quoting(int quoting)
288{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000289 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000290
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000291 for (qs = quote_styles; qs->name; qs++) {
292 if (qs->style == quoting)
293 return 0;
294 }
295 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
296 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000297}
Skip Montanarob4a04172003-03-20 23:29:12 +0000298
299#define D_OFF(x) offsetof(DialectObj, x)
300
301static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000302 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
303 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
304 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
305 { "strict", T_INT, D_OFF(strict), READONLY },
306 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000310 { "escapechar", (getter)Dialect_get_escapechar},
311 { "lineterminator", (getter)Dialect_get_lineterminator},
312 { "quotechar", (getter)Dialect_get_quotechar},
313 { "quoting", (getter)Dialect_get_quoting},
314 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000315};
316
317static void
318Dialect_dealloc(DialectObj *self)
319{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000320 Py_XDECREF(self->lineterminator);
321 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000322}
323
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000324static char *dialect_kws[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000325 "dialect",
326 "delimiter",
327 "doublequote",
328 "escapechar",
329 "lineterminator",
330 "quotechar",
331 "quoting",
332 "skipinitialspace",
333 "strict",
334 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000335};
336
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000337static PyObject *
338dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000339{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000340 DialectObj *self;
341 PyObject *ret = NULL;
342 PyObject *dialect = NULL;
343 PyObject *delimiter = NULL;
344 PyObject *doublequote = NULL;
345 PyObject *escapechar = NULL;
346 PyObject *lineterminator = NULL;
347 PyObject *quotechar = NULL;
348 PyObject *quoting = NULL;
349 PyObject *skipinitialspace = NULL;
350 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000351
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000352 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
353 "|OOOOOOOOO", dialect_kws,
354 &dialect,
355 &delimiter,
356 &doublequote,
357 &escapechar,
358 &lineterminator,
359 &quotechar,
360 &quoting,
361 &skipinitialspace,
362 &strict))
363 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000364
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000365 if (dialect != NULL) {
366 if (IS_BASESTRING(dialect)) {
367 dialect = get_dialect_from_registry(dialect);
368 if (dialect == NULL)
369 return NULL;
370 }
371 else
372 Py_INCREF(dialect);
373 /* Can we reuse this instance? */
374 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
375 delimiter == 0 &&
376 doublequote == 0 &&
377 escapechar == 0 &&
378 lineterminator == 0 &&
379 quotechar == 0 &&
380 quoting == 0 &&
381 skipinitialspace == 0 &&
382 strict == 0)
383 return dialect;
384 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000385
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000386 self = (DialectObj *)type->tp_alloc(type, 0);
387 if (self == NULL) {
388 Py_XDECREF(dialect);
389 return NULL;
390 }
391 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000392
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000393 Py_XINCREF(delimiter);
394 Py_XINCREF(doublequote);
395 Py_XINCREF(escapechar);
396 Py_XINCREF(lineterminator);
397 Py_XINCREF(quotechar);
398 Py_XINCREF(quoting);
399 Py_XINCREF(skipinitialspace);
400 Py_XINCREF(strict);
401 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000402#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000403 if (v == NULL) \
404 v = PyObject_GetAttrString(dialect, n)
405 DIALECT_GETATTR(delimiter, "delimiter");
406 DIALECT_GETATTR(doublequote, "doublequote");
407 DIALECT_GETATTR(escapechar, "escapechar");
408 DIALECT_GETATTR(lineterminator, "lineterminator");
409 DIALECT_GETATTR(quotechar, "quotechar");
410 DIALECT_GETATTR(quoting, "quoting");
411 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
412 DIALECT_GETATTR(strict, "strict");
413 PyErr_Clear();
414 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000415
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000416 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000417#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000418 if (meth(name, target, src, dflt)) \
419 goto err
420 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
421 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
422 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
423 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
424 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
425 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
426 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
427 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000428
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000429 /* validate options */
430 if (dialect_check_quoting(self->quoting))
431 goto err;
432 if (self->delimiter == 0) {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200433 PyErr_SetString(PyExc_TypeError,
434 "\"delimiter\" must be an 1-character string");
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000435 goto err;
436 }
437 if (quotechar == Py_None && quoting == NULL)
438 self->quoting = QUOTE_NONE;
439 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
440 PyErr_SetString(PyExc_TypeError,
441 "quotechar must be set if quoting enabled");
442 goto err;
443 }
444 if (self->lineterminator == 0) {
445 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
446 goto err;
447 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000448
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000449 ret = (PyObject *)self;
450 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000451err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000452 Py_XDECREF(self);
453 Py_XDECREF(dialect);
454 Py_XDECREF(delimiter);
455 Py_XDECREF(doublequote);
456 Py_XDECREF(escapechar);
457 Py_XDECREF(lineterminator);
458 Py_XDECREF(quotechar);
459 Py_XDECREF(quoting);
460 Py_XDECREF(skipinitialspace);
461 Py_XDECREF(strict);
462 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000463}
464
465
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000466PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000467"CSV dialect\n"
468"\n"
469"The Dialect type records CSV parsing and generation options.\n");
470
471static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000472 PyVarObject_HEAD_INIT(NULL, 0)
473 "_csv.Dialect", /* tp_name */
474 sizeof(DialectObj), /* tp_basicsize */
475 0, /* tp_itemsize */
476 /* methods */
477 (destructor)Dialect_dealloc, /* tp_dealloc */
478 (printfunc)0, /* tp_print */
479 (getattrfunc)0, /* tp_getattr */
480 (setattrfunc)0, /* tp_setattr */
481 (cmpfunc)0, /* tp_compare */
482 (reprfunc)0, /* tp_repr */
483 0, /* tp_as_number */
484 0, /* tp_as_sequence */
485 0, /* tp_as_mapping */
486 (hashfunc)0, /* tp_hash */
487 (ternaryfunc)0, /* tp_call */
488 (reprfunc)0, /* tp_str */
489 0, /* tp_getattro */
490 0, /* tp_setattro */
491 0, /* tp_as_buffer */
492 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
493 Dialect_Type_doc, /* tp_doc */
494 0, /* tp_traverse */
495 0, /* tp_clear */
496 0, /* tp_richcompare */
497 0, /* tp_weaklistoffset */
498 0, /* tp_iter */
499 0, /* tp_iternext */
500 0, /* tp_methods */
501 Dialect_memberlist, /* tp_members */
502 Dialect_getsetlist, /* tp_getset */
503 0, /* tp_base */
504 0, /* tp_dict */
505 0, /* tp_descr_get */
506 0, /* tp_descr_set */
507 0, /* tp_dictoffset */
508 0, /* tp_init */
509 0, /* tp_alloc */
510 dialect_new, /* tp_new */
511 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000512};
513
Andrew McNamara91b97462005-01-11 01:07:23 +0000514/*
515 * Return an instance of the dialect type, given a Python instance or kwarg
516 * description of the dialect
517 */
518static PyObject *
519_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
520{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000521 PyObject *ctor_args;
522 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000523
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000524 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
525 if (ctor_args == NULL)
526 return NULL;
527 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
528 Py_DECREF(ctor_args);
529 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000530}
531
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000532/*
533 * READER
534 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000535static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000536parse_save_field(ReaderObj *self)
537{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000538 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000539
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000540 field = PyString_FromStringAndSize(self->field, self->field_len);
541 if (field == NULL)
542 return -1;
543 self->field_len = 0;
544 if (self->numeric_field) {
545 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000546
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000547 self->numeric_field = 0;
548 tmp = PyNumber_Float(field);
549 if (tmp == NULL) {
550 Py_DECREF(field);
551 return -1;
552 }
553 Py_DECREF(field);
554 field = tmp;
555 }
556 PyList_Append(self->fields, field);
557 Py_DECREF(field);
558 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559}
560
561static int
562parse_grow_buff(ReaderObj *self)
563{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000564 if (self->field_size == 0) {
565 self->field_size = 4096;
566 if (self->field != NULL)
567 PyMem_Free(self->field);
568 self->field = PyMem_Malloc(self->field_size);
569 }
570 else {
571 if (self->field_size > INT_MAX / 2) {
572 PyErr_NoMemory();
573 return 0;
574 }
575 self->field_size *= 2;
576 self->field = PyMem_Realloc(self->field, self->field_size);
577 }
578 if (self->field == NULL) {
579 PyErr_NoMemory();
580 return 0;
581 }
582 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000583}
584
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000585static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000586parse_add_char(ReaderObj *self, char c)
587{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000588 if (self->field_len >= field_limit) {
589 PyErr_Format(error_obj, "field larger than field limit (%ld)",
590 field_limit);
591 return -1;
592 }
593 if (self->field_len == self->field_size && !parse_grow_buff(self))
594 return -1;
595 self->field[self->field_len++] = c;
596 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000597}
598
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000599static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000600parse_process_char(ReaderObj *self, char c)
601{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000602 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000603
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000604 switch (self->state) {
605 case START_RECORD:
606 /* start of record */
607 if (c == '\0')
608 /* empty line - return [] */
609 break;
610 else if (c == '\n' || c == '\r') {
611 self->state = EAT_CRNL;
612 break;
613 }
614 /* normal character - handle as START_FIELD */
615 self->state = START_FIELD;
616 /* fallthru */
617 case START_FIELD:
618 /* expecting field */
619 if (c == '\n' || c == '\r' || c == '\0') {
620 /* save empty field - return [fields] */
621 if (parse_save_field(self) < 0)
622 return -1;
623 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
624 }
625 else if (c == dialect->quotechar &&
626 dialect->quoting != QUOTE_NONE) {
627 /* start quoted field */
628 self->state = IN_QUOTED_FIELD;
629 }
630 else if (c == dialect->escapechar) {
631 /* possible escaped character */
632 self->state = ESCAPED_CHAR;
633 }
634 else if (c == ' ' && dialect->skipinitialspace)
635 /* ignore space at start of field */
636 ;
637 else if (c == dialect->delimiter) {
638 /* save empty field */
639 if (parse_save_field(self) < 0)
640 return -1;
641 }
642 else {
643 /* begin new unquoted field */
644 if (dialect->quoting == QUOTE_NONNUMERIC)
645 self->numeric_field = 1;
646 if (parse_add_char(self, c) < 0)
647 return -1;
648 self->state = IN_FIELD;
649 }
650 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000651
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000652 case ESCAPED_CHAR:
653 if (c == '\0')
654 c = '\n';
655 if (parse_add_char(self, c) < 0)
656 return -1;
657 self->state = IN_FIELD;
658 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000659
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000660 case IN_FIELD:
661 /* in unquoted field */
662 if (c == '\n' || c == '\r' || c == '\0') {
663 /* end of line - return [fields] */
664 if (parse_save_field(self) < 0)
665 return -1;
666 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
667 }
668 else if (c == dialect->escapechar) {
669 /* possible escaped character */
670 self->state = ESCAPED_CHAR;
671 }
672 else if (c == dialect->delimiter) {
673 /* save field - wait for new field */
674 if (parse_save_field(self) < 0)
675 return -1;
676 self->state = START_FIELD;
677 }
678 else {
679 /* normal character - save in field */
680 if (parse_add_char(self, c) < 0)
681 return -1;
682 }
683 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000684
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000685 case IN_QUOTED_FIELD:
686 /* in quoted field */
687 if (c == '\0')
688 ;
689 else if (c == dialect->escapechar) {
690 /* Possible escape character */
691 self->state = ESCAPE_IN_QUOTED_FIELD;
692 }
693 else if (c == dialect->quotechar &&
694 dialect->quoting != QUOTE_NONE) {
695 if (dialect->doublequote) {
696 /* doublequote; " represented by "" */
697 self->state = QUOTE_IN_QUOTED_FIELD;
698 }
699 else {
700 /* end of quote part of field */
701 self->state = IN_FIELD;
702 }
703 }
704 else {
705 /* normal character - save in field */
706 if (parse_add_char(self, c) < 0)
707 return -1;
708 }
709 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000710
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000711 case ESCAPE_IN_QUOTED_FIELD:
712 if (c == '\0')
713 c = '\n';
714 if (parse_add_char(self, c) < 0)
715 return -1;
716 self->state = IN_QUOTED_FIELD;
717 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000718
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000719 case QUOTE_IN_QUOTED_FIELD:
Serhiy Storchaka9a118f12016-04-17 09:37:36 +0300720 /* doublequote - seen a quote in a quoted field */
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000721 if (dialect->quoting != QUOTE_NONE &&
722 c == dialect->quotechar) {
723 /* save "" as " */
724 if (parse_add_char(self, c) < 0)
725 return -1;
726 self->state = IN_QUOTED_FIELD;
727 }
728 else if (c == dialect->delimiter) {
729 /* save field - wait for new field */
730 if (parse_save_field(self) < 0)
731 return -1;
732 self->state = START_FIELD;
733 }
734 else if (c == '\n' || c == '\r' || c == '\0') {
735 /* end of line - return [fields] */
736 if (parse_save_field(self) < 0)
737 return -1;
738 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
739 }
740 else if (!dialect->strict) {
741 if (parse_add_char(self, c) < 0)
742 return -1;
743 self->state = IN_FIELD;
744 }
745 else {
746 /* illegal */
747 PyErr_Format(error_obj, "'%c' expected after '%c'",
748 dialect->delimiter,
749 dialect->quotechar);
750 return -1;
751 }
752 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000753
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000754 case EAT_CRNL:
755 if (c == '\n' || c == '\r')
756 ;
757 else if (c == '\0')
758 self->state = START_RECORD;
759 else {
760 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
761 return -1;
762 }
763 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000764
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000765 }
766 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000767}
768
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000769static int
770parse_reset(ReaderObj *self)
771{
Serhiy Storchakabc62af12016-04-06 09:51:18 +0300772 Py_XSETREF(self->fields, PyList_New(0));
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000773 if (self->fields == NULL)
774 return -1;
775 self->field_len = 0;
776 self->state = START_RECORD;
777 self->numeric_field = 0;
778 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000779}
Skip Montanarob4a04172003-03-20 23:29:12 +0000780
781static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000782Reader_iternext(ReaderObj *self)
783{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000784 PyObject *lineobj;
785 PyObject *fields = NULL;
786 char *line, c;
787 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000788
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000789 if (parse_reset(self) < 0)
790 return NULL;
791 do {
792 lineobj = PyIter_Next(self->input_iter);
793 if (lineobj == NULL) {
794 /* End of input OR exception */
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700795 if (!PyErr_Occurred() && (self->field_len != 0 ||
796 self->state == IN_QUOTED_FIELD)) {
797 if (self->dialect->strict)
798 PyErr_SetString(error_obj, "unexpected end of data");
799 else if (parse_save_field(self) >= 0 )
800 break;
801 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000802 return NULL;
803 }
804 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000805
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000806 line = PyString_AsString(lineobj);
807 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000808
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000809 if (line == NULL || linelen < 0) {
810 Py_DECREF(lineobj);
811 return NULL;
812 }
813 while (linelen--) {
814 c = *line++;
815 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000816 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000817 PyErr_Format(error_obj,
818 "line contains NULL byte");
819 goto err;
820 }
821 if (parse_process_char(self, c) < 0) {
822 Py_DECREF(lineobj);
823 goto err;
824 }
825 }
826 Py_DECREF(lineobj);
827 if (parse_process_char(self, 0) < 0)
828 goto err;
829 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000830
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000831 fields = self->fields;
832 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000833err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000834 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000835}
836
837static void
838Reader_dealloc(ReaderObj *self)
839{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000840 PyObject_GC_UnTrack(self);
841 Py_XDECREF(self->dialect);
842 Py_XDECREF(self->input_iter);
843 Py_XDECREF(self->fields);
844 if (self->field != NULL)
845 PyMem_Free(self->field);
846 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000847}
848
849static int
850Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
851{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000852 Py_VISIT(self->dialect);
853 Py_VISIT(self->input_iter);
854 Py_VISIT(self->fields);
855 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000856}
857
858static int
859Reader_clear(ReaderObj *self)
860{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000861 Py_CLEAR(self->dialect);
862 Py_CLEAR(self->input_iter);
863 Py_CLEAR(self->fields);
864 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000865}
866
867PyDoc_STRVAR(Reader_Type_doc,
868"CSV reader\n"
869"\n"
870"Reader objects are responsible for reading and parsing tabular data\n"
871"in CSV format.\n"
872);
873
874static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000875 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000876};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000877#define R_OFF(x) offsetof(ReaderObj, x)
878
879static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000880 { "dialect", T_OBJECT, R_OFF(dialect), RO },
881 { "line_num", T_ULONG, R_OFF(line_num), RO },
882 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000883};
884
Skip Montanarob4a04172003-03-20 23:29:12 +0000885
886static PyTypeObject Reader_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000887 PyVarObject_HEAD_INIT(NULL, 0)
888 "_csv.reader", /*tp_name*/
889 sizeof(ReaderObj), /*tp_basicsize*/
890 0, /*tp_itemsize*/
891 /* methods */
892 (destructor)Reader_dealloc, /*tp_dealloc*/
893 (printfunc)0, /*tp_print*/
894 (getattrfunc)0, /*tp_getattr*/
895 (setattrfunc)0, /*tp_setattr*/
896 (cmpfunc)0, /*tp_compare*/
897 (reprfunc)0, /*tp_repr*/
898 0, /*tp_as_number*/
899 0, /*tp_as_sequence*/
900 0, /*tp_as_mapping*/
901 (hashfunc)0, /*tp_hash*/
902 (ternaryfunc)0, /*tp_call*/
903 (reprfunc)0, /*tp_str*/
904 0, /*tp_getattro*/
905 0, /*tp_setattro*/
906 0, /*tp_as_buffer*/
907 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
908 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
909 Reader_Type_doc, /*tp_doc*/
910 (traverseproc)Reader_traverse, /*tp_traverse*/
911 (inquiry)Reader_clear, /*tp_clear*/
912 0, /*tp_richcompare*/
913 0, /*tp_weaklistoffset*/
914 PyObject_SelfIter, /*tp_iter*/
915 (getiterfunc)Reader_iternext, /*tp_iternext*/
916 Reader_methods, /*tp_methods*/
917 Reader_memberlist, /*tp_members*/
918 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000919
920};
921
922static PyObject *
923csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
924{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000925 PyObject * iterator, * dialect = NULL;
926 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000927
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000928 if (!self)
929 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000931 self->dialect = NULL;
932 self->fields = NULL;
933 self->input_iter = NULL;
934 self->field = NULL;
935 self->field_size = 0;
936 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000937
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000938 if (parse_reset(self) < 0) {
939 Py_DECREF(self);
940 return NULL;
941 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000942
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000943 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
944 Py_DECREF(self);
945 return NULL;
946 }
947 self->input_iter = PyObject_GetIter(iterator);
948 if (self->input_iter == NULL) {
949 PyErr_SetString(PyExc_TypeError,
950 "argument 1 must be an iterator");
951 Py_DECREF(self);
952 return NULL;
953 }
954 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
955 if (self->dialect == NULL) {
956 Py_DECREF(self);
957 return NULL;
958 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000959
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000960 PyObject_GC_Track(self);
961 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000962}
963
964/*
965 * WRITER
966 */
967/* ---------------------------------------------------------------- */
968static void
969join_reset(WriterObj *self)
970{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000971 self->rec_len = 0;
972 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000973}
974
/* Granularity, in bytes, to which the writer's record buffer size is
 * rounded when it is (re)allocated. */
#define MEM_INCR 32768
976
977/* Calculate new record length or append field to record. Return new
978 * record length.
979 */
980static int
981join_append_data(WriterObj *self, char *field, int quote_empty,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000982 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000983{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000984 DialectObj *dialect = self->dialect;
985 int i, rec_len;
986 char *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000987
988#define ADDCH(c) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000989 do {\
990 if (copy_phase) \
991 self->rec[rec_len] = c;\
992 rec_len++;\
993 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000994
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000995 lineterm = PyString_AsString(dialect->lineterminator);
996 if (lineterm == NULL)
997 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000998
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000999 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001001 /* If this is not the first field we need a field separator */
1002 if (self->num_fields > 0)
1003 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001004
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001005 /* Handle preceding quote */
1006 if (copy_phase && *quoted)
1007 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001008
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001009 /* Copy/count field data */
1010 for (i = 0;; i++) {
1011 char c = field[i];
1012 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001013
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001014 if (c == '\0')
1015 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001016
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001017 if (c == dialect->delimiter ||
1018 c == dialect->escapechar ||
1019 c == dialect->quotechar ||
1020 strchr(lineterm, c)) {
1021 if (dialect->quoting == QUOTE_NONE)
1022 want_escape = 1;
1023 else {
1024 if (c == dialect->quotechar) {
1025 if (dialect->doublequote)
1026 ADDCH(dialect->quotechar);
1027 else
1028 want_escape = 1;
1029 }
1030 if (!want_escape)
1031 *quoted = 1;
1032 }
1033 if (want_escape) {
1034 if (!dialect->escapechar) {
1035 PyErr_Format(error_obj,
1036 "need to escape, but no escapechar set");
1037 return -1;
1038 }
1039 ADDCH(dialect->escapechar);
1040 }
1041 }
1042 /* Copy field character into record buffer.
1043 */
1044 ADDCH(c);
1045 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001046
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001047 /* If field is empty check if it needs to be quoted.
1048 */
1049 if (i == 0 && quote_empty) {
1050 if (dialect->quoting == QUOTE_NONE) {
1051 PyErr_Format(error_obj,
1052 "single empty field record must be quoted");
1053 return -1;
1054 }
1055 else
1056 *quoted = 1;
1057 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001058
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001059 if (*quoted) {
1060 if (copy_phase)
1061 ADDCH(dialect->quotechar);
1062 else
1063 rec_len += 2;
1064 }
1065 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001066#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001067}
1068
1069static int
1070join_check_rec_size(WriterObj *self, int rec_len)
1071{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001072
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001073 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1074 PyErr_NoMemory();
1075 return 0;
1076 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001077
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001078 if (rec_len > self->rec_size) {
1079 if (self->rec_size == 0) {
1080 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1081 if (self->rec != NULL)
1082 PyMem_Free(self->rec);
1083 self->rec = PyMem_Malloc(self->rec_size);
1084 }
1085 else {
1086 char *old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001088 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1089 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1090 if (self->rec == NULL)
1091 PyMem_Free(old_rec);
1092 }
1093 if (self->rec == NULL) {
1094 PyErr_NoMemory();
1095 return 0;
1096 }
1097 }
1098 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001099}
1100
1101static int
1102join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1103{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001104 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001105
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001106 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1107 if (rec_len < 0)
1108 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001109
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001110 /* grow record buffer if necessary */
1111 if (!join_check_rec_size(self, rec_len))
1112 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001113
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001114 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1115 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001116
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001117 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118}
1119
1120static int
1121join_append_lineterminator(WriterObj *self)
1122{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001123 int terminator_len;
1124 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 terminator_len = PyString_Size(self->dialect->lineterminator);
1127 if (terminator_len == -1)
1128 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001130 /* grow record buffer if necessary */
1131 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1132 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001133
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001134 terminator = PyString_AsString(self->dialect->lineterminator);
1135 if (terminator == NULL)
1136 return 0;
1137 memmove(self->rec + self->rec_len, terminator, terminator_len);
1138 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001139
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001140 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141}
1142
1143PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001144"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001145"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001146"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001147"elements will be converted to string.");
1148
1149static PyObject *
1150csv_writerow(WriterObj *self, PyObject *seq)
1151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001152 DialectObj *dialect = self->dialect;
1153 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001154
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001155 if (!PySequence_Check(seq))
1156 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001157
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001158 len = PySequence_Length(seq);
1159 if (len < 0)
1160 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001161
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001162 /* Join all fields in internal buffer.
1163 */
1164 join_reset(self);
1165 for (i = 0; i < len; i++) {
1166 PyObject *field;
1167 int append_ok;
1168 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001170 field = PySequence_GetItem(seq, i);
1171 if (field == NULL)
1172 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001173
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001174 switch (dialect->quoting) {
1175 case QUOTE_NONNUMERIC:
1176 quoted = !PyNumber_Check(field);
1177 break;
1178 case QUOTE_ALL:
1179 quoted = 1;
1180 break;
1181 default:
1182 quoted = 0;
1183 break;
1184 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001185
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001186 if (PyString_Check(field)) {
1187 append_ok = join_append(self,
1188 PyString_AS_STRING(field),
1189 &quoted, len == 1);
1190 Py_DECREF(field);
1191 }
1192 else if (field == Py_None) {
1193 append_ok = join_append(self, "", &quoted, len == 1);
1194 Py_DECREF(field);
1195 }
1196 else {
1197 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001198
Raymond Hettingerf5377022011-12-11 22:31:09 -08001199 if (PyFloat_Check(field)) {
1200 str = PyObject_Repr(field);
1201 } else {
1202 str = PyObject_Str(field);
1203 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001204 Py_DECREF(field);
1205 if (str == NULL)
1206 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001207
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001208 append_ok = join_append(self, PyString_AS_STRING(str),
1209 &quoted, len == 1);
1210 Py_DECREF(str);
1211 }
1212 if (!append_ok)
1213 return NULL;
1214 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001215
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001216 /* Add line terminator.
1217 */
1218 if (!join_append_lineterminator(self))
1219 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001220
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001221 return PyObject_CallFunction(self->writeline,
1222 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001223}
1224
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001225PyDoc_STRVAR(csv_writerows_doc,
1226"writerows(sequence of sequences)\n"
1227"\n"
1228"Construct and write a series of sequences to a csv file. Non-string\n"
1229"elements will be converted to string.");
1230
Skip Montanarob4a04172003-03-20 23:29:12 +00001231static PyObject *
1232csv_writerows(WriterObj *self, PyObject *seqseq)
1233{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001234 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001235
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001236 row_iter = PyObject_GetIter(seqseq);
1237 if (row_iter == NULL) {
1238 PyErr_SetString(PyExc_TypeError,
1239 "writerows() argument must be iterable");
1240 return NULL;
1241 }
1242 while ((row_obj = PyIter_Next(row_iter))) {
1243 result = csv_writerow(self, row_obj);
1244 Py_DECREF(row_obj);
1245 if (!result) {
1246 Py_DECREF(row_iter);
1247 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001248 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001249 else
1250 Py_DECREF(result);
1251 }
1252 Py_DECREF(row_iter);
1253 if (PyErr_Occurred())
1254 return NULL;
1255 Py_INCREF(Py_None);
1256 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001257}
1258
1259static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001260 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1261 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1262 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001263};
1264
1265#define W_OFF(x) offsetof(WriterObj, x)
1266
1267static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001268 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1269 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001270};
1271
1272static void
1273Writer_dealloc(WriterObj *self)
1274{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001275 PyObject_GC_UnTrack(self);
1276 Py_XDECREF(self->dialect);
1277 Py_XDECREF(self->writeline);
1278 if (self->rec != NULL)
1279 PyMem_Free(self->rec);
1280 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001281}
1282
1283static int
1284Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1285{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001286 Py_VISIT(self->dialect);
1287 Py_VISIT(self->writeline);
1288 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001289}
1290
1291static int
1292Writer_clear(WriterObj *self)
1293{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001294 Py_CLEAR(self->dialect);
1295 Py_CLEAR(self->writeline);
1296 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001297}
1298
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001299PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001300"CSV writer\n"
1301"\n"
1302"Writer objects are responsible for generating tabular data\n"
1303"in CSV format from sequence input.\n"
1304);
1305
1306static PyTypeObject Writer_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001307 PyVarObject_HEAD_INIT(NULL, 0)
1308 "_csv.writer", /*tp_name*/
1309 sizeof(WriterObj), /*tp_basicsize*/
1310 0, /*tp_itemsize*/
1311 /* methods */
1312 (destructor)Writer_dealloc, /*tp_dealloc*/
1313 (printfunc)0, /*tp_print*/
1314 (getattrfunc)0, /*tp_getattr*/
1315 (setattrfunc)0, /*tp_setattr*/
1316 (cmpfunc)0, /*tp_compare*/
1317 (reprfunc)0, /*tp_repr*/
1318 0, /*tp_as_number*/
1319 0, /*tp_as_sequence*/
1320 0, /*tp_as_mapping*/
1321 (hashfunc)0, /*tp_hash*/
1322 (ternaryfunc)0, /*tp_call*/
1323 (reprfunc)0, /*tp_str*/
1324 0, /*tp_getattro*/
1325 0, /*tp_setattro*/
1326 0, /*tp_as_buffer*/
1327 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1328 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1329 Writer_Type_doc,
1330 (traverseproc)Writer_traverse, /*tp_traverse*/
1331 (inquiry)Writer_clear, /*tp_clear*/
1332 0, /*tp_richcompare*/
1333 0, /*tp_weaklistoffset*/
1334 (getiterfunc)0, /*tp_iter*/
1335 (getiterfunc)0, /*tp_iternext*/
1336 Writer_methods, /*tp_methods*/
1337 Writer_memberlist, /*tp_members*/
1338 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001339};
1340
1341static PyObject *
1342csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1343{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001344 PyObject * output_file, * dialect = NULL;
1345 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001346
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001347 if (!self)
1348 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001349
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001350 self->dialect = NULL;
1351 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001352
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001353 self->rec = NULL;
1354 self->rec_size = 0;
1355 self->rec_len = 0;
1356 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001357
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001358 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1359 Py_DECREF(self);
1360 return NULL;
1361 }
1362 self->writeline = PyObject_GetAttrString(output_file, "write");
1363 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1364 PyErr_SetString(PyExc_TypeError,
1365 "argument 1 must have a \"write\" method");
1366 Py_DECREF(self);
1367 return NULL;
1368 }
1369 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1370 if (self->dialect == NULL) {
1371 Py_DECREF(self);
1372 return NULL;
1373 }
1374 PyObject_GC_Track(self);
1375 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001376}
1377
1378/*
1379 * DIALECT REGISTRY
1380 */
1381static PyObject *
1382csv_list_dialects(PyObject *module, PyObject *args)
1383{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001384 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001385}
1386
1387static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001388csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001389{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001390 PyObject *name_obj, *dialect_obj = NULL;
1391 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001392
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001393 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1394 return NULL;
1395 if (!IS_BASESTRING(name_obj)) {
1396 PyErr_SetString(PyExc_TypeError,
1397 "dialect name must be a string or unicode");
1398 return NULL;
1399 }
1400 dialect = _call_dialect(dialect_obj, kwargs);
1401 if (dialect == NULL)
1402 return NULL;
1403 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1404 Py_DECREF(dialect);
1405 return NULL;
1406 }
1407 Py_DECREF(dialect);
1408 Py_INCREF(Py_None);
1409 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001410}
1411
1412static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001413csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001414{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001415 if (PyDict_DelItem(dialects, name_obj) < 0)
1416 return PyErr_Format(error_obj, "unknown dialect");
1417 Py_INCREF(Py_None);
1418 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001419}
1420
1421static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001422csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001423{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001424 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001425}
1426
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001427static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001428csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001429{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001430 PyObject *new_limit = NULL;
1431 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001432
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001433 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1434 return NULL;
1435 if (new_limit != NULL) {
1436 if (!PyInt_Check(new_limit)) {
1437 PyErr_Format(PyExc_TypeError,
1438 "limit must be an integer");
1439 return NULL;
1440 }
1441 field_limit = PyInt_AsLong(new_limit);
1442 }
1443 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001444}
1445
Skip Montanarob4a04172003-03-20 23:29:12 +00001446/*
1447 * MODULE
1448 */
1449
1450PyDoc_STRVAR(csv_module_doc,
1451"CSV parsing and writing.\n"
1452"\n"
1453"This module provides classes that assist in the reading and writing\n"
1454"of Comma Separated Value (CSV) files, and implements the interface\n"
1455"described by PEP 305. Although many CSV files are simple to parse,\n"
1456"the format is not formally defined by a stable specification and\n"
1457"is subtle enough that parsing lines of a CSV file with something\n"
1458"like line.split(\",\") is bound to fail. The module supports three\n"
1459"basic APIs: reading, writing, and registration of dialects.\n"
1460"\n"
1461"\n"
1462"DIALECT REGISTRATION:\n"
1463"\n"
1464"Readers and writers support a dialect argument, which is a convenient\n"
1465"handle on a group of settings. When the dialect argument is a string,\n"
1466"it identifies one of the dialects previously registered with the module.\n"
1467"If it is a class or instance, the attributes of the argument are used as\n"
1468"the settings for the reader or writer:\n"
1469"\n"
1470" class excel:\n"
1471" delimiter = ','\n"
1472" quotechar = '\"'\n"
1473" escapechar = None\n"
1474" doublequote = True\n"
1475" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001476" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001477" quoting = QUOTE_MINIMAL\n"
1478"\n"
1479"SETTINGS:\n"
1480"\n"
1481" * quotechar - specifies a one-character string to use as the \n"
1482" quoting character. It defaults to '\"'.\n"
1483" * delimiter - specifies a one-character string to use as the \n"
1484" field separator. It defaults to ','.\n"
1485" * skipinitialspace - specifies how to interpret whitespace which\n"
1486" immediately follows a delimiter. It defaults to False, which\n"
1487" means that whitespace immediately following a delimiter is part\n"
1488" of the following field.\n"
1489" * lineterminator - specifies the character sequence which should \n"
1490" terminate rows.\n"
1491" * quoting - controls when quotes should be generated by the writer.\n"
1492" It can take on any of the following module constants:\n"
1493"\n"
1494" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1495" field contains either the quotechar or the delimiter\n"
1496" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1497" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001498" fields which do not parse as integers or floating point\n"
1499" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001500" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1501" * escapechar - specifies a one-character string used to escape \n"
1502" the delimiter when quoting is set to QUOTE_NONE.\n"
1503" * doublequote - controls the handling of quotes inside fields. When\n"
1504" True, two consecutive quotes are interpreted as one during read,\n"
1505" and when writing, each quote character embedded in the data is\n"
1506" written as two quotes\n");
1507
1508PyDoc_STRVAR(csv_reader_doc,
1509" csv_reader = reader(iterable [, dialect='excel']\n"
1510" [optional keyword args])\n"
1511" for row in csv_reader:\n"
1512" process(row)\n"
1513"\n"
1514"The \"iterable\" argument can be any object that returns a line\n"
1515"of input for each iteration, such as a file object or a list. The\n"
1516"optional \"dialect\" parameter is discussed below. The function\n"
1517"also accepts optional keyword arguments which override settings\n"
1518"provided by the dialect.\n"
1519"\n"
1520"The returned object is an iterator. Each iteration returns a row\n"
Berker Peksag4e294772015-10-02 19:30:21 +03001521"of the CSV file (which can span multiple input lines).\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001522
1523PyDoc_STRVAR(csv_writer_doc,
1524" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1525" [optional keyword args])\n"
Fredrik Lundh4aaaa492006-04-04 16:51:13 +00001526" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001527" csv_writer.writerow(row)\n"
1528"\n"
1529" [or]\n"
1530"\n"
1531" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1532" [optional keyword args])\n"
1533" csv_writer.writerows(rows)\n"
1534"\n"
1535"The \"fileobj\" argument can be any object that supports the file API.\n");
1536
1537PyDoc_STRVAR(csv_list_dialects_doc,
1538"Return a list of all know dialect names.\n"
1539" names = csv.list_dialects()");
1540
1541PyDoc_STRVAR(csv_get_dialect_doc,
1542"Return the dialect instance associated with name.\n"
1543" dialect = csv.get_dialect(name)");
1544
1545PyDoc_STRVAR(csv_register_dialect_doc,
1546"Create a mapping from a string name to a dialect class.\n"
1547" dialect = csv.register_dialect(name, dialect)");
1548
1549PyDoc_STRVAR(csv_unregister_dialect_doc,
1550"Delete the name/dialect mapping associated with a string name.\n"
1551" csv.unregister_dialect(name)");
1552
Andrew McNamara31d88962005-01-12 03:45:10 +00001553PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001554"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001555" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001556"\n"
1557"Returns old limit. If limit is not given, no new limit is set and\n"
1558"the old limit is returned");
1559
Skip Montanarob4a04172003-03-20 23:29:12 +00001560static struct PyMethodDef csv_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001561 { "reader", (PyCFunction)csv_reader,
1562 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1563 { "writer", (PyCFunction)csv_writer,
1564 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1565 { "list_dialects", (PyCFunction)csv_list_dialects,
1566 METH_NOARGS, csv_list_dialects_doc},
1567 { "register_dialect", (PyCFunction)csv_register_dialect,
1568 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1569 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1570 METH_O, csv_unregister_dialect_doc},
1571 { "get_dialect", (PyCFunction)csv_get_dialect,
1572 METH_O, csv_get_dialect_doc},
1573 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1574 METH_VARARGS, csv_field_size_limit_doc},
1575 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001576};
1577
1578PyMODINIT_FUNC
1579init_csv(void)
1580{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001581 PyObject *module;
1582 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001583
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001584 if (PyType_Ready(&Dialect_Type) < 0)
1585 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001586
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001587 if (PyType_Ready(&Reader_Type) < 0)
1588 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001589
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001590 if (PyType_Ready(&Writer_Type) < 0)
1591 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001592
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001593 /* Create the module and add the functions */
1594 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1595 if (module == NULL)
1596 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001597
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001598 /* Add version to the module. */
1599 if (PyModule_AddStringConstant(module, "__version__",
1600 MODULE_VERSION) == -1)
1601 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001602
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001603 /* Add _dialects dictionary */
1604 dialects = PyDict_New();
1605 if (dialects == NULL)
1606 return;
1607 if (PyModule_AddObject(module, "_dialects", dialects))
1608 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001609
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001610 /* Add quote styles into dictionary */
1611 for (style = quote_styles; style->name; style++) {
1612 if (PyModule_AddIntConstant(module, style->name,
1613 style->style) == -1)
1614 return;
1615 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001616
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001617 /* Add the Dialect type */
1618 Py_INCREF(&Dialect_Type);
1619 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1620 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001621
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001622 /* Add the CSV exception object to the module. */
1623 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1624 if (error_obj == NULL)
1625 return;
1626 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001627}