blob: 00f5d002cdda778b62ad351c93a3b7317a54469f [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
Skip Montanarob4a04172003-03-20 23:29:12 +000021/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers for headers that do not provide PyDoc_STRVAR.  The text
   is replaced by an empty string unless WITH_DOC_STRINGS is defined. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Return type of the module init function; C linkage is forced under C++. */
#ifdef __cplusplus
#define PyMODINIT_FUNC extern "C" void
#else
#define PyMODINIT_FUNC void
#endif
#endif /* ifndef PyMODINIT_FUNC */

#ifndef Py_CLEAR
/* Release the reference stored in op.  op is set to NULL *before* the
   decref so it never holds a pointer to an object being torn down. */
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *tmp = (PyObject *)(op);           \
            (op) = NULL;                                \
            Py_DECREF(tmp);                             \
        }                                               \
    } while (0)
#endif /* ifndef Py_CLEAR */

#ifndef Py_VISIT
/* Visit op from a traverse function.  Expands to code that uses the names
   `visit` and `arg` and returns early when the visitor reports non-zero. */
#define Py_VISIT(op)                                    \
    do {                                                \
        if (op) {                                       \
            int vret = visit((PyObject *)(op), arg);    \
            if (vret)                                   \
                return vret;                            \
        }                                               \
    } while (0)
#endif /* ifndef Py_VISIT */
Thomas Wouters2742c5e2006-04-15 17:33:14 +000061
Skip Montanarob4a04172003-03-20 23:29:12 +000062/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +000065 PyObject_TypeCheck(o, &PyBaseString_Type)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000068static PyObject *dialects; /* Dialect registry */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the character-driven CSV parser (see parse_process_char). */
typedef enum {
    START_RECORD,               /* nothing of the record seen yet */
    START_FIELD,                /* expecting the start of a field */
    ESCAPED_CHAR,               /* escape character seen in unquoted field */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen in quoted field */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen in quoted field */
    EAT_CRNL                    /* consuming end-of-line characters */
} ParserState;
76
/* Quoting styles a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting style with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of all known quoting styles; terminated by an entry whose name
   is NULL. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
93
94typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000095 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000113 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000136
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
Antoine Pitrouc5bef752012-08-15 23:16:51 +0200211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000218}
219
Andrew McNamara1196cf12005-01-07 04:42:45 +0000220static int
221_set_int(const char *name, int *target, PyObject *src, int dflt)
222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 if (src == NULL)
224 *target = dflt;
225 else {
226 if (!PyInt_Check(src)) {
227 PyErr_Format(PyExc_TypeError,
228 "\"%s\" must be an integer", name);
229 return -1;
230 }
231 *target = PyInt_AsLong(src);
232 }
233 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000234}
235
236static int
237_set_char(const char *name, char *target, PyObject *src, char dflt)
238{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 if (src == NULL)
240 *target = dflt;
241 else {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200242 *target = '\0';
243 if (src != Py_None) {
244 Py_ssize_t len;
245 if (!PyString_Check(src)) {
246 PyErr_Format(PyExc_TypeError,
247 "\"%s\" must be string, not %.200s", name,
248 src->ob_type->tp_name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000249 return -1;
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200250 }
251 len = PyString_GET_SIZE(src);
252 if (len > 1) {
253 PyErr_Format(PyExc_TypeError,
254 "\"%s\" must be an 1-character string",
255 name);
256 return -1;
257 }
258 if (len > 0)
259 *target = *PyString_AS_STRING(src);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000260 }
261 }
262 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000263}
264
265static int
266_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
267{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000268 if (src == NULL)
269 *target = PyString_FromString(dflt);
270 else {
271 if (src == Py_None)
272 *target = NULL;
273 else if (!IS_BASESTRING(src)) {
274 PyErr_Format(PyExc_TypeError,
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200275 "\"%s\" must be a string", name);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000276 return -1;
277 }
278 else {
279 Py_XDECREF(*target);
280 Py_INCREF(src);
281 *target = src;
282 }
283 }
284 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000285}
286
287static int
288dialect_check_quoting(int quoting)
289{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000290 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000292 for (qs = quote_styles; qs->name; qs++) {
293 if (qs->style == quoting)
294 return 0;
295 }
296 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
297 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000298}
Skip Montanarob4a04172003-03-20 23:29:12 +0000299
300#define D_OFF(x) offsetof(DialectObj, x)
301
302static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000303 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
304 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
305 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
306 { "strict", T_INT, D_OFF(strict), READONLY },
307 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000308};
309
310static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000311 { "escapechar", (getter)Dialect_get_escapechar},
312 { "lineterminator", (getter)Dialect_get_lineterminator},
313 { "quotechar", (getter)Dialect_get_quotechar},
314 { "quoting", (getter)Dialect_get_quoting},
315 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000316};
317
318static void
319Dialect_dealloc(DialectObj *self)
320{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000321 Py_XDECREF(self->lineterminator);
322 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000323}
324
/* Keyword names accepted by dialect_new, in the order its argument
   parsing expects them; NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",        "doublequote",
    "escapechar",       "lineterminator",
    "quotechar",        "quoting",
    "skipinitialspace", "strict",
    NULL
};
337
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000338static PyObject *
339dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000340{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000341 DialectObj *self;
342 PyObject *ret = NULL;
343 PyObject *dialect = NULL;
344 PyObject *delimiter = NULL;
345 PyObject *doublequote = NULL;
346 PyObject *escapechar = NULL;
347 PyObject *lineterminator = NULL;
348 PyObject *quotechar = NULL;
349 PyObject *quoting = NULL;
350 PyObject *skipinitialspace = NULL;
351 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000353 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
354 "|OOOOOOOOO", dialect_kws,
355 &dialect,
356 &delimiter,
357 &doublequote,
358 &escapechar,
359 &lineterminator,
360 &quotechar,
361 &quoting,
362 &skipinitialspace,
363 &strict))
364 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000365
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000366 if (dialect != NULL) {
367 if (IS_BASESTRING(dialect)) {
368 dialect = get_dialect_from_registry(dialect);
369 if (dialect == NULL)
370 return NULL;
371 }
372 else
373 Py_INCREF(dialect);
374 /* Can we reuse this instance? */
375 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
376 delimiter == 0 &&
377 doublequote == 0 &&
378 escapechar == 0 &&
379 lineterminator == 0 &&
380 quotechar == 0 &&
381 quoting == 0 &&
382 skipinitialspace == 0 &&
383 strict == 0)
384 return dialect;
385 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000386
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000387 self = (DialectObj *)type->tp_alloc(type, 0);
388 if (self == NULL) {
389 Py_XDECREF(dialect);
390 return NULL;
391 }
392 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000393
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000394 Py_XINCREF(delimiter);
395 Py_XINCREF(doublequote);
396 Py_XINCREF(escapechar);
397 Py_XINCREF(lineterminator);
398 Py_XINCREF(quotechar);
399 Py_XINCREF(quoting);
400 Py_XINCREF(skipinitialspace);
401 Py_XINCREF(strict);
402 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000404 if (v == NULL) \
405 v = PyObject_GetAttrString(dialect, n)
406 DIALECT_GETATTR(delimiter, "delimiter");
407 DIALECT_GETATTR(doublequote, "doublequote");
408 DIALECT_GETATTR(escapechar, "escapechar");
409 DIALECT_GETATTR(lineterminator, "lineterminator");
410 DIALECT_GETATTR(quotechar, "quotechar");
411 DIALECT_GETATTR(quoting, "quoting");
412 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
413 DIALECT_GETATTR(strict, "strict");
414 PyErr_Clear();
415 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000416
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000417 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000418#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000419 if (meth(name, target, src, dflt)) \
420 goto err
421 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
422 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
423 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
424 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
425 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
426 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
427 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
428 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000429
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000430 /* validate options */
431 if (dialect_check_quoting(self->quoting))
432 goto err;
433 if (self->delimiter == 0) {
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200434 PyErr_SetString(PyExc_TypeError,
435 "\"delimiter\" must be an 1-character string");
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000436 goto err;
437 }
438 if (quotechar == Py_None && quoting == NULL)
439 self->quoting = QUOTE_NONE;
440 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
441 PyErr_SetString(PyExc_TypeError,
442 "quotechar must be set if quoting enabled");
443 goto err;
444 }
445 if (self->lineterminator == 0) {
446 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
447 goto err;
448 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000450 ret = (PyObject *)self;
451 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000453 Py_XDECREF(self);
454 Py_XDECREF(dialect);
455 Py_XDECREF(delimiter);
456 Py_XDECREF(doublequote);
457 Py_XDECREF(escapechar);
458 Py_XDECREF(lineterminator);
459 Py_XDECREF(quotechar);
460 Py_XDECREF(quoting);
461 Py_XDECREF(skipinitialspace);
462 Py_XDECREF(strict);
463 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000464}
465
466
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000467PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000468"CSV dialect\n"
469"\n"
470"The Dialect type records CSV parsing and generation options.\n");
471
472static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000473 PyVarObject_HEAD_INIT(NULL, 0)
474 "_csv.Dialect", /* tp_name */
475 sizeof(DialectObj), /* tp_basicsize */
476 0, /* tp_itemsize */
477 /* methods */
478 (destructor)Dialect_dealloc, /* tp_dealloc */
479 (printfunc)0, /* tp_print */
480 (getattrfunc)0, /* tp_getattr */
481 (setattrfunc)0, /* tp_setattr */
482 (cmpfunc)0, /* tp_compare */
483 (reprfunc)0, /* tp_repr */
484 0, /* tp_as_number */
485 0, /* tp_as_sequence */
486 0, /* tp_as_mapping */
487 (hashfunc)0, /* tp_hash */
488 (ternaryfunc)0, /* tp_call */
489 (reprfunc)0, /* tp_str */
490 0, /* tp_getattro */
491 0, /* tp_setattro */
492 0, /* tp_as_buffer */
493 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
494 Dialect_Type_doc, /* tp_doc */
495 0, /* tp_traverse */
496 0, /* tp_clear */
497 0, /* tp_richcompare */
498 0, /* tp_weaklistoffset */
499 0, /* tp_iter */
500 0, /* tp_iternext */
501 0, /* tp_methods */
502 Dialect_memberlist, /* tp_members */
503 Dialect_getsetlist, /* tp_getset */
504 0, /* tp_base */
505 0, /* tp_dict */
506 0, /* tp_descr_get */
507 0, /* tp_descr_set */
508 0, /* tp_dictoffset */
509 0, /* tp_init */
510 0, /* tp_alloc */
511 dialect_new, /* tp_new */
512 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000513};
514
Andrew McNamara91b97462005-01-11 01:07:23 +0000515/*
516 * Return an instance of the dialect type, given a Python instance or kwarg
517 * description of the dialect
518 */
519static PyObject *
520_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
521{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000522 PyObject *ctor_args;
523 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000524
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000525 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
526 if (ctor_args == NULL)
527 return NULL;
528 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
529 Py_DECREF(ctor_args);
530 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000531}
532
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000533/*
534 * READER
535 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000536static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000537parse_save_field(ReaderObj *self)
538{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000539 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000540
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000541 field = PyString_FromStringAndSize(self->field, self->field_len);
542 if (field == NULL)
543 return -1;
544 self->field_len = 0;
545 if (self->numeric_field) {
546 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000547
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000548 self->numeric_field = 0;
549 tmp = PyNumber_Float(field);
550 if (tmp == NULL) {
551 Py_DECREF(field);
552 return -1;
553 }
554 Py_DECREF(field);
555 field = tmp;
556 }
557 PyList_Append(self->fields, field);
558 Py_DECREF(field);
559 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000560}
561
562static int
563parse_grow_buff(ReaderObj *self)
564{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000565 if (self->field_size == 0) {
566 self->field_size = 4096;
567 if (self->field != NULL)
568 PyMem_Free(self->field);
569 self->field = PyMem_Malloc(self->field_size);
570 }
571 else {
572 if (self->field_size > INT_MAX / 2) {
573 PyErr_NoMemory();
574 return 0;
575 }
576 self->field_size *= 2;
577 self->field = PyMem_Realloc(self->field, self->field_size);
578 }
579 if (self->field == NULL) {
580 PyErr_NoMemory();
581 return 0;
582 }
583 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000584}
585
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000586static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000587parse_add_char(ReaderObj *self, char c)
588{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000589 if (self->field_len >= field_limit) {
590 PyErr_Format(error_obj, "field larger than field limit (%ld)",
591 field_limit);
592 return -1;
593 }
594 if (self->field_len == self->field_size && !parse_grow_buff(self))
595 return -1;
596 self->field[self->field_len++] = c;
597 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000598}
599
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000600static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000601parse_process_char(ReaderObj *self, char c)
602{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000603 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000604
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000605 switch (self->state) {
606 case START_RECORD:
607 /* start of record */
608 if (c == '\0')
609 /* empty line - return [] */
610 break;
611 else if (c == '\n' || c == '\r') {
612 self->state = EAT_CRNL;
613 break;
614 }
615 /* normal character - handle as START_FIELD */
616 self->state = START_FIELD;
617 /* fallthru */
618 case START_FIELD:
619 /* expecting field */
620 if (c == '\n' || c == '\r' || c == '\0') {
621 /* save empty field - return [fields] */
622 if (parse_save_field(self) < 0)
623 return -1;
624 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
625 }
626 else if (c == dialect->quotechar &&
627 dialect->quoting != QUOTE_NONE) {
628 /* start quoted field */
629 self->state = IN_QUOTED_FIELD;
630 }
631 else if (c == dialect->escapechar) {
632 /* possible escaped character */
633 self->state = ESCAPED_CHAR;
634 }
635 else if (c == ' ' && dialect->skipinitialspace)
636 /* ignore space at start of field */
637 ;
638 else if (c == dialect->delimiter) {
639 /* save empty field */
640 if (parse_save_field(self) < 0)
641 return -1;
642 }
643 else {
644 /* begin new unquoted field */
645 if (dialect->quoting == QUOTE_NONNUMERIC)
646 self->numeric_field = 1;
647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = IN_FIELD;
650 }
651 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000653 case ESCAPED_CHAR:
654 if (c == '\0')
655 c = '\n';
656 if (parse_add_char(self, c) < 0)
657 return -1;
658 self->state = IN_FIELD;
659 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000660
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000661 case IN_FIELD:
662 /* in unquoted field */
663 if (c == '\n' || c == '\r' || c == '\0') {
664 /* end of line - return [fields] */
665 if (parse_save_field(self) < 0)
666 return -1;
667 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
668 }
669 else if (c == dialect->escapechar) {
670 /* possible escaped character */
671 self->state = ESCAPED_CHAR;
672 }
673 else if (c == dialect->delimiter) {
674 /* save field - wait for new field */
675 if (parse_save_field(self) < 0)
676 return -1;
677 self->state = START_FIELD;
678 }
679 else {
680 /* normal character - save in field */
681 if (parse_add_char(self, c) < 0)
682 return -1;
683 }
684 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000685
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000686 case IN_QUOTED_FIELD:
687 /* in quoted field */
688 if (c == '\0')
689 ;
690 else if (c == dialect->escapechar) {
691 /* Possible escape character */
692 self->state = ESCAPE_IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->quotechar &&
695 dialect->quoting != QUOTE_NONE) {
696 if (dialect->doublequote) {
697 /* doublequote; " represented by "" */
698 self->state = QUOTE_IN_QUOTED_FIELD;
699 }
700 else {
701 /* end of quote part of field */
702 self->state = IN_FIELD;
703 }
704 }
705 else {
706 /* normal character - save in field */
707 if (parse_add_char(self, c) < 0)
708 return -1;
709 }
710 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000712 case ESCAPE_IN_QUOTED_FIELD:
713 if (c == '\0')
714 c = '\n';
715 if (parse_add_char(self, c) < 0)
716 return -1;
717 self->state = IN_QUOTED_FIELD;
718 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000719
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000720 case QUOTE_IN_QUOTED_FIELD:
721 /* doublequote - seen a quote in an quoted field */
722 if (dialect->quoting != QUOTE_NONE &&
723 c == dialect->quotechar) {
724 /* save "" as " */
725 if (parse_add_char(self, c) < 0)
726 return -1;
727 self->state = IN_QUOTED_FIELD;
728 }
729 else if (c == dialect->delimiter) {
730 /* save field - wait for new field */
731 if (parse_save_field(self) < 0)
732 return -1;
733 self->state = START_FIELD;
734 }
735 else if (c == '\n' || c == '\r' || c == '\0') {
736 /* end of line - return [fields] */
737 if (parse_save_field(self) < 0)
738 return -1;
739 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
740 }
741 else if (!dialect->strict) {
742 if (parse_add_char(self, c) < 0)
743 return -1;
744 self->state = IN_FIELD;
745 }
746 else {
747 /* illegal */
748 PyErr_Format(error_obj, "'%c' expected after '%c'",
749 dialect->delimiter,
750 dialect->quotechar);
751 return -1;
752 }
753 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000754
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000755 case EAT_CRNL:
756 if (c == '\n' || c == '\r')
757 ;
758 else if (c == '\0')
759 self->state = START_RECORD;
760 else {
761 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
762 return -1;
763 }
764 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000765
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000766 }
767 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768}
769
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000770static int
771parse_reset(ReaderObj *self)
772{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000773 Py_XDECREF(self->fields);
774 self->fields = PyList_New(0);
775 if (self->fields == NULL)
776 return -1;
777 self->field_len = 0;
778 self->state = START_RECORD;
779 self->numeric_field = 0;
780 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781}
Skip Montanarob4a04172003-03-20 23:29:12 +0000782
783static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000784Reader_iternext(ReaderObj *self)
785{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000786 PyObject *lineobj;
787 PyObject *fields = NULL;
788 char *line, c;
789 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000790
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000791 if (parse_reset(self) < 0)
792 return NULL;
793 do {
794 lineobj = PyIter_Next(self->input_iter);
795 if (lineobj == NULL) {
796 /* End of input OR exception */
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700797 if (!PyErr_Occurred() && (self->field_len != 0 ||
798 self->state == IN_QUOTED_FIELD)) {
799 if (self->dialect->strict)
800 PyErr_SetString(error_obj, "unexpected end of data");
801 else if (parse_save_field(self) >= 0 )
802 break;
803 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000804 return NULL;
805 }
806 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000807
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000808 line = PyString_AsString(lineobj);
809 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000810
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000811 if (line == NULL || linelen < 0) {
812 Py_DECREF(lineobj);
813 return NULL;
814 }
815 while (linelen--) {
816 c = *line++;
817 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000818 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000819 PyErr_Format(error_obj,
820 "line contains NULL byte");
821 goto err;
822 }
823 if (parse_process_char(self, c) < 0) {
824 Py_DECREF(lineobj);
825 goto err;
826 }
827 }
828 Py_DECREF(lineobj);
829 if (parse_process_char(self, 0) < 0)
830 goto err;
831 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000832
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000833 fields = self->fields;
834 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000835err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000836 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000837}
838
839static void
840Reader_dealloc(ReaderObj *self)
841{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000842 PyObject_GC_UnTrack(self);
843 Py_XDECREF(self->dialect);
844 Py_XDECREF(self->input_iter);
845 Py_XDECREF(self->fields);
846 if (self->field != NULL)
847 PyMem_Free(self->field);
848 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000849}
850
851static int
852Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
853{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000854 Py_VISIT(self->dialect);
855 Py_VISIT(self->input_iter);
856 Py_VISIT(self->fields);
857 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000858}
859
860static int
861Reader_clear(ReaderObj *self)
862{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000863 Py_CLEAR(self->dialect);
864 Py_CLEAR(self->input_iter);
865 Py_CLEAR(self->fields);
866 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000867}
868
869PyDoc_STRVAR(Reader_Type_doc,
870"CSV reader\n"
871"\n"
872"Reader objects are responsible for reading and parsing tabular data\n"
873"in CSV format.\n"
874);
875
876static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000877 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000878};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000879#define R_OFF(x) offsetof(ReaderObj, x)
880
881static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000882 { "dialect", T_OBJECT, R_OFF(dialect), RO },
883 { "line_num", T_ULONG, R_OFF(line_num), RO },
884 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000885};
886
Skip Montanarob4a04172003-03-20 23:29:12 +0000887
888static PyTypeObject Reader_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000889 PyVarObject_HEAD_INIT(NULL, 0)
890 "_csv.reader", /*tp_name*/
891 sizeof(ReaderObj), /*tp_basicsize*/
892 0, /*tp_itemsize*/
893 /* methods */
894 (destructor)Reader_dealloc, /*tp_dealloc*/
895 (printfunc)0, /*tp_print*/
896 (getattrfunc)0, /*tp_getattr*/
897 (setattrfunc)0, /*tp_setattr*/
898 (cmpfunc)0, /*tp_compare*/
899 (reprfunc)0, /*tp_repr*/
900 0, /*tp_as_number*/
901 0, /*tp_as_sequence*/
902 0, /*tp_as_mapping*/
903 (hashfunc)0, /*tp_hash*/
904 (ternaryfunc)0, /*tp_call*/
905 (reprfunc)0, /*tp_str*/
906 0, /*tp_getattro*/
907 0, /*tp_setattro*/
908 0, /*tp_as_buffer*/
909 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
910 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
911 Reader_Type_doc, /*tp_doc*/
912 (traverseproc)Reader_traverse, /*tp_traverse*/
913 (inquiry)Reader_clear, /*tp_clear*/
914 0, /*tp_richcompare*/
915 0, /*tp_weaklistoffset*/
916 PyObject_SelfIter, /*tp_iter*/
917 (getiterfunc)Reader_iternext, /*tp_iternext*/
918 Reader_methods, /*tp_methods*/
919 Reader_memberlist, /*tp_members*/
920 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000921
922};
923
924static PyObject *
925csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
926{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000927 PyObject * iterator, * dialect = NULL;
928 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000929
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000930 if (!self)
931 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000932
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000933 self->dialect = NULL;
934 self->fields = NULL;
935 self->input_iter = NULL;
936 self->field = NULL;
937 self->field_size = 0;
938 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000939
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000940 if (parse_reset(self) < 0) {
941 Py_DECREF(self);
942 return NULL;
943 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000944
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000945 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
946 Py_DECREF(self);
947 return NULL;
948 }
949 self->input_iter = PyObject_GetIter(iterator);
950 if (self->input_iter == NULL) {
951 PyErr_SetString(PyExc_TypeError,
952 "argument 1 must be an iterator");
953 Py_DECREF(self);
954 return NULL;
955 }
956 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
957 if (self->dialect == NULL) {
958 Py_DECREF(self);
959 return NULL;
960 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000961
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000962 PyObject_GC_Track(self);
963 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000964}
965
966/*
967 * WRITER
968 */
969/* ---------------------------------------------------------------- */
970static void
971join_reset(WriterObj *self)
972{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000973 self->rec_len = 0;
974 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000975}
976
/* Allocation granularity of the writer's record buffer: the buffer size
 * is always rounded up to a multiple of this value. */
#define MEM_INCR 32768
978
979/* Calculate new record length or append field to record. Return new
980 * record length.
981 */
982static int
983join_append_data(WriterObj *self, char *field, int quote_empty,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000984 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000985{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000986 DialectObj *dialect = self->dialect;
987 int i, rec_len;
988 char *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000989
990#define ADDCH(c) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000991 do {\
992 if (copy_phase) \
993 self->rec[rec_len] = c;\
994 rec_len++;\
995 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000996
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000997 lineterm = PyString_AsString(dialect->lineterminator);
998 if (lineterm == NULL)
999 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001001 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001002
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001003 /* If this is not the first field we need a field separator */
1004 if (self->num_fields > 0)
1005 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001006
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001007 /* Handle preceding quote */
1008 if (copy_phase && *quoted)
1009 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001010
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001011 /* Copy/count field data */
1012 for (i = 0;; i++) {
1013 char c = field[i];
1014 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001015
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001016 if (c == '\0')
1017 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001018
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001019 if (c == dialect->delimiter ||
1020 c == dialect->escapechar ||
1021 c == dialect->quotechar ||
1022 strchr(lineterm, c)) {
1023 if (dialect->quoting == QUOTE_NONE)
1024 want_escape = 1;
1025 else {
1026 if (c == dialect->quotechar) {
1027 if (dialect->doublequote)
1028 ADDCH(dialect->quotechar);
1029 else
1030 want_escape = 1;
1031 }
1032 if (!want_escape)
1033 *quoted = 1;
1034 }
1035 if (want_escape) {
1036 if (!dialect->escapechar) {
1037 PyErr_Format(error_obj,
1038 "need to escape, but no escapechar set");
1039 return -1;
1040 }
1041 ADDCH(dialect->escapechar);
1042 }
1043 }
1044 /* Copy field character into record buffer.
1045 */
1046 ADDCH(c);
1047 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001048
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001049 /* If field is empty check if it needs to be quoted.
1050 */
1051 if (i == 0 && quote_empty) {
1052 if (dialect->quoting == QUOTE_NONE) {
1053 PyErr_Format(error_obj,
1054 "single empty field record must be quoted");
1055 return -1;
1056 }
1057 else
1058 *quoted = 1;
1059 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001060
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001061 if (*quoted) {
1062 if (copy_phase)
1063 ADDCH(dialect->quotechar);
1064 else
1065 rec_len += 2;
1066 }
1067 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001068#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001069}
1070
1071static int
1072join_check_rec_size(WriterObj *self, int rec_len)
1073{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001074
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001075 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1076 PyErr_NoMemory();
1077 return 0;
1078 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001079
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001080 if (rec_len > self->rec_size) {
1081 if (self->rec_size == 0) {
1082 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1083 if (self->rec != NULL)
1084 PyMem_Free(self->rec);
1085 self->rec = PyMem_Malloc(self->rec_size);
1086 }
1087 else {
1088 char *old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001089
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001090 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1091 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1092 if (self->rec == NULL)
1093 PyMem_Free(old_rec);
1094 }
1095 if (self->rec == NULL) {
1096 PyErr_NoMemory();
1097 return 0;
1098 }
1099 }
1100 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001101}
1102
1103static int
1104join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1105{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001106 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001108 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1109 if (rec_len < 0)
1110 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001111
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001112 /* grow record buffer if necessary */
1113 if (!join_check_rec_size(self, rec_len))
1114 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001115
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001116 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1117 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001118
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001119 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001120}
1121
1122static int
1123join_append_lineterminator(WriterObj *self)
1124{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001125 int terminator_len;
1126 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001128 terminator_len = PyString_Size(self->dialect->lineterminator);
1129 if (terminator_len == -1)
1130 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001131
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001132 /* grow record buffer if necessary */
1133 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1134 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001136 terminator = PyString_AsString(self->dialect->lineterminator);
1137 if (terminator == NULL)
1138 return 0;
1139 memmove(self->rec + self->rec_len, terminator, terminator_len);
1140 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001142 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001143}
1144
1145PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001146"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001147"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001148"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001149"elements will be converted to string.");
1150
1151static PyObject *
1152csv_writerow(WriterObj *self, PyObject *seq)
1153{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 DialectObj *dialect = self->dialect;
1155 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001156
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001157 if (!PySequence_Check(seq))
1158 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001159
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001160 len = PySequence_Length(seq);
1161 if (len < 0)
1162 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001163
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001164 /* Join all fields in internal buffer.
1165 */
1166 join_reset(self);
1167 for (i = 0; i < len; i++) {
1168 PyObject *field;
1169 int append_ok;
1170 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001171
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001172 field = PySequence_GetItem(seq, i);
1173 if (field == NULL)
1174 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001175
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001176 switch (dialect->quoting) {
1177 case QUOTE_NONNUMERIC:
1178 quoted = !PyNumber_Check(field);
1179 break;
1180 case QUOTE_ALL:
1181 quoted = 1;
1182 break;
1183 default:
1184 quoted = 0;
1185 break;
1186 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001187
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001188 if (PyString_Check(field)) {
1189 append_ok = join_append(self,
1190 PyString_AS_STRING(field),
1191 &quoted, len == 1);
1192 Py_DECREF(field);
1193 }
1194 else if (field == Py_None) {
1195 append_ok = join_append(self, "", &quoted, len == 1);
1196 Py_DECREF(field);
1197 }
1198 else {
1199 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Raymond Hettingerf5377022011-12-11 22:31:09 -08001201 if (PyFloat_Check(field)) {
1202 str = PyObject_Repr(field);
1203 } else {
1204 str = PyObject_Str(field);
1205 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001206 Py_DECREF(field);
1207 if (str == NULL)
1208 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001209
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001210 append_ok = join_append(self, PyString_AS_STRING(str),
1211 &quoted, len == 1);
1212 Py_DECREF(str);
1213 }
1214 if (!append_ok)
1215 return NULL;
1216 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001217
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001218 /* Add line terminator.
1219 */
1220 if (!join_append_lineterminator(self))
1221 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001222
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001223 return PyObject_CallFunction(self->writeline,
1224 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001225}
1226
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001227PyDoc_STRVAR(csv_writerows_doc,
1228"writerows(sequence of sequences)\n"
1229"\n"
1230"Construct and write a series of sequences to a csv file. Non-string\n"
1231"elements will be converted to string.");
1232
Skip Montanarob4a04172003-03-20 23:29:12 +00001233static PyObject *
1234csv_writerows(WriterObj *self, PyObject *seqseq)
1235{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001236 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001237
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001238 row_iter = PyObject_GetIter(seqseq);
1239 if (row_iter == NULL) {
1240 PyErr_SetString(PyExc_TypeError,
1241 "writerows() argument must be iterable");
1242 return NULL;
1243 }
1244 while ((row_obj = PyIter_Next(row_iter))) {
1245 result = csv_writerow(self, row_obj);
1246 Py_DECREF(row_obj);
1247 if (!result) {
1248 Py_DECREF(row_iter);
1249 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001250 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001251 else
1252 Py_DECREF(result);
1253 }
1254 Py_DECREF(row_iter);
1255 if (PyErr_Occurred())
1256 return NULL;
1257 Py_INCREF(Py_None);
1258 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001259}
1260
1261static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001262 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1263 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1264 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001265};
1266
1267#define W_OFF(x) offsetof(WriterObj, x)
1268
1269static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001270 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1271 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001272};
1273
1274static void
1275Writer_dealloc(WriterObj *self)
1276{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001277 PyObject_GC_UnTrack(self);
1278 Py_XDECREF(self->dialect);
1279 Py_XDECREF(self->writeline);
1280 if (self->rec != NULL)
1281 PyMem_Free(self->rec);
1282 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001283}
1284
1285static int
1286Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1287{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001288 Py_VISIT(self->dialect);
1289 Py_VISIT(self->writeline);
1290 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001291}
1292
1293static int
1294Writer_clear(WriterObj *self)
1295{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001296 Py_CLEAR(self->dialect);
1297 Py_CLEAR(self->writeline);
1298 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001299}
1300
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001301PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001302"CSV writer\n"
1303"\n"
1304"Writer objects are responsible for generating tabular data\n"
1305"in CSV format from sequence input.\n"
1306);
1307
1308static PyTypeObject Writer_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001309 PyVarObject_HEAD_INIT(NULL, 0)
1310 "_csv.writer", /*tp_name*/
1311 sizeof(WriterObj), /*tp_basicsize*/
1312 0, /*tp_itemsize*/
1313 /* methods */
1314 (destructor)Writer_dealloc, /*tp_dealloc*/
1315 (printfunc)0, /*tp_print*/
1316 (getattrfunc)0, /*tp_getattr*/
1317 (setattrfunc)0, /*tp_setattr*/
1318 (cmpfunc)0, /*tp_compare*/
1319 (reprfunc)0, /*tp_repr*/
1320 0, /*tp_as_number*/
1321 0, /*tp_as_sequence*/
1322 0, /*tp_as_mapping*/
1323 (hashfunc)0, /*tp_hash*/
1324 (ternaryfunc)0, /*tp_call*/
1325 (reprfunc)0, /*tp_str*/
1326 0, /*tp_getattro*/
1327 0, /*tp_setattro*/
1328 0, /*tp_as_buffer*/
1329 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1330 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1331 Writer_Type_doc,
1332 (traverseproc)Writer_traverse, /*tp_traverse*/
1333 (inquiry)Writer_clear, /*tp_clear*/
1334 0, /*tp_richcompare*/
1335 0, /*tp_weaklistoffset*/
1336 (getiterfunc)0, /*tp_iter*/
1337 (getiterfunc)0, /*tp_iternext*/
1338 Writer_methods, /*tp_methods*/
1339 Writer_memberlist, /*tp_members*/
1340 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001341};
1342
1343static PyObject *
1344csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1345{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001346 PyObject * output_file, * dialect = NULL;
1347 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001348
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001349 if (!self)
1350 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001351
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001352 self->dialect = NULL;
1353 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001354
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001355 self->rec = NULL;
1356 self->rec_size = 0;
1357 self->rec_len = 0;
1358 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001359
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001360 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1361 Py_DECREF(self);
1362 return NULL;
1363 }
1364 self->writeline = PyObject_GetAttrString(output_file, "write");
1365 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1366 PyErr_SetString(PyExc_TypeError,
1367 "argument 1 must have a \"write\" method");
1368 Py_DECREF(self);
1369 return NULL;
1370 }
1371 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1372 if (self->dialect == NULL) {
1373 Py_DECREF(self);
1374 return NULL;
1375 }
1376 PyObject_GC_Track(self);
1377 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378}
1379
1380/*
1381 * DIALECT REGISTRY
1382 */
1383static PyObject *
1384csv_list_dialects(PyObject *module, PyObject *args)
1385{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001386 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001387}
1388
1389static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001390csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001391{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001392 PyObject *name_obj, *dialect_obj = NULL;
1393 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001394
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001395 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1396 return NULL;
1397 if (!IS_BASESTRING(name_obj)) {
1398 PyErr_SetString(PyExc_TypeError,
1399 "dialect name must be a string or unicode");
1400 return NULL;
1401 }
1402 dialect = _call_dialect(dialect_obj, kwargs);
1403 if (dialect == NULL)
1404 return NULL;
1405 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1406 Py_DECREF(dialect);
1407 return NULL;
1408 }
1409 Py_DECREF(dialect);
1410 Py_INCREF(Py_None);
1411 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001412}
1413
1414static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001415csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001416{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001417 if (PyDict_DelItem(dialects, name_obj) < 0)
1418 return PyErr_Format(error_obj, "unknown dialect");
1419 Py_INCREF(Py_None);
1420 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001421}
1422
1423static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001424csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001425{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001426 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001427}
1428
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001429static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001430csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001431{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001432 PyObject *new_limit = NULL;
1433 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001434
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001435 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1436 return NULL;
1437 if (new_limit != NULL) {
1438 if (!PyInt_Check(new_limit)) {
1439 PyErr_Format(PyExc_TypeError,
1440 "limit must be an integer");
1441 return NULL;
1442 }
1443 field_limit = PyInt_AsLong(new_limit);
1444 }
1445 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001446}
1447
Skip Montanarob4a04172003-03-20 23:29:12 +00001448/*
1449 * MODULE
1450 */
1451
1452PyDoc_STRVAR(csv_module_doc,
1453"CSV parsing and writing.\n"
1454"\n"
1455"This module provides classes that assist in the reading and writing\n"
1456"of Comma Separated Value (CSV) files, and implements the interface\n"
1457"described by PEP 305. Although many CSV files are simple to parse,\n"
1458"the format is not formally defined by a stable specification and\n"
1459"is subtle enough that parsing lines of a CSV file with something\n"
1460"like line.split(\",\") is bound to fail. The module supports three\n"
1461"basic APIs: reading, writing, and registration of dialects.\n"
1462"\n"
1463"\n"
1464"DIALECT REGISTRATION:\n"
1465"\n"
1466"Readers and writers support a dialect argument, which is a convenient\n"
1467"handle on a group of settings. When the dialect argument is a string,\n"
1468"it identifies one of the dialects previously registered with the module.\n"
1469"If it is a class or instance, the attributes of the argument are used as\n"
1470"the settings for the reader or writer:\n"
1471"\n"
1472" class excel:\n"
1473" delimiter = ','\n"
1474" quotechar = '\"'\n"
1475" escapechar = None\n"
1476" doublequote = True\n"
1477" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001478" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001479" quoting = QUOTE_MINIMAL\n"
1480"\n"
1481"SETTINGS:\n"
1482"\n"
1483" * quotechar - specifies a one-character string to use as the \n"
1484" quoting character. It defaults to '\"'.\n"
1485" * delimiter - specifies a one-character string to use as the \n"
1486" field separator. It defaults to ','.\n"
1487" * skipinitialspace - specifies how to interpret whitespace which\n"
1488" immediately follows a delimiter. It defaults to False, which\n"
1489" means that whitespace immediately following a delimiter is part\n"
1490" of the following field.\n"
1491" * lineterminator - specifies the character sequence which should \n"
1492" terminate rows.\n"
1493" * quoting - controls when quotes should be generated by the writer.\n"
1494" It can take on any of the following module constants:\n"
1495"\n"
1496" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1497" field contains either the quotechar or the delimiter\n"
1498" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1499" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001500" fields which do not parse as integers or floating point\n"
1501" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001502" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1503" * escapechar - specifies a one-character string used to escape \n"
1504" the delimiter when quoting is set to QUOTE_NONE.\n"
1505" * doublequote - controls the handling of quotes inside fields. When\n"
1506" True, two consecutive quotes are interpreted as one during read,\n"
1507" and when writing, each quote character embedded in the data is\n"
1508" written as two quotes\n");
1509
1510PyDoc_STRVAR(csv_reader_doc,
1511" csv_reader = reader(iterable [, dialect='excel']\n"
1512" [optional keyword args])\n"
1513" for row in csv_reader:\n"
1514" process(row)\n"
1515"\n"
1516"The \"iterable\" argument can be any object that returns a line\n"
1517"of input for each iteration, such as a file object or a list. The\n"
1518"optional \"dialect\" parameter is discussed below. The function\n"
1519"also accepts optional keyword arguments which override settings\n"
1520"provided by the dialect.\n"
1521"\n"
1522"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001523"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001524
1525PyDoc_STRVAR(csv_writer_doc,
1526" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1527" [optional keyword args])\n"
Fredrik Lundh4aaaa492006-04-04 16:51:13 +00001528" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001529" csv_writer.writerow(row)\n"
1530"\n"
1531" [or]\n"
1532"\n"
1533" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1534" [optional keyword args])\n"
1535" csv_writer.writerows(rows)\n"
1536"\n"
1537"The \"fileobj\" argument can be any object that supports the file API.\n");
1538
1539PyDoc_STRVAR(csv_list_dialects_doc,
1540"Return a list of all know dialect names.\n"
1541" names = csv.list_dialects()");
1542
1543PyDoc_STRVAR(csv_get_dialect_doc,
1544"Return the dialect instance associated with name.\n"
1545" dialect = csv.get_dialect(name)");
1546
1547PyDoc_STRVAR(csv_register_dialect_doc,
1548"Create a mapping from a string name to a dialect class.\n"
1549" dialect = csv.register_dialect(name, dialect)");
1550
1551PyDoc_STRVAR(csv_unregister_dialect_doc,
1552"Delete the name/dialect mapping associated with a string name.\n"
1553" csv.unregister_dialect(name)");
1554
Andrew McNamara31d88962005-01-12 03:45:10 +00001555PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001556"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001557" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001558"\n"
1559"Returns old limit. If limit is not given, no new limit is set and\n"
1560"the old limit is returned");
1561
Skip Montanarob4a04172003-03-20 23:29:12 +00001562static struct PyMethodDef csv_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001563 { "reader", (PyCFunction)csv_reader,
1564 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1565 { "writer", (PyCFunction)csv_writer,
1566 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1567 { "list_dialects", (PyCFunction)csv_list_dialects,
1568 METH_NOARGS, csv_list_dialects_doc},
1569 { "register_dialect", (PyCFunction)csv_register_dialect,
1570 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1571 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1572 METH_O, csv_unregister_dialect_doc},
1573 { "get_dialect", (PyCFunction)csv_get_dialect,
1574 METH_O, csv_get_dialect_doc},
1575 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1576 METH_VARARGS, csv_field_size_limit_doc},
1577 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001578};
1579
1580PyMODINIT_FUNC
1581init_csv(void)
1582{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001583 PyObject *module;
1584 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001586 if (PyType_Ready(&Dialect_Type) < 0)
1587 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001588
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001589 if (PyType_Ready(&Reader_Type) < 0)
1590 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001591
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001592 if (PyType_Ready(&Writer_Type) < 0)
1593 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001594
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001595 /* Create the module and add the functions */
1596 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1597 if (module == NULL)
1598 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001599
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001600 /* Add version to the module. */
1601 if (PyModule_AddStringConstant(module, "__version__",
1602 MODULE_VERSION) == -1)
1603 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001604
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001605 /* Add _dialects dictionary */
1606 dialects = PyDict_New();
1607 if (dialects == NULL)
1608 return;
1609 if (PyModule_AddObject(module, "_dialects", dialects))
1610 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001611
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001612 /* Add quote styles into dictionary */
1613 for (style = quote_styles; style->name; style++) {
1614 if (PyModule_AddIntConstant(module, style->name,
1615 style->style) == -1)
1616 return;
1617 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001618
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001619 /* Add the Dialect type */
1620 Py_INCREF(&Dialect_Type);
1621 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1622 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001623
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001624 /* Add the CSV exception object to the module. */
1625 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1626 if (error_obj == NULL)
1627 return;
1628 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001629}