blob: ea3add2dbc4ac50eff2dcaa8c27c7063bb5aa167 [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
/*
 * Each macro below is defined only when the Python headers included above
 * have not already provided it.
 */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str)          str
#else
/* Doc strings disabled: every doc string collapses to "". */
#define PyDoc_STR(str)          ""
#endif
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Declaration prefix for the module init function (C linkage under C++). */
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

#ifndef Py_CLEAR
/*
 * Drop the reference held in `op`, if any.  `op` is set to NULL *before*
 * the DECREF so it never holds a pointer to an object being torn down.
 */
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *tmp = (PyObject *)(op);                   \
            (op) = NULL;                                        \
            Py_DECREF(tmp);                                     \
        }                                                       \
    } while (0)
#endif
#ifndef Py_VISIT
/*
 * For use inside a traverse function: relies on the enclosing function
 * having parameters named `visit` and `arg`, and returns from that
 * function as soon as a visit call yields a non-zero result.
 */
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int vret = visit((PyObject *)(op), arg);            \
            if (vret)                                           \
                return vret;                                    \
        }                                                       \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
/* True when `o` is an instance of PyBaseString_Type or of a subtype. */
#define IS_BASESTRING(o) \
    PyObject_TypeCheck(o, &PyBaseString_Type)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
/* Module-level state shared by the functions in this file. */
static PyObject *error_obj;     /* CSV exception */
static PyObject *dialects;      /* Dialect registry (queried with PyDict_GetItem) */
static long field_limit = 128 * 1024;   /* max parsed field size; enforced in parse_add_char */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser (parse_process_char). */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting a field */
    ESCAPED_CHAR,               /* previous char was the escape char (unquoted) */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* previous char was the escape char (quoted) */
    QUOTE_IN_QUOTED_FIELD,      /* seen a quote char inside a quoted field */
    EAT_CRNL                    /* consuming '\r' / '\n' at the end of a record */
} ParserState;
76
/*
 * Quoting styles accepted for a dialect's "quoting" option.
 * In the reader, QUOTE_NONE disables quote-character handling and
 * QUOTE_NONNUMERIC marks unquoted fields as numeric (converted with
 * PyNumber_Float).  NOTE(review): the writer-side meaning of each style is
 * not visible in this part of the file.
 */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
80
/* Pairs a quoting style value with its symbolic name. */
typedef struct {
    QuoteStyle style;   /* enum value */
    char *name;         /* symbolic name; NULL in the table's terminating entry */
} StyleDesc;
85
/*
 * Table of all known quoting styles.  The final zero-initialised entry
 * (name == NULL) terminates the table; dialect_check_quoting() scans it.
 */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
93
/*
 * Instance layout of the Dialect type: the C-level form of the CSV
 * formatting options, filled in and validated by dialect_new().
 */
typedef struct {
    PyObject_HEAD

    int doublequote;            /* is " represented by ""? */
    char delimiter;             /* field separator */
    char quotechar;             /* quote character; '\0' when unset */
    char escapechar;            /* escape character; '\0' when unset */
    int skipinitialspace;       /* ignore spaces following delimiter? */
    PyObject *lineterminator;   /* string to write between records (owned reference) */
    int quoting;                /* style of quoting to write (a QuoteStyle value) */

    int strict;                 /* raise exception on bad CSV */
} DialectObj;

/* Forward declaration; the type object is defined after dialect_new(). */
staticforward PyTypeObject Dialect_Type;
109
/*
 * Instance layout of the reader type: the input iterator, the dialect in
 * force, and the parser's working state for the record being built.
 */
typedef struct {
    PyObject_HEAD

    PyObject *input_iter;   /* iterate over this for input lines */

    DialectObj *dialect;    /* parsing dialect */

    PyObject *fields;       /* field list for current record */
    ParserState state;      /* current CSV parse state */
    char *field;            /* build current field in here (PyMem buffer) */
    int field_size;         /* size of allocated buffer */
    int field_len;          /* length of current field */
    int numeric_field;      /* treat field as numeric */
    unsigned long line_num; /* Source-file line number */
} ReaderObj;

/* Forward declaration; the type object is defined further down. */
staticforward PyTypeObject Reader_Type;

/* Exact type test: true only when `v` is a Reader_Type instance itself. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
/*
 * Instance layout of the writer type: the output callable, the dialect in
 * force, and the buffer in which the current record is assembled.
 */
typedef struct {
    PyObject_HEAD

    PyObject *writeline;    /* write output lines to this file */

    DialectObj *dialect;    /* parsing dialect */

    char *rec;              /* buffer for parser.join */
    int rec_size;           /* size of allocated record */
    int rec_len;            /* length of record */
    int num_fields;         /* number of fields in record */
} WriterObj;

/* Forward declaration; the definition is not in this part of the file. */
staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
211 else
212 *target = PyObject_IsTrue(src);
213 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000214}
215
Andrew McNamara1196cf12005-01-07 04:42:45 +0000216static int
217_set_int(const char *name, int *target, PyObject *src, int dflt)
218{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000219 if (src == NULL)
220 *target = dflt;
221 else {
222 if (!PyInt_Check(src)) {
223 PyErr_Format(PyExc_TypeError,
224 "\"%s\" must be an integer", name);
225 return -1;
226 }
227 *target = PyInt_AsLong(src);
228 }
229 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000230}
231
232static int
233_set_char(const char *name, char *target, PyObject *src, char dflt)
234{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000235 if (src == NULL)
236 *target = dflt;
237 else {
238 if (src == Py_None || PyString_Size(src) == 0)
239 *target = '\0';
240 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
241 PyErr_Format(PyExc_TypeError,
242 "\"%s\" must be an 1-character string",
243 name);
244 return -1;
245 }
246 else {
247 char *s = PyString_AsString(src);
248 if (s == NULL)
249 return -1;
250 *target = s[0];
251 }
252 }
253 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000254}
255
256static int
257_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
258{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000259 if (src == NULL)
260 *target = PyString_FromString(dflt);
261 else {
262 if (src == Py_None)
263 *target = NULL;
264 else if (!IS_BASESTRING(src)) {
265 PyErr_Format(PyExc_TypeError,
266 "\"%s\" must be an string", name);
267 return -1;
268 }
269 else {
270 Py_XDECREF(*target);
271 Py_INCREF(src);
272 *target = src;
273 }
274 }
275 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000276}
277
278static int
279dialect_check_quoting(int quoting)
280{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000281 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000282
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000283 for (qs = quote_styles; qs->name; qs++) {
284 if (qs->style == quoting)
285 return 0;
286 }
287 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
288 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000289}
Skip Montanarob4a04172003-03-20 23:29:12 +0000290
/* Byte offset of field `x` within DialectObj. */
#define D_OFF(x) offsetof(DialectObj, x)

/*
 * Dialect attributes exposed directly from the C struct, all read-only.
 * The remaining attributes need conversion and are provided by
 * Dialect_getsetlist.
 */
static struct PyMemberDef Dialect_memberlist[] = {
    { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY },
    { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY },
    { "doublequote",        T_INT, D_OFF(doublequote), READONLY },
    { "strict",             T_INT, D_OFF(strict), READONLY },
    { NULL }
};
300
/*
 * Dialect attributes computed by getter functions.  Only a getter is
 * supplied for each entry; the setter slot is left zero-initialised.
 */
static PyGetSetDef Dialect_getsetlist[] = {
    { "escapechar",             (getter)Dialect_get_escapechar},
    { "lineterminator",         (getter)Dialect_get_lineterminator},
    { "quotechar",              (getter)Dialect_get_quotechar},
    { "quoting",                (getter)Dialect_get_quoting},
    {NULL},
};
308
309static void
310Dialect_dealloc(DialectObj *self)
311{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000312 Py_XDECREF(self->lineterminator);
313 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000314}
315
/*
 * Keyword names accepted by dialect_new(), in the order in which the
 * matching output pointers are passed to PyArg_ParseTupleAndKeywords().
 */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL
};
328
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000329static PyObject *
330dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000331{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000332 DialectObj *self;
333 PyObject *ret = NULL;
334 PyObject *dialect = NULL;
335 PyObject *delimiter = NULL;
336 PyObject *doublequote = NULL;
337 PyObject *escapechar = NULL;
338 PyObject *lineterminator = NULL;
339 PyObject *quotechar = NULL;
340 PyObject *quoting = NULL;
341 PyObject *skipinitialspace = NULL;
342 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000343
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000344 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
345 "|OOOOOOOOO", dialect_kws,
346 &dialect,
347 &delimiter,
348 &doublequote,
349 &escapechar,
350 &lineterminator,
351 &quotechar,
352 &quoting,
353 &skipinitialspace,
354 &strict))
355 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000356
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000357 if (dialect != NULL) {
358 if (IS_BASESTRING(dialect)) {
359 dialect = get_dialect_from_registry(dialect);
360 if (dialect == NULL)
361 return NULL;
362 }
363 else
364 Py_INCREF(dialect);
365 /* Can we reuse this instance? */
366 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
367 delimiter == 0 &&
368 doublequote == 0 &&
369 escapechar == 0 &&
370 lineterminator == 0 &&
371 quotechar == 0 &&
372 quoting == 0 &&
373 skipinitialspace == 0 &&
374 strict == 0)
375 return dialect;
376 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000377
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000378 self = (DialectObj *)type->tp_alloc(type, 0);
379 if (self == NULL) {
380 Py_XDECREF(dialect);
381 return NULL;
382 }
383 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000384
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000385 Py_XINCREF(delimiter);
386 Py_XINCREF(doublequote);
387 Py_XINCREF(escapechar);
388 Py_XINCREF(lineterminator);
389 Py_XINCREF(quotechar);
390 Py_XINCREF(quoting);
391 Py_XINCREF(skipinitialspace);
392 Py_XINCREF(strict);
393 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000394#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000395 if (v == NULL) \
396 v = PyObject_GetAttrString(dialect, n)
397 DIALECT_GETATTR(delimiter, "delimiter");
398 DIALECT_GETATTR(doublequote, "doublequote");
399 DIALECT_GETATTR(escapechar, "escapechar");
400 DIALECT_GETATTR(lineterminator, "lineterminator");
401 DIALECT_GETATTR(quotechar, "quotechar");
402 DIALECT_GETATTR(quoting, "quoting");
403 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
404 DIALECT_GETATTR(strict, "strict");
405 PyErr_Clear();
406 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000407
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000408 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000409#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000410 if (meth(name, target, src, dflt)) \
411 goto err
412 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
413 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
414 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
415 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
416 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
417 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
418 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
419 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000420
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000421 /* validate options */
422 if (dialect_check_quoting(self->quoting))
423 goto err;
424 if (self->delimiter == 0) {
425 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
426 goto err;
427 }
428 if (quotechar == Py_None && quoting == NULL)
429 self->quoting = QUOTE_NONE;
430 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
431 PyErr_SetString(PyExc_TypeError,
432 "quotechar must be set if quoting enabled");
433 goto err;
434 }
435 if (self->lineterminator == 0) {
436 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
437 goto err;
438 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000439
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000440 ret = (PyObject *)self;
441 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000443 Py_XDECREF(self);
444 Py_XDECREF(dialect);
445 Py_XDECREF(delimiter);
446 Py_XDECREF(doublequote);
447 Py_XDECREF(escapechar);
448 Py_XDECREF(lineterminator);
449 Py_XDECREF(quotechar);
450 Py_XDECREF(quoting);
451 Py_XDECREF(skipinitialspace);
452 Py_XDECREF(strict);
453 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000454}
455
456
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000457PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000458"CSV dialect\n"
459"\n"
460"The Dialect type records CSV parsing and generation options.\n");
461
462static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000463 PyVarObject_HEAD_INIT(NULL, 0)
464 "_csv.Dialect", /* tp_name */
465 sizeof(DialectObj), /* tp_basicsize */
466 0, /* tp_itemsize */
467 /* methods */
468 (destructor)Dialect_dealloc, /* tp_dealloc */
469 (printfunc)0, /* tp_print */
470 (getattrfunc)0, /* tp_getattr */
471 (setattrfunc)0, /* tp_setattr */
472 (cmpfunc)0, /* tp_compare */
473 (reprfunc)0, /* tp_repr */
474 0, /* tp_as_number */
475 0, /* tp_as_sequence */
476 0, /* tp_as_mapping */
477 (hashfunc)0, /* tp_hash */
478 (ternaryfunc)0, /* tp_call */
479 (reprfunc)0, /* tp_str */
480 0, /* tp_getattro */
481 0, /* tp_setattro */
482 0, /* tp_as_buffer */
483 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
484 Dialect_Type_doc, /* tp_doc */
485 0, /* tp_traverse */
486 0, /* tp_clear */
487 0, /* tp_richcompare */
488 0, /* tp_weaklistoffset */
489 0, /* tp_iter */
490 0, /* tp_iternext */
491 0, /* tp_methods */
492 Dialect_memberlist, /* tp_members */
493 Dialect_getsetlist, /* tp_getset */
494 0, /* tp_base */
495 0, /* tp_dict */
496 0, /* tp_descr_get */
497 0, /* tp_descr_set */
498 0, /* tp_dictoffset */
499 0, /* tp_init */
500 0, /* tp_alloc */
501 dialect_new, /* tp_new */
502 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000503};
504
Andrew McNamara91b97462005-01-11 01:07:23 +0000505/*
506 * Return an instance of the dialect type, given a Python instance or kwarg
507 * description of the dialect
508 */
509static PyObject *
510_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
511{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000512 PyObject *ctor_args;
513 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000514
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000515 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
516 if (ctor_args == NULL)
517 return NULL;
518 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
519 Py_DECREF(ctor_args);
520 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000521}
522
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000523/*
524 * READER
525 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000526static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000527parse_save_field(ReaderObj *self)
528{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000529 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000530
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000531 field = PyString_FromStringAndSize(self->field, self->field_len);
532 if (field == NULL)
533 return -1;
534 self->field_len = 0;
535 if (self->numeric_field) {
536 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000537
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000538 self->numeric_field = 0;
539 tmp = PyNumber_Float(field);
540 if (tmp == NULL) {
541 Py_DECREF(field);
542 return -1;
543 }
544 Py_DECREF(field);
545 field = tmp;
546 }
547 PyList_Append(self->fields, field);
548 Py_DECREF(field);
549 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000550}
551
552static int
553parse_grow_buff(ReaderObj *self)
554{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000555 if (self->field_size == 0) {
556 self->field_size = 4096;
557 if (self->field != NULL)
558 PyMem_Free(self->field);
559 self->field = PyMem_Malloc(self->field_size);
560 }
561 else {
562 if (self->field_size > INT_MAX / 2) {
563 PyErr_NoMemory();
564 return 0;
565 }
566 self->field_size *= 2;
567 self->field = PyMem_Realloc(self->field, self->field_size);
568 }
569 if (self->field == NULL) {
570 PyErr_NoMemory();
571 return 0;
572 }
573 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000574}
575
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000576static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000577parse_add_char(ReaderObj *self, char c)
578{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000579 if (self->field_len >= field_limit) {
580 PyErr_Format(error_obj, "field larger than field limit (%ld)",
581 field_limit);
582 return -1;
583 }
584 if (self->field_len == self->field_size && !parse_grow_buff(self))
585 return -1;
586 self->field[self->field_len++] = c;
587 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000588}
589
/*
 * Feed one character to the reader's state machine.
 *
 * `c` is either a character of the current input line or '\0', which the
 * caller passes once after the last character of each line to mark the end
 * of that line.  Depending on self->state the character is appended to the
 * current field, completes a field (parse_save_field), or only changes the
 * state.  A record is complete when the state is START_RECORD again.
 *
 * Returns 0 on success, -1 with an exception set on error.
 */
static int
parse_process_char(ReaderObj *self, char c)
{
    DialectObj *dialect = self->dialect;

    switch (self->state) {
    case START_RECORD:
        /* start of record */
        if (c == '\0')
            /* empty line - return [] */
            break;
        else if (c == '\n' || c == '\r') {
            self->state = EAT_CRNL;
            break;
        }
        /* normal character - handle as START_FIELD */
        self->state = START_FIELD;
        /* fallthru */
    case START_FIELD:
        /* expecting field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* save empty field - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            /* start quoted field */
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == ' ' && dialect->skipinitialspace)
            /* ignore space at start of field */
            ;
        else if (c == dialect->delimiter) {
            /* save empty field */
            if (parse_save_field(self) < 0)
                return -1;
        }
        else {
            /* begin new unquoted field */
            if (dialect->quoting == QUOTE_NONNUMERIC)
                self->numeric_field = 1;
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        break;

    case ESCAPED_CHAR:
        /* escape char was the last character of the line: the escaped
           character is taken to be a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_FIELD;
        break;

    case IN_FIELD:
        /* in unquoted field */
        if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (c == dialect->escapechar) {
            /* possible escaped character */
            self->state = ESCAPED_CHAR;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case IN_QUOTED_FIELD:
        /* in quoted field */
        if (c == '\0')
            /* end of line inside quotes: stay in this state so the field
               continues on the next input line */
            ;
        else if (c == dialect->escapechar) {
            /* Possible escape character */
            self->state = ESCAPE_IN_QUOTED_FIELD;
        }
        else if (c == dialect->quotechar &&
                 dialect->quoting != QUOTE_NONE) {
            if (dialect->doublequote) {
                /* doublequote; " represented by "" */
                self->state = QUOTE_IN_QUOTED_FIELD;
            }
            else {
                /* end of quote part of field */
                self->state = IN_FIELD;
            }
        }
        else {
            /* normal character - save in field */
            if (parse_add_char(self, c) < 0)
                return -1;
        }
        break;

    case ESCAPE_IN_QUOTED_FIELD:
        /* as in ESCAPED_CHAR, an escape at end of line yields a newline */
        if (c == '\0')
            c = '\n';
        if (parse_add_char(self, c) < 0)
            return -1;
        self->state = IN_QUOTED_FIELD;
        break;

    case QUOTE_IN_QUOTED_FIELD:
        /* doublequote - seen a quote in a quoted field */
        if (dialect->quoting != QUOTE_NONE &&
            c == dialect->quotechar) {
            /* save "" as " */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_QUOTED_FIELD;
        }
        else if (c == dialect->delimiter) {
            /* save field - wait for new field */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = START_FIELD;
        }
        else if (c == '\n' || c == '\r' || c == '\0') {
            /* end of line - return [fields] */
            if (parse_save_field(self) < 0)
                return -1;
            self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
        }
        else if (!dialect->strict) {
            /* lenient mode: keep the character and carry on as an
               unquoted field */
            if (parse_add_char(self, c) < 0)
                return -1;
            self->state = IN_FIELD;
        }
        else {
            /* illegal */
            PyErr_Format(error_obj, "'%c' expected after '%c'",
                            dialect->delimiter,
                            dialect->quotechar);
            return -1;
        }
        break;

    case EAT_CRNL:
        /* swallow the line-ending characters that follow a record */
        if (c == '\n' || c == '\r')
            ;
        else if (c == '\0')
            self->state = START_RECORD;
        else {
            PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
            return -1;
        }
        break;

    }
    return 0;
}
759
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000760static int
761parse_reset(ReaderObj *self)
762{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000763 Py_XDECREF(self->fields);
764 self->fields = PyList_New(0);
765 if (self->fields == NULL)
766 return -1;
767 self->field_len = 0;
768 self->state = START_RECORD;
769 self->numeric_field = 0;
770 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000771}
Skip Montanarob4a04172003-03-20 23:29:12 +0000772
773static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000774Reader_iternext(ReaderObj *self)
775{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000776 PyObject *lineobj;
777 PyObject *fields = NULL;
778 char *line, c;
779 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000780
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000781 if (parse_reset(self) < 0)
782 return NULL;
783 do {
784 lineobj = PyIter_Next(self->input_iter);
785 if (lineobj == NULL) {
786 /* End of input OR exception */
787 if (!PyErr_Occurred() && self->field_len != 0)
788 PyErr_Format(error_obj,
789 "newline inside string");
790 return NULL;
791 }
792 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000793
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000794 line = PyString_AsString(lineobj);
795 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000797 if (line == NULL || linelen < 0) {
798 Py_DECREF(lineobj);
799 return NULL;
800 }
801 while (linelen--) {
802 c = *line++;
803 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000804 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000805 PyErr_Format(error_obj,
806 "line contains NULL byte");
807 goto err;
808 }
809 if (parse_process_char(self, c) < 0) {
810 Py_DECREF(lineobj);
811 goto err;
812 }
813 }
814 Py_DECREF(lineobj);
815 if (parse_process_char(self, 0) < 0)
816 goto err;
817 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000818
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000819 fields = self->fields;
820 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000821err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000822 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000823}
824
825static void
826Reader_dealloc(ReaderObj *self)
827{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000828 PyObject_GC_UnTrack(self);
829 Py_XDECREF(self->dialect);
830 Py_XDECREF(self->input_iter);
831 Py_XDECREF(self->fields);
832 if (self->field != NULL)
833 PyMem_Free(self->field);
834 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000835}
836
837static int
838Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
839{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000840 Py_VISIT(self->dialect);
841 Py_VISIT(self->input_iter);
842 Py_VISIT(self->fields);
843 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000844}
845
/*
 * tp_clear for the reader: drop every object reference the reader holds.
 * Py_CLEAR sets each slot to NULL before releasing the old reference.
 * Always returns 0.
 */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
854
/* Doc string installed as tp_doc of Reader_Type. */
PyDoc_STRVAR(Reader_Type_doc,
"CSV reader\n"
"\n"
"Reader objects are responsible for reading and parsing tabular data\n"
"in CSV format.\n"
);
861
/* Method table of the reader type: it holds only the terminating entry. */
static struct PyMethodDef Reader_methods[] = {
    { NULL, NULL }
};
/* Byte offset of field `x` within ReaderObj. */
#define R_OFF(x) offsetof(ReaderObj, x)

/* Read-only attributes of reader objects, exposed straight from the struct. */
static struct PyMemberDef Reader_memberlist[] = {
    { "dialect", T_OBJECT, R_OFF(dialect), RO },
    { "line_num", T_ULONG, R_OFF(line_num), RO },
    { NULL }
};
872
Skip Montanarob4a04172003-03-20 23:29:12 +0000873
/* Type object for "_csv.reader".
 *
 * The initialiser is positional, so the order of the entries must follow
 * the PyTypeObject slot order exactly.  The type is GC-aware
 * (Py_TPFLAGS_HAVE_GC with traverse/clear hooks), may be subclassed
 * (Py_TPFLAGS_BASETYPE), and is its own iterator: tp_iter returns the
 * object itself and tp_iternext is Reader_iternext.
 */
static PyTypeObject Reader_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.reader",                          /*tp_name*/
    sizeof(ReaderObj),                      /*tp_basicsize*/
    0,                                      /*tp_itemsize*/
    /* methods */
    (destructor)Reader_dealloc,             /*tp_dealloc*/
    (printfunc)0,                           /*tp_print*/
    (getattrfunc)0,                         /*tp_getattr*/
    (setattrfunc)0,                         /*tp_setattr*/
    (cmpfunc)0,                             /*tp_compare*/
    (reprfunc)0,                            /*tp_repr*/
    0,                                      /*tp_as_number*/
    0,                                      /*tp_as_sequence*/
    0,                                      /*tp_as_mapping*/
    (hashfunc)0,                            /*tp_hash*/
    (ternaryfunc)0,                         /*tp_call*/
    (reprfunc)0,                            /*tp_str*/
    0,                                      /*tp_getattro*/
    0,                                      /*tp_setattro*/
    0,                                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                 /*tp_flags*/
    Reader_Type_doc,                        /*tp_doc*/
    (traverseproc)Reader_traverse,          /*tp_traverse*/
    (inquiry)Reader_clear,                  /*tp_clear*/
    0,                                      /*tp_richcompare*/
    0,                                      /*tp_weaklistoffset*/
    PyObject_SelfIter,                      /*tp_iter*/
    (getiterfunc)Reader_iternext,           /*tp_iternext*/
    Reader_methods,                         /*tp_methods*/
    Reader_memberlist,                      /*tp_members*/
    0,                                      /*tp_getset*/

};
909
910static PyObject *
911csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
912{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000913 PyObject * iterator, * dialect = NULL;
914 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000915
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000916 if (!self)
917 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000918
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000919 self->dialect = NULL;
920 self->fields = NULL;
921 self->input_iter = NULL;
922 self->field = NULL;
923 self->field_size = 0;
924 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000925
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000926 if (parse_reset(self) < 0) {
927 Py_DECREF(self);
928 return NULL;
929 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000931 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
932 Py_DECREF(self);
933 return NULL;
934 }
935 self->input_iter = PyObject_GetIter(iterator);
936 if (self->input_iter == NULL) {
937 PyErr_SetString(PyExc_TypeError,
938 "argument 1 must be an iterator");
939 Py_DECREF(self);
940 return NULL;
941 }
942 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
943 if (self->dialect == NULL) {
944 Py_DECREF(self);
945 return NULL;
946 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000948 PyObject_GC_Track(self);
949 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000950}
951
952/*
953 * WRITER
954 */
955/* ---------------------------------------------------------------- */
956static void
957join_reset(WriterObj *self)
958{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000959 self->rec_len = 0;
960 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000961}
962
/* Allocation granularity for the writer's record buffer: the buffer size
 * chosen by join_check_rec_size is always a multiple of this value. */
#define MEM_INCR 32768
964
/* Calculate new record length or append field to record.  Return new
 * record length.
 *
 * This function is run twice per field (see join_append):
 *   copy_phase == 0  only counts how many bytes the field will occupy;
 *                    nothing is written to self->rec.
 *   copy_phase != 0  writes the bytes into self->rec starting at
 *                    self->rec_len.  The caller must already have made
 *                    the buffer large enough.
 *
 * field        NUL-terminated field text.
 * quote_empty  if true and the field is empty, the field must be quoted
 *              (an error under QUOTE_NONE).
 * quoted       in/out flag: on entry, whether the caller wants the field
 *              quoted; set to 1 here when the field content requires
 *              quoting.
 *
 * Returns the record length after this field, or -1 with an exception
 * set.  self->rec_len itself is not modified.
 */
static int
join_append_data(WriterObj *self, char *field, int quote_empty,
                 int *quoted, int copy_phase)
{
    DialectObj *dialect = self->dialect;
    int i, rec_len;
    char *lineterm;

/* Account for one output byte; only store it during the copy phase. */
#define ADDCH(c) \
    do {\
        if (copy_phase) \
            self->rec[rec_len] = c;\
        rec_len++;\
    } while(0)

    lineterm = PyString_AsString(dialect->lineterminator);
    if (lineterm == NULL)
        return -1;

    rec_len = self->rec_len;

    /* If this is not the first field we need a field separator */
    if (self->num_fields > 0)
        ADDCH(dialect->delimiter);

    /* Handle preceding quote.  During the counting phase the quotes are
     * not added here, because *quoted may still change while scanning the
     * field; both quote characters are accounted for at the end instead. */
    if (copy_phase && *quoted)
        ADDCH(dialect->quotechar);

    /* Copy/count field data */
    for (i = 0;; i++) {
        char c = field[i];
        int want_escape = 0;

        if (c == '\0')
            break;

        /* Special characters: delimiter, escapechar, quotechar, or any
         * character that occurs in the line terminator. */
        if (c == dialect->delimiter ||
            c == dialect->escapechar ||
            c == dialect->quotechar ||
            strchr(lineterm, c)) {
            if (dialect->quoting == QUOTE_NONE)
                want_escape = 1;
            else {
                if (c == dialect->quotechar) {
                    /* Embedded quote: either double it or escape it. */
                    if (dialect->doublequote)
                        ADDCH(dialect->quotechar);
                    else
                        want_escape = 1;
                }
                /* A special character that is not escaped forces the
                 * whole field to be quoted. */
                if (!want_escape)
                    *quoted = 1;
            }
            if (want_escape) {
                if (!dialect->escapechar) {
                    PyErr_Format(error_obj,
                                 "need to escape, but no escapechar set");
                    return -1;
                }
                ADDCH(dialect->escapechar);
            }
        }
        /* Copy field character into record buffer.
         */
        ADDCH(c);
    }

    /* If field is empty check if it needs to be quoted.
     */
    if (i == 0 && quote_empty) {
        if (dialect->quoting == QUOTE_NONE) {
            PyErr_Format(error_obj,
                         "single empty field record must be quoted");
            return -1;
        }
        else
            *quoted = 1;
    }

    if (*quoted) {
        /* Copy phase: the opening quote was emitted above, so only the
         * closing quote remains.  Counting phase: reserve room for both. */
        if (copy_phase)
            ADDCH(dialect->quotechar);
        else
            rec_len += 2;
    }
    return rec_len;
#undef ADDCH
}
1056
1057static int
1058join_check_rec_size(WriterObj *self, int rec_len)
1059{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001060
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001061 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1062 PyErr_NoMemory();
1063 return 0;
1064 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001065
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001066 if (rec_len > self->rec_size) {
1067 if (self->rec_size == 0) {
1068 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1069 if (self->rec != NULL)
1070 PyMem_Free(self->rec);
1071 self->rec = PyMem_Malloc(self->rec_size);
1072 }
1073 else {
1074 char *old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001075
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001076 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1078 if (self->rec == NULL)
1079 PyMem_Free(old_rec);
1080 }
1081 if (self->rec == NULL) {
1082 PyErr_NoMemory();
1083 return 0;
1084 }
1085 }
1086 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087}
1088
1089static int
1090join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1091{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001092 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001093
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001094 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1095 if (rec_len < 0)
1096 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001097
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001098 /* grow record buffer if necessary */
1099 if (!join_check_rec_size(self, rec_len))
1100 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001101
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001102 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1103 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001104
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001105 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001106}
1107
1108static int
1109join_append_lineterminator(WriterObj *self)
1110{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001111 int terminator_len;
1112 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001113
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001114 terminator_len = PyString_Size(self->dialect->lineterminator);
1115 if (terminator_len == -1)
1116 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001117
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001118 /* grow record buffer if necessary */
1119 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1120 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001121
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001122 terminator = PyString_AsString(self->dialect->lineterminator);
1123 if (terminator == NULL)
1124 return 0;
1125 memmove(self->rec + self->rec_len, terminator, terminator_len);
1126 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001128 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129}
1130
1131PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001132"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001133"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001134"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001135"elements will be converted to string.");
1136
1137static PyObject *
1138csv_writerow(WriterObj *self, PyObject *seq)
1139{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001140 DialectObj *dialect = self->dialect;
1141 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001142
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001143 if (!PySequence_Check(seq))
1144 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001145
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001146 len = PySequence_Length(seq);
1147 if (len < 0)
1148 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001149
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001150 /* Join all fields in internal buffer.
1151 */
1152 join_reset(self);
1153 for (i = 0; i < len; i++) {
1154 PyObject *field;
1155 int append_ok;
1156 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001158 field = PySequence_GetItem(seq, i);
1159 if (field == NULL)
1160 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001161
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001162 switch (dialect->quoting) {
1163 case QUOTE_NONNUMERIC:
1164 quoted = !PyNumber_Check(field);
1165 break;
1166 case QUOTE_ALL:
1167 quoted = 1;
1168 break;
1169 default:
1170 quoted = 0;
1171 break;
1172 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001173
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001174 if (PyString_Check(field)) {
1175 append_ok = join_append(self,
1176 PyString_AS_STRING(field),
1177 &quoted, len == 1);
1178 Py_DECREF(field);
1179 }
1180 else if (field == Py_None) {
1181 append_ok = join_append(self, "", &quoted, len == 1);
1182 Py_DECREF(field);
1183 }
1184 else {
1185 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001186
Raymond Hettingerf5377022011-12-11 22:31:09 -08001187 if (PyFloat_Check(field)) {
1188 str = PyObject_Repr(field);
1189 } else {
1190 str = PyObject_Str(field);
1191 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001192 Py_DECREF(field);
1193 if (str == NULL)
1194 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001195
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001196 append_ok = join_append(self, PyString_AS_STRING(str),
1197 &quoted, len == 1);
1198 Py_DECREF(str);
1199 }
1200 if (!append_ok)
1201 return NULL;
1202 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001203
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001204 /* Add line terminator.
1205 */
1206 if (!join_append_lineterminator(self))
1207 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001208
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001209 return PyObject_CallFunction(self->writeline,
1210 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001211}
1212
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001213PyDoc_STRVAR(csv_writerows_doc,
1214"writerows(sequence of sequences)\n"
1215"\n"
1216"Construct and write a series of sequences to a csv file. Non-string\n"
1217"elements will be converted to string.");
1218
Skip Montanarob4a04172003-03-20 23:29:12 +00001219static PyObject *
1220csv_writerows(WriterObj *self, PyObject *seqseq)
1221{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001222 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001223
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001224 row_iter = PyObject_GetIter(seqseq);
1225 if (row_iter == NULL) {
1226 PyErr_SetString(PyExc_TypeError,
1227 "writerows() argument must be iterable");
1228 return NULL;
1229 }
1230 while ((row_obj = PyIter_Next(row_iter))) {
1231 result = csv_writerow(self, row_obj);
1232 Py_DECREF(row_obj);
1233 if (!result) {
1234 Py_DECREF(row_iter);
1235 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001236 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001237 else
1238 Py_DECREF(result);
1239 }
1240 Py_DECREF(row_iter);
1241 if (PyErr_Occurred())
1242 return NULL;
1243 Py_INCREF(Py_None);
1244 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001245}
1246
/* Methods of writer objects.  Both are METH_O, i.e. each takes exactly
 * one argument object. */
static struct PyMethodDef Writer_methods[] = {
    { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
    { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
    { NULL, NULL }
};
1252
/* Byte offset of field x inside a WriterObj. */
#define W_OFF(x) offsetof(WriterObj, x)

/* Attributes exposed on writer objects through tp_members: only
 * "dialect", flagged RO. */
static struct PyMemberDef Writer_memberlist[] = {
    { "dialect", T_OBJECT, W_OFF(dialect), RO },
    { NULL }
};
1259
1260static void
1261Writer_dealloc(WriterObj *self)
1262{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001263 PyObject_GC_UnTrack(self);
1264 Py_XDECREF(self->dialect);
1265 Py_XDECREF(self->writeline);
1266 if (self->rec != NULL)
1267 PyMem_Free(self->rec);
1268 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001269}
1270
/* tp_traverse for writer objects: report the two object references a
 * writer holds (dialect and writeline) to the visitor.  Py_VISIT relies
 * on the parameters being named exactly `visit` and `arg`. */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1278
/* tp_clear for writer objects: release the dialect and writeline
 * references and leave both fields set to NULL.  Always returns 0. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1286
/* Docstring for the _csv.writer type; installed through the tp_doc slot
 * of Writer_Type. */
PyDoc_STRVAR(Writer_Type_doc,
"CSV writer\n"
"\n"
"Writer objects are responsible for generating tabular data\n"
"in CSV format from sequence input.\n"
);

/* Type object for "_csv.writer".
 *
 * The initialiser is positional, so the order of the entries must follow
 * the PyTypeObject slot order exactly.  The type is GC-aware
 * (Py_TPFLAGS_HAVE_GC with traverse/clear hooks) and may be subclassed
 * (Py_TPFLAGS_BASETYPE).  It defines no iterator slots; its behaviour
 * comes from Writer_methods and Writer_memberlist.
 */
static PyTypeObject Writer_Type = {
    PyVarObject_HEAD_INIT(NULL, 0)
    "_csv.writer",                          /*tp_name*/
    sizeof(WriterObj),                      /*tp_basicsize*/
    0,                                      /*tp_itemsize*/
    /* methods */
    (destructor)Writer_dealloc,             /*tp_dealloc*/
    (printfunc)0,                           /*tp_print*/
    (getattrfunc)0,                         /*tp_getattr*/
    (setattrfunc)0,                         /*tp_setattr*/
    (cmpfunc)0,                             /*tp_compare*/
    (reprfunc)0,                            /*tp_repr*/
    0,                                      /*tp_as_number*/
    0,                                      /*tp_as_sequence*/
    0,                                      /*tp_as_mapping*/
    (hashfunc)0,                            /*tp_hash*/
    (ternaryfunc)0,                         /*tp_call*/
    (reprfunc)0,                            /*tp_str*/
    0,                                      /*tp_getattro*/
    0,                                      /*tp_setattro*/
    0,                                      /*tp_as_buffer*/
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_HAVE_GC,                 /*tp_flags*/
    Writer_Type_doc,                        /*tp_doc*/
    (traverseproc)Writer_traverse,          /*tp_traverse*/
    (inquiry)Writer_clear,                  /*tp_clear*/
    0,                                      /*tp_richcompare*/
    0,                                      /*tp_weaklistoffset*/
    (getiterfunc)0,                         /*tp_iter*/
    (getiterfunc)0,                         /*tp_iternext*/
    Writer_methods,                         /*tp_methods*/
    Writer_memberlist,                      /*tp_members*/
    0,                                      /*tp_getset*/
};
1328
1329static PyObject *
1330csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1331{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001332 PyObject * output_file, * dialect = NULL;
1333 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001334
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001335 if (!self)
1336 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001337
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001338 self->dialect = NULL;
1339 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001340
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001341 self->rec = NULL;
1342 self->rec_size = 0;
1343 self->rec_len = 0;
1344 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001345
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001346 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1347 Py_DECREF(self);
1348 return NULL;
1349 }
1350 self->writeline = PyObject_GetAttrString(output_file, "write");
1351 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1352 PyErr_SetString(PyExc_TypeError,
1353 "argument 1 must have a \"write\" method");
1354 Py_DECREF(self);
1355 return NULL;
1356 }
1357 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1358 if (self->dialect == NULL) {
1359 Py_DECREF(self);
1360 return NULL;
1361 }
1362 PyObject_GC_Track(self);
1363 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001364}
1365
1366/*
1367 * DIALECT REGISTRY
1368 */
/* _csv.list_dialects(): return the keys of the module-level `dialects`
 * dictionary as returned by PyDict_Keys.  `args` is unused. */
static PyObject *
csv_list_dialects(PyObject *module, PyObject *args)
{
    return PyDict_Keys(dialects);
}
1374
1375static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001376csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001377{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001378 PyObject *name_obj, *dialect_obj = NULL;
1379 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001380
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001381 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1382 return NULL;
1383 if (!IS_BASESTRING(name_obj)) {
1384 PyErr_SetString(PyExc_TypeError,
1385 "dialect name must be a string or unicode");
1386 return NULL;
1387 }
1388 dialect = _call_dialect(dialect_obj, kwargs);
1389 if (dialect == NULL)
1390 return NULL;
1391 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1392 Py_DECREF(dialect);
1393 return NULL;
1394 }
1395 Py_DECREF(dialect);
1396 Py_INCREF(Py_None);
1397 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001398}
1399
1400static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001401csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001402{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001403 if (PyDict_DelItem(dialects, name_obj) < 0)
1404 return PyErr_Format(error_obj, "unknown dialect");
1405 Py_INCREF(Py_None);
1406 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001407}
1408
/* _csv.get_dialect(name): thin wrapper that forwards the name to
 * get_dialect_from_registry and returns its result unchanged. */
static PyObject *
csv_get_dialect(PyObject *module, PyObject *name_obj)
{
    return get_dialect_from_registry(name_obj);
}
1414
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001415static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001416csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001417{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001418 PyObject *new_limit = NULL;
1419 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001420
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001421 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1422 return NULL;
1423 if (new_limit != NULL) {
1424 if (!PyInt_Check(new_limit)) {
1425 PyErr_Format(PyExc_TypeError,
1426 "limit must be an integer");
1427 return NULL;
1428 }
1429 field_limit = PyInt_AsLong(new_limit);
1430 }
1431 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001432}
1433
Skip Montanarob4a04172003-03-20 23:29:12 +00001434/*
1435 * MODULE
1436 */
1437
/* Module docstring; passed to Py_InitModule3 in init_csv. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1495
1496PyDoc_STRVAR(csv_reader_doc,
1497" csv_reader = reader(iterable [, dialect='excel']\n"
1498" [optional keyword args])\n"
1499" for row in csv_reader:\n"
1500" process(row)\n"
1501"\n"
1502"The \"iterable\" argument can be any object that returns a line\n"
1503"of input for each iteration, such as a file object or a list. The\n"
1504"optional \"dialect\" parameter is discussed below. The function\n"
1505"also accepts optional keyword arguments which override settings\n"
1506"provided by the dialect.\n"
1507"\n"
1508"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001509"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001510
/* Docstring for the module-level writer() function (see csv_methods).
 * It shows the two usage patterns: writerow() per row, or writerows()
 * for a whole collection of rows. */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1524
1525PyDoc_STRVAR(csv_list_dialects_doc,
1526"Return a list of all know dialect names.\n"
1527" names = csv.list_dialects()");
1528
/* Docstrings for the remaining module-level functions; each is attached
 * to its function in csv_methods. */

/* get_dialect() */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");

/* register_dialect()
 * NOTE(review): the usage line below shows an assignment, but
 * csv_register_dialect returns None on success. */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name, dialect)");

/* unregister_dialect() */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");

/* field_size_limit() */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1547
/* Module-level function table, handed to Py_InitModule3 in init_csv.
 * reader, writer and register_dialect accept positional and keyword
 * arguments; unregister_dialect and get_dialect take a single object;
 * list_dialects takes none; field_size_limit takes positional arguments
 * only. */
static struct PyMethodDef csv_methods[] = {
    { "reader", (PyCFunction)csv_reader,
        METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
    { "writer", (PyCFunction)csv_writer,
        METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
    { "list_dialects", (PyCFunction)csv_list_dialects,
        METH_NOARGS, csv_list_dialects_doc},
    { "register_dialect", (PyCFunction)csv_register_dialect,
        METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
    { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
        METH_O, csv_unregister_dialect_doc},
    { "get_dialect", (PyCFunction)csv_get_dialect,
        METH_O, csv_get_dialect_doc},
    { "field_size_limit", (PyCFunction)csv_field_size_limit,
        METH_VARARGS, csv_field_size_limit_doc},
    { NULL, NULL }
};
1565
1566PyMODINIT_FUNC
1567init_csv(void)
1568{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001569 PyObject *module;
1570 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001571
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001572 if (PyType_Ready(&Dialect_Type) < 0)
1573 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001574
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001575 if (PyType_Ready(&Reader_Type) < 0)
1576 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001577
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001578 if (PyType_Ready(&Writer_Type) < 0)
1579 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001580
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001581 /* Create the module and add the functions */
1582 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1583 if (module == NULL)
1584 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001586 /* Add version to the module. */
1587 if (PyModule_AddStringConstant(module, "__version__",
1588 MODULE_VERSION) == -1)
1589 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001590
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001591 /* Add _dialects dictionary */
1592 dialects = PyDict_New();
1593 if (dialects == NULL)
1594 return;
1595 if (PyModule_AddObject(module, "_dialects", dialects))
1596 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001597
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001598 /* Add quote styles into dictionary */
1599 for (style = quote_styles; style->name; style++) {
1600 if (PyModule_AddIntConstant(module, style->name,
1601 style->style) == -1)
1602 return;
1603 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001604
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001605 /* Add the Dialect type */
1606 Py_INCREF(&Dialect_Type);
1607 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1608 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001609
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001610 /* Add the CSV exception object to the module. */
1611 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1612 if (error_obj == NULL)
1613 return;
1614 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001615}