blob: fd6121fccfc0ecd5992181ca8f790cc8aec041a6 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/* Docstring helpers, supplied only when the Python headers lack them. */
#ifndef PyDoc_STRVAR
#define PyDoc_VAR(name)         static char name[]
#define PyDoc_STRVAR(name,str)  PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str)          str
#else
#define PyDoc_STR(str)          ""
#endif
#endif /* ifndef PyDoc_STRVAR */

/* Return-type spelling for the module init function. */
#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/* Drop the reference held in `op`, setting `op` to NULL before the
 * DECREF runs.  Does nothing when `op` is already NULL. */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *tmp = (PyObject *)(op);           \
            (op) = NULL;                                \
            Py_DECREF(tmp);                             \
        }                                               \
    } while (0)
#endif

/* Report `op` to the GC callback.  Expands to code that uses the names
 * `visit` and `arg` from the enclosing function and returns from that
 * function as soon as the callback yields a non-zero value. */
#ifndef Py_VISIT
#define Py_VISIT(op)                                    \
    do {                                                \
        if (op) {                                       \
            int vret = visit((PyObject *)(op), arg);    \
            if (vret)                                   \
                return vret;                            \
        }                                               \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +000065 PyObject_TypeCheck(o, &PyBaseString_Type)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Antoine Pitrouc83ea132010-05-09 14:46:46 +000067static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000068static PyObject *dialects; /* Dialect registry */
Antoine Pitrouc83ea132010-05-09 14:46:46 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the character-at-a-time CSV parser (see parse_process_char). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
76
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of known quoting policies; the entry with a NULL name ends it. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0 }
};
93
94typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +000095 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouc83ea132010-05-09 14:46:46 +000097 int doublequote; /* is " represented by ""? */
98 char delimiter; /* field separator */
99 char quotechar; /* quote character */
100 char escapechar; /* escape character */
101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
103 int quoting; /* style of quoting to write */
104
105 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +0000106} DialectObj;
107
108staticforward PyTypeObject Dialect_Type;
109
110typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000113 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
119 char *field; /* build current field in here */
120 int field_size; /* size of allocated buffer */
121 int field_len; /* length of current field */
122 int numeric_field; /* treat field as numeric */
123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
126staticforward PyTypeObject Reader_Type;
127
Christian Heimese93237d2007-12-19 02:37:44 +0000128#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000131 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000133 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000135 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000136
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000137 char *rec; /* buffer for parser.join */
138 int rec_size; /* size of allocated record */
139 int rec_len; /* length of record */
140 int num_fields; /* number of fields in record */
141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
143staticforward PyTypeObject Writer_Type;
144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000152 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000153
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000154 dialect_obj = PyDict_GetItem(dialects, name_obj);
155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
161 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000167 Py_XINCREF(str);
168 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
172get_nullchar_as_None(char c)
173{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
179 return PyString_FromStringAndSize((char*)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000185 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
189Dialect_get_escapechar(DialectObj *self)
190{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000191 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000192}
193
Andrew McNamara1196cf12005-01-07 04:42:45 +0000194static PyObject *
195Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000196{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000197 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000198}
199
200static PyObject *
201Dialect_get_quoting(DialectObj *self)
202{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000203 return PyInt_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000208{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000209 if (src == NULL)
210 *target = dflt;
Antoine Pitrouc5bef752012-08-15 23:16:51 +0200211 else {
212 int b = PyObject_IsTrue(src);
213 if (b < 0)
214 return -1;
215 *target = b;
216 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000217 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000218}
219
Andrew McNamara1196cf12005-01-07 04:42:45 +0000220static int
221_set_int(const char *name, int *target, PyObject *src, int dflt)
222{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000223 if (src == NULL)
224 *target = dflt;
225 else {
226 if (!PyInt_Check(src)) {
227 PyErr_Format(PyExc_TypeError,
228 "\"%s\" must be an integer", name);
229 return -1;
230 }
231 *target = PyInt_AsLong(src);
232 }
233 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000234}
235
236static int
237_set_char(const char *name, char *target, PyObject *src, char dflt)
238{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000239 if (src == NULL)
240 *target = dflt;
241 else {
242 if (src == Py_None || PyString_Size(src) == 0)
243 *target = '\0';
244 else if (!PyString_Check(src) || PyString_Size(src) != 1) {
245 PyErr_Format(PyExc_TypeError,
246 "\"%s\" must be an 1-character string",
247 name);
248 return -1;
249 }
250 else {
251 char *s = PyString_AsString(src);
252 if (s == NULL)
253 return -1;
254 *target = s[0];
255 }
256 }
257 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000258}
259
260static int
261_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
262{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000263 if (src == NULL)
264 *target = PyString_FromString(dflt);
265 else {
266 if (src == Py_None)
267 *target = NULL;
268 else if (!IS_BASESTRING(src)) {
269 PyErr_Format(PyExc_TypeError,
270 "\"%s\" must be an string", name);
271 return -1;
272 }
273 else {
274 Py_XDECREF(*target);
275 Py_INCREF(src);
276 *target = src;
277 }
278 }
279 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000280}
281
282static int
283dialect_check_quoting(int quoting)
284{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000285 StyleDesc *qs = quote_styles;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000286
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000287 for (qs = quote_styles; qs->name; qs++) {
288 if (qs->style == quoting)
289 return 0;
290 }
291 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
292 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000293}
Skip Montanarob4a04172003-03-20 23:29:12 +0000294
295#define D_OFF(x) offsetof(DialectObj, x)
296
297static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000298 { "delimiter", T_CHAR, D_OFF(delimiter), READONLY },
299 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
300 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
301 { "strict", T_INT, D_OFF(strict), READONLY },
302 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000303};
304
305static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000306 { "escapechar", (getter)Dialect_get_escapechar},
307 { "lineterminator", (getter)Dialect_get_lineterminator},
308 { "quotechar", (getter)Dialect_get_quotechar},
309 { "quoting", (getter)Dialect_get_quoting},
310 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000311};
312
313static void
314Dialect_dealloc(DialectObj *self)
315{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000316 Py_XDECREF(self->lineterminator);
317 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000318}
319
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000320static char *dialect_kws[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000321 "dialect",
322 "delimiter",
323 "doublequote",
324 "escapechar",
325 "lineterminator",
326 "quotechar",
327 "quoting",
328 "skipinitialspace",
329 "strict",
330 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000331};
332
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000333static PyObject *
334dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000335{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000336 DialectObj *self;
337 PyObject *ret = NULL;
338 PyObject *dialect = NULL;
339 PyObject *delimiter = NULL;
340 PyObject *doublequote = NULL;
341 PyObject *escapechar = NULL;
342 PyObject *lineterminator = NULL;
343 PyObject *quotechar = NULL;
344 PyObject *quoting = NULL;
345 PyObject *skipinitialspace = NULL;
346 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000347
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000348 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
349 "|OOOOOOOOO", dialect_kws,
350 &dialect,
351 &delimiter,
352 &doublequote,
353 &escapechar,
354 &lineterminator,
355 &quotechar,
356 &quoting,
357 &skipinitialspace,
358 &strict))
359 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000360
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000361 if (dialect != NULL) {
362 if (IS_BASESTRING(dialect)) {
363 dialect = get_dialect_from_registry(dialect);
364 if (dialect == NULL)
365 return NULL;
366 }
367 else
368 Py_INCREF(dialect);
369 /* Can we reuse this instance? */
370 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
371 delimiter == 0 &&
372 doublequote == 0 &&
373 escapechar == 0 &&
374 lineterminator == 0 &&
375 quotechar == 0 &&
376 quoting == 0 &&
377 skipinitialspace == 0 &&
378 strict == 0)
379 return dialect;
380 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000381
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000382 self = (DialectObj *)type->tp_alloc(type, 0);
383 if (self == NULL) {
384 Py_XDECREF(dialect);
385 return NULL;
386 }
387 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000388
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000389 Py_XINCREF(delimiter);
390 Py_XINCREF(doublequote);
391 Py_XINCREF(escapechar);
392 Py_XINCREF(lineterminator);
393 Py_XINCREF(quotechar);
394 Py_XINCREF(quoting);
395 Py_XINCREF(skipinitialspace);
396 Py_XINCREF(strict);
397 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000398#define DIALECT_GETATTR(v, n) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000399 if (v == NULL) \
400 v = PyObject_GetAttrString(dialect, n)
401 DIALECT_GETATTR(delimiter, "delimiter");
402 DIALECT_GETATTR(doublequote, "doublequote");
403 DIALECT_GETATTR(escapechar, "escapechar");
404 DIALECT_GETATTR(lineterminator, "lineterminator");
405 DIALECT_GETATTR(quotechar, "quotechar");
406 DIALECT_GETATTR(quoting, "quoting");
407 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
408 DIALECT_GETATTR(strict, "strict");
409 PyErr_Clear();
410 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000411
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000412 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000414 if (meth(name, target, src, dflt)) \
415 goto err
416 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
417 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
418 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
419 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
420 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
421 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
422 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
423 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000424
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000425 /* validate options */
426 if (dialect_check_quoting(self->quoting))
427 goto err;
428 if (self->delimiter == 0) {
429 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
430 goto err;
431 }
432 if (quotechar == Py_None && quoting == NULL)
433 self->quoting = QUOTE_NONE;
434 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
435 PyErr_SetString(PyExc_TypeError,
436 "quotechar must be set if quoting enabled");
437 goto err;
438 }
439 if (self->lineterminator == 0) {
440 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
441 goto err;
442 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000443
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000444 ret = (PyObject *)self;
445 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000446err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000447 Py_XDECREF(self);
448 Py_XDECREF(dialect);
449 Py_XDECREF(delimiter);
450 Py_XDECREF(doublequote);
451 Py_XDECREF(escapechar);
452 Py_XDECREF(lineterminator);
453 Py_XDECREF(quotechar);
454 Py_XDECREF(quoting);
455 Py_XDECREF(skipinitialspace);
456 Py_XDECREF(strict);
457 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000458}
459
460
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000461PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000462"CSV dialect\n"
463"\n"
464"The Dialect type records CSV parsing and generation options.\n");
465
466static PyTypeObject Dialect_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000467 PyVarObject_HEAD_INIT(NULL, 0)
468 "_csv.Dialect", /* tp_name */
469 sizeof(DialectObj), /* tp_basicsize */
470 0, /* tp_itemsize */
471 /* methods */
472 (destructor)Dialect_dealloc, /* tp_dealloc */
473 (printfunc)0, /* tp_print */
474 (getattrfunc)0, /* tp_getattr */
475 (setattrfunc)0, /* tp_setattr */
476 (cmpfunc)0, /* tp_compare */
477 (reprfunc)0, /* tp_repr */
478 0, /* tp_as_number */
479 0, /* tp_as_sequence */
480 0, /* tp_as_mapping */
481 (hashfunc)0, /* tp_hash */
482 (ternaryfunc)0, /* tp_call */
483 (reprfunc)0, /* tp_str */
484 0, /* tp_getattro */
485 0, /* tp_setattro */
486 0, /* tp_as_buffer */
487 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
488 Dialect_Type_doc, /* tp_doc */
489 0, /* tp_traverse */
490 0, /* tp_clear */
491 0, /* tp_richcompare */
492 0, /* tp_weaklistoffset */
493 0, /* tp_iter */
494 0, /* tp_iternext */
495 0, /* tp_methods */
496 Dialect_memberlist, /* tp_members */
497 Dialect_getsetlist, /* tp_getset */
498 0, /* tp_base */
499 0, /* tp_dict */
500 0, /* tp_descr_get */
501 0, /* tp_descr_set */
502 0, /* tp_dictoffset */
503 0, /* tp_init */
504 0, /* tp_alloc */
505 dialect_new, /* tp_new */
506 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000507};
508
Andrew McNamara91b97462005-01-11 01:07:23 +0000509/*
510 * Return an instance of the dialect type, given a Python instance or kwarg
511 * description of the dialect
512 */
513static PyObject *
514_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
515{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000516 PyObject *ctor_args;
517 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000518
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000519 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
520 if (ctor_args == NULL)
521 return NULL;
522 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
523 Py_DECREF(ctor_args);
524 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000525}
526
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000527/*
528 * READER
529 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000530static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000531parse_save_field(ReaderObj *self)
532{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000533 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000534
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000535 field = PyString_FromStringAndSize(self->field, self->field_len);
536 if (field == NULL)
537 return -1;
538 self->field_len = 0;
539 if (self->numeric_field) {
540 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000541
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000542 self->numeric_field = 0;
543 tmp = PyNumber_Float(field);
544 if (tmp == NULL) {
545 Py_DECREF(field);
546 return -1;
547 }
548 Py_DECREF(field);
549 field = tmp;
550 }
551 PyList_Append(self->fields, field);
552 Py_DECREF(field);
553 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000554}
555
556static int
557parse_grow_buff(ReaderObj *self)
558{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000559 if (self->field_size == 0) {
560 self->field_size = 4096;
561 if (self->field != NULL)
562 PyMem_Free(self->field);
563 self->field = PyMem_Malloc(self->field_size);
564 }
565 else {
566 if (self->field_size > INT_MAX / 2) {
567 PyErr_NoMemory();
568 return 0;
569 }
570 self->field_size *= 2;
571 self->field = PyMem_Realloc(self->field, self->field_size);
572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000581parse_add_char(ReaderObj *self, char c)
582{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000583 if (self->field_len >= field_limit) {
584 PyErr_Format(error_obj, "field larger than field limit (%ld)",
585 field_limit);
586 return -1;
587 }
588 if (self->field_len == self->field_size && !parse_grow_buff(self))
589 return -1;
590 self->field[self->field_len++] = c;
591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000594static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000595parse_process_char(ReaderObj *self, char c)
596{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000597 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000598
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000599 switch (self->state) {
600 case START_RECORD:
601 /* start of record */
602 if (c == '\0')
603 /* empty line - return [] */
604 break;
605 else if (c == '\n' || c == '\r') {
606 self->state = EAT_CRNL;
607 break;
608 }
609 /* normal character - handle as START_FIELD */
610 self->state = START_FIELD;
611 /* fallthru */
612 case START_FIELD:
613 /* expecting field */
614 if (c == '\n' || c == '\r' || c == '\0') {
615 /* save empty field - return [fields] */
616 if (parse_save_field(self) < 0)
617 return -1;
618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
619 }
620 else if (c == dialect->quotechar &&
621 dialect->quoting != QUOTE_NONE) {
622 /* start quoted field */
623 self->state = IN_QUOTED_FIELD;
624 }
625 else if (c == dialect->escapechar) {
626 /* possible escaped character */
627 self->state = ESCAPED_CHAR;
628 }
629 else if (c == ' ' && dialect->skipinitialspace)
630 /* ignore space at start of field */
631 ;
632 else if (c == dialect->delimiter) {
633 /* save empty field */
634 if (parse_save_field(self) < 0)
635 return -1;
636 }
637 else {
638 /* begin new unquoted field */
639 if (dialect->quoting == QUOTE_NONNUMERIC)
640 self->numeric_field = 1;
641 if (parse_add_char(self, c) < 0)
642 return -1;
643 self->state = IN_FIELD;
644 }
645 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000646
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000647 case ESCAPED_CHAR:
648 if (c == '\0')
649 c = '\n';
650 if (parse_add_char(self, c) < 0)
651 return -1;
652 self->state = IN_FIELD;
653 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000654
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000655 case IN_FIELD:
656 /* in unquoted field */
657 if (c == '\n' || c == '\r' || c == '\0') {
658 /* end of line - return [fields] */
659 if (parse_save_field(self) < 0)
660 return -1;
661 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
662 }
663 else if (c == dialect->escapechar) {
664 /* possible escaped character */
665 self->state = ESCAPED_CHAR;
666 }
667 else if (c == dialect->delimiter) {
668 /* save field - wait for new field */
669 if (parse_save_field(self) < 0)
670 return -1;
671 self->state = START_FIELD;
672 }
673 else {
674 /* normal character - save in field */
675 if (parse_add_char(self, c) < 0)
676 return -1;
677 }
678 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000679
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000680 case IN_QUOTED_FIELD:
681 /* in quoted field */
682 if (c == '\0')
683 ;
684 else if (c == dialect->escapechar) {
685 /* Possible escape character */
686 self->state = ESCAPE_IN_QUOTED_FIELD;
687 }
688 else if (c == dialect->quotechar &&
689 dialect->quoting != QUOTE_NONE) {
690 if (dialect->doublequote) {
691 /* doublequote; " represented by "" */
692 self->state = QUOTE_IN_QUOTED_FIELD;
693 }
694 else {
695 /* end of quote part of field */
696 self->state = IN_FIELD;
697 }
698 }
699 else {
700 /* normal character - save in field */
701 if (parse_add_char(self, c) < 0)
702 return -1;
703 }
704 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000705
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000706 case ESCAPE_IN_QUOTED_FIELD:
707 if (c == '\0')
708 c = '\n';
709 if (parse_add_char(self, c) < 0)
710 return -1;
711 self->state = IN_QUOTED_FIELD;
712 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000713
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000714 case QUOTE_IN_QUOTED_FIELD:
715 /* doublequote - seen a quote in an quoted field */
716 if (dialect->quoting != QUOTE_NONE &&
717 c == dialect->quotechar) {
718 /* save "" as " */
719 if (parse_add_char(self, c) < 0)
720 return -1;
721 self->state = IN_QUOTED_FIELD;
722 }
723 else if (c == dialect->delimiter) {
724 /* save field - wait for new field */
725 if (parse_save_field(self) < 0)
726 return -1;
727 self->state = START_FIELD;
728 }
729 else if (c == '\n' || c == '\r' || c == '\0') {
730 /* end of line - return [fields] */
731 if (parse_save_field(self) < 0)
732 return -1;
733 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
734 }
735 else if (!dialect->strict) {
736 if (parse_add_char(self, c) < 0)
737 return -1;
738 self->state = IN_FIELD;
739 }
740 else {
741 /* illegal */
742 PyErr_Format(error_obj, "'%c' expected after '%c'",
743 dialect->delimiter,
744 dialect->quotechar);
745 return -1;
746 }
747 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000748
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000749 case EAT_CRNL:
750 if (c == '\n' || c == '\r')
751 ;
752 else if (c == '\0')
753 self->state = START_RECORD;
754 else {
755 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
756 return -1;
757 }
758 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000759
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000760 }
761 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000762}
763
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000764static int
765parse_reset(ReaderObj *self)
766{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000767 Py_XDECREF(self->fields);
768 self->fields = PyList_New(0);
769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000780 PyObject *lineobj;
781 PyObject *fields = NULL;
782 char *line, c;
783 int linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000785 if (parse_reset(self) < 0)
786 return NULL;
787 do {
788 lineobj = PyIter_Next(self->input_iter);
789 if (lineobj == NULL) {
790 /* End of input OR exception */
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700791 if (!PyErr_Occurred() && (self->field_len != 0 ||
792 self->state == IN_QUOTED_FIELD)) {
793 if (self->dialect->strict)
794 PyErr_SetString(error_obj, "unexpected end of data");
795 else if (parse_save_field(self) >= 0 )
796 break;
797 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000798 return NULL;
799 }
800 ++self->line_num;
Skip Montanarob4a04172003-03-20 23:29:12 +0000801
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000802 line = PyString_AsString(lineobj);
803 linelen = PyString_Size(lineobj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000804
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000805 if (line == NULL || linelen < 0) {
806 Py_DECREF(lineobj);
807 return NULL;
808 }
809 while (linelen--) {
810 c = *line++;
811 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000812 Py_DECREF(lineobj);
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000813 PyErr_Format(error_obj,
814 "line contains NULL byte");
815 goto err;
816 }
817 if (parse_process_char(self, c) < 0) {
818 Py_DECREF(lineobj);
819 goto err;
820 }
821 }
822 Py_DECREF(lineobj);
823 if (parse_process_char(self, 0) < 0)
824 goto err;
825 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000826
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000827 fields = self->fields;
828 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000829err:
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000830 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000831}
832
833static void
834Reader_dealloc(ReaderObj *self)
835{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000836 PyObject_GC_UnTrack(self);
837 Py_XDECREF(self->dialect);
838 Py_XDECREF(self->input_iter);
839 Py_XDECREF(self->fields);
840 if (self->field != NULL)
841 PyMem_Free(self->field);
842 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000843}
844
845static int
846Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
847{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000848 Py_VISIT(self->dialect);
849 Py_VISIT(self->input_iter);
850 Py_VISIT(self->fields);
851 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000852}
853
854static int
855Reader_clear(ReaderObj *self)
856{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000857 Py_CLEAR(self->dialect);
858 Py_CLEAR(self->input_iter);
859 Py_CLEAR(self->fields);
860 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000861}
862
863PyDoc_STRVAR(Reader_Type_doc,
864"CSV reader\n"
865"\n"
866"Reader objects are responsible for reading and parsing tabular data\n"
867"in CSV format.\n"
868);
869
870static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000871 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000872};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000873#define R_OFF(x) offsetof(ReaderObj, x)
874
875static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000876 { "dialect", T_OBJECT, R_OFF(dialect), RO },
877 { "line_num", T_ULONG, R_OFF(line_num), RO },
878 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000879};
880
Skip Montanarob4a04172003-03-20 23:29:12 +0000881
882static PyTypeObject Reader_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000883 PyVarObject_HEAD_INIT(NULL, 0)
884 "_csv.reader", /*tp_name*/
885 sizeof(ReaderObj), /*tp_basicsize*/
886 0, /*tp_itemsize*/
887 /* methods */
888 (destructor)Reader_dealloc, /*tp_dealloc*/
889 (printfunc)0, /*tp_print*/
890 (getattrfunc)0, /*tp_getattr*/
891 (setattrfunc)0, /*tp_setattr*/
892 (cmpfunc)0, /*tp_compare*/
893 (reprfunc)0, /*tp_repr*/
894 0, /*tp_as_number*/
895 0, /*tp_as_sequence*/
896 0, /*tp_as_mapping*/
897 (hashfunc)0, /*tp_hash*/
898 (ternaryfunc)0, /*tp_call*/
899 (reprfunc)0, /*tp_str*/
900 0, /*tp_getattro*/
901 0, /*tp_setattro*/
902 0, /*tp_as_buffer*/
903 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
904 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
905 Reader_Type_doc, /*tp_doc*/
906 (traverseproc)Reader_traverse, /*tp_traverse*/
907 (inquiry)Reader_clear, /*tp_clear*/
908 0, /*tp_richcompare*/
909 0, /*tp_weaklistoffset*/
910 PyObject_SelfIter, /*tp_iter*/
911 (getiterfunc)Reader_iternext, /*tp_iternext*/
912 Reader_methods, /*tp_methods*/
913 Reader_memberlist, /*tp_members*/
914 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000915
916};
917
918static PyObject *
919csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
920{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000921 PyObject * iterator, * dialect = NULL;
922 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000923
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000924 if (!self)
925 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000926
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000927 self->dialect = NULL;
928 self->fields = NULL;
929 self->input_iter = NULL;
930 self->field = NULL;
931 self->field_size = 0;
932 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000933
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000934 if (parse_reset(self) < 0) {
935 Py_DECREF(self);
936 return NULL;
937 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000938
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000939 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
940 Py_DECREF(self);
941 return NULL;
942 }
943 self->input_iter = PyObject_GetIter(iterator);
944 if (self->input_iter == NULL) {
945 PyErr_SetString(PyExc_TypeError,
946 "argument 1 must be an iterator");
947 Py_DECREF(self);
948 return NULL;
949 }
950 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
951 if (self->dialect == NULL) {
952 Py_DECREF(self);
953 return NULL;
954 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000955
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000956 PyObject_GC_Track(self);
957 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000958}
959
/*
 * WRITER
 */
/* ---------------------------------------------------------------- */
964static void
965join_reset(WriterObj *self)
966{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000967 self->rec_len = 0;
968 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000969}
970
/* Granularity, in bytes, used when sizing the writer's record buffer. */
#define MEM_INCR 32768
972
973/* Calculate new record length or append field to record. Return new
974 * record length.
975 */
976static int
977join_append_data(WriterObj *self, char *field, int quote_empty,
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000978 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000979{
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000980 DialectObj *dialect = self->dialect;
981 int i, rec_len;
982 char *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000983
984#define ADDCH(c) \
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000985 do {\
986 if (copy_phase) \
987 self->rec[rec_len] = c;\
988 rec_len++;\
989 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000990
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000991 lineterm = PyString_AsString(dialect->lineterminator);
992 if (lineterm == NULL)
993 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000994
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000995 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000996
Antoine Pitrouc83ea132010-05-09 14:46:46 +0000997 /* If this is not the first field we need a field separator */
998 if (self->num_fields > 0)
999 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001000
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001001 /* Handle preceding quote */
1002 if (copy_phase && *quoted)
1003 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001004
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001005 /* Copy/count field data */
1006 for (i = 0;; i++) {
1007 char c = field[i];
1008 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001009
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001010 if (c == '\0')
1011 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001012
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001013 if (c == dialect->delimiter ||
1014 c == dialect->escapechar ||
1015 c == dialect->quotechar ||
1016 strchr(lineterm, c)) {
1017 if (dialect->quoting == QUOTE_NONE)
1018 want_escape = 1;
1019 else {
1020 if (c == dialect->quotechar) {
1021 if (dialect->doublequote)
1022 ADDCH(dialect->quotechar);
1023 else
1024 want_escape = 1;
1025 }
1026 if (!want_escape)
1027 *quoted = 1;
1028 }
1029 if (want_escape) {
1030 if (!dialect->escapechar) {
1031 PyErr_Format(error_obj,
1032 "need to escape, but no escapechar set");
1033 return -1;
1034 }
1035 ADDCH(dialect->escapechar);
1036 }
1037 }
1038 /* Copy field character into record buffer.
1039 */
1040 ADDCH(c);
1041 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001042
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001043 /* If field is empty check if it needs to be quoted.
1044 */
1045 if (i == 0 && quote_empty) {
1046 if (dialect->quoting == QUOTE_NONE) {
1047 PyErr_Format(error_obj,
1048 "single empty field record must be quoted");
1049 return -1;
1050 }
1051 else
1052 *quoted = 1;
1053 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001054
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001055 if (*quoted) {
1056 if (copy_phase)
1057 ADDCH(dialect->quotechar);
1058 else
1059 rec_len += 2;
1060 }
1061 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001062#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001063}
1064
1065static int
1066join_check_rec_size(WriterObj *self, int rec_len)
1067{
Gregory P. Smith9d534572008-06-11 07:41:16 +00001068
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001069 if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) {
1070 PyErr_NoMemory();
1071 return 0;
1072 }
Gregory P. Smith9d534572008-06-11 07:41:16 +00001073
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001074 if (rec_len > self->rec_size) {
1075 if (self->rec_size == 0) {
1076 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1077 if (self->rec != NULL)
1078 PyMem_Free(self->rec);
1079 self->rec = PyMem_Malloc(self->rec_size);
1080 }
1081 else {
1082 char *old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001083
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001084 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1085 self->rec = PyMem_Realloc(self->rec, self->rec_size);
1086 if (self->rec == NULL)
1087 PyMem_Free(old_rec);
1088 }
1089 if (self->rec == NULL) {
1090 PyErr_NoMemory();
1091 return 0;
1092 }
1093 }
1094 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001095}
1096
1097static int
1098join_append(WriterObj *self, char *field, int *quoted, int quote_empty)
1099{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001100 int rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001101
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001102 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1103 if (rec_len < 0)
1104 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001105
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001106 /* grow record buffer if necessary */
1107 if (!join_check_rec_size(self, rec_len))
1108 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001109
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001110 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1111 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001112
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001113 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001114}
1115
1116static int
1117join_append_lineterminator(WriterObj *self)
1118{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001119 int terminator_len;
1120 char *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001121
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001122 terminator_len = PyString_Size(self->dialect->lineterminator);
1123 if (terminator_len == -1)
1124 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001126 /* grow record buffer if necessary */
1127 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1128 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001130 terminator = PyString_AsString(self->dialect->lineterminator);
1131 if (terminator == NULL)
1132 return 0;
1133 memmove(self->rec + self->rec_len, terminator, terminator_len);
1134 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001135
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001136 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137}
1138
1139PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001140"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001141"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001142"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001143"elements will be converted to string.");
1144
1145static PyObject *
1146csv_writerow(WriterObj *self, PyObject *seq)
1147{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001148 DialectObj *dialect = self->dialect;
1149 int len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001150
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001151 if (!PySequence_Check(seq))
1152 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001153
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001154 len = PySequence_Length(seq);
1155 if (len < 0)
1156 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001157
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001158 /* Join all fields in internal buffer.
1159 */
1160 join_reset(self);
1161 for (i = 0; i < len; i++) {
1162 PyObject *field;
1163 int append_ok;
1164 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001165
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001166 field = PySequence_GetItem(seq, i);
1167 if (field == NULL)
1168 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001170 switch (dialect->quoting) {
1171 case QUOTE_NONNUMERIC:
1172 quoted = !PyNumber_Check(field);
1173 break;
1174 case QUOTE_ALL:
1175 quoted = 1;
1176 break;
1177 default:
1178 quoted = 0;
1179 break;
1180 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001181
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001182 if (PyString_Check(field)) {
1183 append_ok = join_append(self,
1184 PyString_AS_STRING(field),
1185 &quoted, len == 1);
1186 Py_DECREF(field);
1187 }
1188 else if (field == Py_None) {
1189 append_ok = join_append(self, "", &quoted, len == 1);
1190 Py_DECREF(field);
1191 }
1192 else {
1193 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001194
Raymond Hettingerf5377022011-12-11 22:31:09 -08001195 if (PyFloat_Check(field)) {
1196 str = PyObject_Repr(field);
1197 } else {
1198 str = PyObject_Str(field);
1199 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001200 Py_DECREF(field);
1201 if (str == NULL)
1202 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001203
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001204 append_ok = join_append(self, PyString_AS_STRING(str),
1205 &quoted, len == 1);
1206 Py_DECREF(str);
1207 }
1208 if (!append_ok)
1209 return NULL;
1210 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001211
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001212 /* Add line terminator.
1213 */
1214 if (!join_append_lineterminator(self))
1215 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001216
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001217 return PyObject_CallFunction(self->writeline,
1218 "(s#)", self->rec, self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001219}
1220
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001221PyDoc_STRVAR(csv_writerows_doc,
1222"writerows(sequence of sequences)\n"
1223"\n"
1224"Construct and write a series of sequences to a csv file. Non-string\n"
1225"elements will be converted to string.");
1226
Skip Montanarob4a04172003-03-20 23:29:12 +00001227static PyObject *
1228csv_writerows(WriterObj *self, PyObject *seqseq)
1229{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001230 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001231
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001232 row_iter = PyObject_GetIter(seqseq);
1233 if (row_iter == NULL) {
1234 PyErr_SetString(PyExc_TypeError,
1235 "writerows() argument must be iterable");
1236 return NULL;
1237 }
1238 while ((row_obj = PyIter_Next(row_iter))) {
1239 result = csv_writerow(self, row_obj);
1240 Py_DECREF(row_obj);
1241 if (!result) {
1242 Py_DECREF(row_iter);
1243 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001244 }
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001245 else
1246 Py_DECREF(result);
1247 }
1248 Py_DECREF(row_iter);
1249 if (PyErr_Occurred())
1250 return NULL;
1251 Py_INCREF(Py_None);
1252 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001253}
1254
1255static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001256 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1257 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1258 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001259};
1260
1261#define W_OFF(x) offsetof(WriterObj, x)
1262
1263static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001264 { "dialect", T_OBJECT, W_OFF(dialect), RO },
1265 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001266};
1267
1268static void
1269Writer_dealloc(WriterObj *self)
1270{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001271 PyObject_GC_UnTrack(self);
1272 Py_XDECREF(self->dialect);
1273 Py_XDECREF(self->writeline);
1274 if (self->rec != NULL)
1275 PyMem_Free(self->rec);
1276 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001277}
1278
1279static int
1280Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1281{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001282 Py_VISIT(self->dialect);
1283 Py_VISIT(self->writeline);
1284 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001285}
1286
1287static int
1288Writer_clear(WriterObj *self)
1289{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001290 Py_CLEAR(self->dialect);
1291 Py_CLEAR(self->writeline);
1292 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001293}
1294
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001295PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001296"CSV writer\n"
1297"\n"
1298"Writer objects are responsible for generating tabular data\n"
1299"in CSV format from sequence input.\n"
1300);
1301
1302static PyTypeObject Writer_Type = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001303 PyVarObject_HEAD_INIT(NULL, 0)
1304 "_csv.writer", /*tp_name*/
1305 sizeof(WriterObj), /*tp_basicsize*/
1306 0, /*tp_itemsize*/
1307 /* methods */
1308 (destructor)Writer_dealloc, /*tp_dealloc*/
1309 (printfunc)0, /*tp_print*/
1310 (getattrfunc)0, /*tp_getattr*/
1311 (setattrfunc)0, /*tp_setattr*/
1312 (cmpfunc)0, /*tp_compare*/
1313 (reprfunc)0, /*tp_repr*/
1314 0, /*tp_as_number*/
1315 0, /*tp_as_sequence*/
1316 0, /*tp_as_mapping*/
1317 (hashfunc)0, /*tp_hash*/
1318 (ternaryfunc)0, /*tp_call*/
1319 (reprfunc)0, /*tp_str*/
1320 0, /*tp_getattro*/
1321 0, /*tp_setattro*/
1322 0, /*tp_as_buffer*/
1323 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1324 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1325 Writer_Type_doc,
1326 (traverseproc)Writer_traverse, /*tp_traverse*/
1327 (inquiry)Writer_clear, /*tp_clear*/
1328 0, /*tp_richcompare*/
1329 0, /*tp_weaklistoffset*/
1330 (getiterfunc)0, /*tp_iter*/
1331 (getiterfunc)0, /*tp_iternext*/
1332 Writer_methods, /*tp_methods*/
1333 Writer_memberlist, /*tp_members*/
1334 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001335};
1336
1337static PyObject *
1338csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1339{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001340 PyObject * output_file, * dialect = NULL;
1341 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001342
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001343 if (!self)
1344 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001345
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001346 self->dialect = NULL;
1347 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001348
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001349 self->rec = NULL;
1350 self->rec_size = 0;
1351 self->rec_len = 0;
1352 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001353
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001354 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1355 Py_DECREF(self);
1356 return NULL;
1357 }
1358 self->writeline = PyObject_GetAttrString(output_file, "write");
1359 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1360 PyErr_SetString(PyExc_TypeError,
1361 "argument 1 must have a \"write\" method");
1362 Py_DECREF(self);
1363 return NULL;
1364 }
1365 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1366 if (self->dialect == NULL) {
1367 Py_DECREF(self);
1368 return NULL;
1369 }
1370 PyObject_GC_Track(self);
1371 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001372}
1373
/*
 * DIALECT REGISTRY
 */
1377static PyObject *
1378csv_list_dialects(PyObject *module, PyObject *args)
1379{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001380 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001381}
1382
1383static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001384csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001385{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001386 PyObject *name_obj, *dialect_obj = NULL;
1387 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001388
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001389 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1390 return NULL;
1391 if (!IS_BASESTRING(name_obj)) {
1392 PyErr_SetString(PyExc_TypeError,
1393 "dialect name must be a string or unicode");
1394 return NULL;
1395 }
1396 dialect = _call_dialect(dialect_obj, kwargs);
1397 if (dialect == NULL)
1398 return NULL;
1399 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1400 Py_DECREF(dialect);
1401 return NULL;
1402 }
1403 Py_DECREF(dialect);
1404 Py_INCREF(Py_None);
1405 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001406}
1407
1408static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001409csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001410{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001411 if (PyDict_DelItem(dialects, name_obj) < 0)
1412 return PyErr_Format(error_obj, "unknown dialect");
1413 Py_INCREF(Py_None);
1414 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001415}
1416
1417static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001418csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001419{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001420 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001421}
1422
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001423static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001424csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001425{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001426 PyObject *new_limit = NULL;
1427 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001428
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001429 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1430 return NULL;
1431 if (new_limit != NULL) {
1432 if (!PyInt_Check(new_limit)) {
1433 PyErr_Format(PyExc_TypeError,
1434 "limit must be an integer");
1435 return NULL;
1436 }
1437 field_limit = PyInt_AsLong(new_limit);
1438 }
1439 return PyInt_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001440}
1441
/*
 * MODULE
 */
1445
1446PyDoc_STRVAR(csv_module_doc,
1447"CSV parsing and writing.\n"
1448"\n"
1449"This module provides classes that assist in the reading and writing\n"
1450"of Comma Separated Value (CSV) files, and implements the interface\n"
1451"described by PEP 305. Although many CSV files are simple to parse,\n"
1452"the format is not formally defined by a stable specification and\n"
1453"is subtle enough that parsing lines of a CSV file with something\n"
1454"like line.split(\",\") is bound to fail. The module supports three\n"
1455"basic APIs: reading, writing, and registration of dialects.\n"
1456"\n"
1457"\n"
1458"DIALECT REGISTRATION:\n"
1459"\n"
1460"Readers and writers support a dialect argument, which is a convenient\n"
1461"handle on a group of settings. When the dialect argument is a string,\n"
1462"it identifies one of the dialects previously registered with the module.\n"
1463"If it is a class or instance, the attributes of the argument are used as\n"
1464"the settings for the reader or writer:\n"
1465"\n"
1466" class excel:\n"
1467" delimiter = ','\n"
1468" quotechar = '\"'\n"
1469" escapechar = None\n"
1470" doublequote = True\n"
1471" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001472" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001473" quoting = QUOTE_MINIMAL\n"
1474"\n"
1475"SETTINGS:\n"
1476"\n"
1477" * quotechar - specifies a one-character string to use as the \n"
1478" quoting character. It defaults to '\"'.\n"
1479" * delimiter - specifies a one-character string to use as the \n"
1480" field separator. It defaults to ','.\n"
1481" * skipinitialspace - specifies how to interpret whitespace which\n"
1482" immediately follows a delimiter. It defaults to False, which\n"
1483" means that whitespace immediately following a delimiter is part\n"
1484" of the following field.\n"
1485" * lineterminator - specifies the character sequence which should \n"
1486" terminate rows.\n"
1487" * quoting - controls when quotes should be generated by the writer.\n"
1488" It can take on any of the following module constants:\n"
1489"\n"
1490" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1491" field contains either the quotechar or the delimiter\n"
1492" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1493" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001494" fields which do not parse as integers or floating point\n"
1495" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001496" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1497" * escapechar - specifies a one-character string used to escape \n"
1498" the delimiter when quoting is set to QUOTE_NONE.\n"
1499" * doublequote - controls the handling of quotes inside fields. When\n"
1500" True, two consecutive quotes are interpreted as one during read,\n"
1501" and when writing, each quote character embedded in the data is\n"
1502" written as two quotes\n");
1503
1504PyDoc_STRVAR(csv_reader_doc,
1505" csv_reader = reader(iterable [, dialect='excel']\n"
1506" [optional keyword args])\n"
1507" for row in csv_reader:\n"
1508" process(row)\n"
1509"\n"
1510"The \"iterable\" argument can be any object that returns a line\n"
1511"of input for each iteration, such as a file object or a list. The\n"
1512"optional \"dialect\" parameter is discussed below. The function\n"
1513"also accepts optional keyword arguments which override settings\n"
1514"provided by the dialect.\n"
1515"\n"
1516"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001517"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001518
1519PyDoc_STRVAR(csv_writer_doc,
1520" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1521" [optional keyword args])\n"
Fredrik Lundh4aaaa492006-04-04 16:51:13 +00001522" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001523" csv_writer.writerow(row)\n"
1524"\n"
1525" [or]\n"
1526"\n"
1527" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1528" [optional keyword args])\n"
1529" csv_writer.writerows(rows)\n"
1530"\n"
1531"The \"fileobj\" argument can be any object that supports the file API.\n");
1532
1533PyDoc_STRVAR(csv_list_dialects_doc,
1534"Return a list of all know dialect names.\n"
1535" names = csv.list_dialects()");
1536
1537PyDoc_STRVAR(csv_get_dialect_doc,
1538"Return the dialect instance associated with name.\n"
1539" dialect = csv.get_dialect(name)");
1540
1541PyDoc_STRVAR(csv_register_dialect_doc,
1542"Create a mapping from a string name to a dialect class.\n"
1543" dialect = csv.register_dialect(name, dialect)");
1544
1545PyDoc_STRVAR(csv_unregister_dialect_doc,
1546"Delete the name/dialect mapping associated with a string name.\n"
1547" csv.unregister_dialect(name)");
1548
Andrew McNamara31d88962005-01-12 03:45:10 +00001549PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001550"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001551" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001552"\n"
1553"Returns old limit. If limit is not given, no new limit is set and\n"
1554"the old limit is returned");
1555
Skip Montanarob4a04172003-03-20 23:29:12 +00001556static struct PyMethodDef csv_methods[] = {
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001557 { "reader", (PyCFunction)csv_reader,
1558 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1559 { "writer", (PyCFunction)csv_writer,
1560 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1561 { "list_dialects", (PyCFunction)csv_list_dialects,
1562 METH_NOARGS, csv_list_dialects_doc},
1563 { "register_dialect", (PyCFunction)csv_register_dialect,
1564 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1565 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1566 METH_O, csv_unregister_dialect_doc},
1567 { "get_dialect", (PyCFunction)csv_get_dialect,
1568 METH_O, csv_get_dialect_doc},
1569 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1570 METH_VARARGS, csv_field_size_limit_doc},
1571 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001572};
1573
1574PyMODINIT_FUNC
1575init_csv(void)
1576{
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001577 PyObject *module;
1578 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001579
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001580 if (PyType_Ready(&Dialect_Type) < 0)
1581 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001582
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001583 if (PyType_Ready(&Reader_Type) < 0)
1584 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001585
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001586 if (PyType_Ready(&Writer_Type) < 0)
1587 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001588
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001589 /* Create the module and add the functions */
1590 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1591 if (module == NULL)
1592 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001593
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001594 /* Add version to the module. */
1595 if (PyModule_AddStringConstant(module, "__version__",
1596 MODULE_VERSION) == -1)
1597 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001598
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001599 /* Add _dialects dictionary */
1600 dialects = PyDict_New();
1601 if (dialects == NULL)
1602 return;
1603 if (PyModule_AddObject(module, "_dialects", dialects))
1604 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001605
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001606 /* Add quote styles into dictionary */
1607 for (style = quote_styles; style->name; style++) {
1608 if (PyModule_AddIntConstant(module, style->name,
1609 style->style) == -1)
1610 return;
1611 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001612
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001613 /* Add the Dialect type */
1614 Py_INCREF(&Dialect_Type);
1615 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1616 return;
Skip Montanarob4a04172003-03-20 23:29:12 +00001617
Antoine Pitrouc83ea132010-05-09 14:46:46 +00001618 /* Add the CSV exception object to the module. */
1619 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1620 if (error_obj == NULL)
1621 return;
1622 PyModule_AddObject(module, "Error", error_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001623}