blob: 0df85b6abdadd2a01f120a40be7f94cddf22cf45 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */

/*
 * Each macro below is only defined when the Python headers did not already
 * provide it, so on newer interpreters this whole section is inert.
 */
#ifndef PyDoc_STRVAR
/* Define macros for inline documentation. */
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
#       if defined(__cplusplus)
#               define PyMODINIT_FUNC extern "C" void
#       else /* __cplusplus */
#               define PyMODINIT_FUNC void
#       endif /* __cplusplus */
#endif

/*
 * Py_CLEAR(op): if op is non-NULL, set it to NULL first and only then drop
 * the reference.  The temporary is named _py_tmp so that it cannot capture
 * an identifier called "tmp" appearing in the caller's op expression.
 */
#ifndef Py_CLEAR
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *_py_tmp = (PyObject *)(op);       \
            (op) = NULL;                                \
            Py_DECREF(_py_tmp);                         \
        }                                               \
    } while (0)
#endif

/*
 * Py_VISIT(op): for use inside a traverse function whose parameters are
 * named "visit" and "arg".  Calls visit on a non-NULL op and returns from
 * the enclosing function as soon as visit reports a non-zero result.
 */
#ifndef Py_VISIT
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int _py_vret = visit((PyObject *)(op), arg);        \
            if (_py_vret)                                       \
                return _py_vret;                                \
        }                                                       \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
65 PyObject_TypeCheck(o, &PyBaseString_Type)
66
Skip Montanarob4a04172003-03-20 23:29:12 +000067static PyObject *error_obj; /* CSV exception */
68static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the character-at-a-time reader state machine. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
76
/* Quoting policies a dialect can select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Table of known quoting policies; the entry with a NULL name ends it. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
93
94typedef struct {
95 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000096
Skip Montanarob4a04172003-03-20 23:29:12 +000097 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000098 Py_UNICODE delimiter; /* field separator */
99 Py_UNICODE quotechar; /* quote character */
100 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000103 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104
105 int strict; /* raise exception on bad CSV */
106} DialectObj;
107
Neal Norwitz227b5332006-03-22 09:28:35 +0000108static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
111 PyObject_HEAD
112
113 PyObject *input_iter; /* iterate over this for input lines */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +0000119 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +0000120 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +0000121 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000122 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
Neal Norwitz227b5332006-03-22 09:28:35 +0000126static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000128#define ReaderObject_Check(v) (Py_Type(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
131 PyObject_HEAD
132
133 PyObject *writeline; /* write output lines to this file */
134
135 DialectObj *dialect; /* parsing dialect */
136
Guido van Rossum46264582007-08-06 19:32:18 +0000137 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +0000138 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +0000139 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000140 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
Neal Norwitz227b5332006-03-22 09:28:35 +0000143static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 return dialect_obj;
162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
167 Py_XINCREF(str);
168 return str;
169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000172get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000173{
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000179 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
185 return get_string(self->lineterminator);
186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000189Dialect_get_delimiter(DialectObj *self)
190{
191 return get_nullchar_as_None(self->delimiter);
192}
193
194static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000195Dialect_get_escapechar(DialectObj *self)
196{
197 return get_nullchar_as_None(self->escapechar);
198}
199
Andrew McNamara1196cf12005-01-07 04:42:45 +0000200static PyObject *
201Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000202{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static PyObject *
207Dialect_get_quoting(DialectObj *self)
208{
209 return PyInt_FromLong(self->quoting);
210}
211
212static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000213_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000214{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000215 if (src == NULL)
216 *target = dflt;
217 else
218 *target = PyObject_IsTrue(src);
219 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000220}
221
Andrew McNamara1196cf12005-01-07 04:42:45 +0000222static int
223_set_int(const char *name, int *target, PyObject *src, int dflt)
224{
225 if (src == NULL)
226 *target = dflt;
227 else {
Guido van Rossumddefaf32007-01-14 03:31:43 +0000228 if (!PyInt_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229 PyErr_Format(PyExc_TypeError,
230 "\"%s\" must be an integer", name);
231 return -1;
232 }
233 *target = PyInt_AsLong(src);
234 }
235 return 0;
236}
237
238static int
Guido van Rossum46264582007-08-06 19:32:18 +0000239_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000240{
241 if (src == NULL)
242 *target = dflt;
243 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000244 *target = '\0';
245 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000246 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000247 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000248 buf = PyUnicode_AsUnicode(src);
249 len = PyUnicode_GetSize(src);
250 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000251 PyErr_Format(PyExc_TypeError,
252 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000253 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000254 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000255 }
256 if (len > 0)
257 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000258 }
259 }
260 return 0;
261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
266 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000268 else {
269 if (src == Py_None)
270 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000271 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be an string", name);
274 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000275 }
276 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000277 Py_XDECREF(*target);
278 Py_INCREF(src);
279 *target = src;
280 }
281 }
282 return 0;
283}
284
285static int
286dialect_check_quoting(int quoting)
287{
288 StyleDesc *qs = quote_styles;
289
290 for (qs = quote_styles; qs->name; qs++) {
291 if (qs->style == quoting)
292 return 0;
293 }
294 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
295 return -1;
296}
Skip Montanarob4a04172003-03-20 23:29:12 +0000297
298#define D_OFF(x) offsetof(DialectObj, x)
299
300static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
302 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
303 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000304 { NULL }
305};
306
307static PyGetSetDef Dialect_getsetlist[] = {
Guido van Rossuma9769c22007-08-07 23:59:30 +0000308 { "delimiter", (getter)Dialect_get_delimiter},
Andrew McNamara1196cf12005-01-07 04:42:45 +0000309 { "escapechar", (getter)Dialect_get_escapechar},
310 { "lineterminator", (getter)Dialect_get_lineterminator},
311 { "quotechar", (getter)Dialect_get_quotechar},
312 { "quoting", (getter)Dialect_get_quoting},
313 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000314};
315
316static void
317Dialect_dealloc(DialectObj *self)
318{
319 Py_XDECREF(self->lineterminator);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000320 Py_Type(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000321}
322
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000323static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000324 "dialect",
325 "delimiter",
326 "doublequote",
327 "escapechar",
328 "lineterminator",
329 "quotechar",
330 "quoting",
331 "skipinitialspace",
332 "strict",
333 NULL
334};
335
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000336static PyObject *
337dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000338{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000339 DialectObj *self;
340 PyObject *ret = NULL;
341 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000342 PyObject *delimiter = NULL;
343 PyObject *doublequote = NULL;
344 PyObject *escapechar = NULL;
345 PyObject *lineterminator = NULL;
346 PyObject *quotechar = NULL;
347 PyObject *quoting = NULL;
348 PyObject *skipinitialspace = NULL;
349 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000350
Andrew McNamara1196cf12005-01-07 04:42:45 +0000351 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
352 "|OOOOOOOOO", dialect_kws,
353 &dialect,
354 &delimiter,
355 &doublequote,
356 &escapechar,
357 &lineterminator,
358 &quotechar,
359 &quoting,
360 &skipinitialspace,
361 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000362 return NULL;
363
364 if (dialect != NULL) {
365 if (IS_BASESTRING(dialect)) {
366 dialect = get_dialect_from_registry(dialect);
367 if (dialect == NULL)
368 return NULL;
369 }
370 else
371 Py_INCREF(dialect);
372 /* Can we reuse this instance? */
373 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
374 delimiter == 0 &&
375 doublequote == 0 &&
376 escapechar == 0 &&
377 lineterminator == 0 &&
378 quotechar == 0 &&
379 quoting == 0 &&
380 skipinitialspace == 0 &&
381 strict == 0)
382 return dialect;
383 }
384
385 self = (DialectObj *)type->tp_alloc(type, 0);
386 if (self == NULL) {
387 Py_XDECREF(dialect);
388 return NULL;
389 }
390 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000391
Andrew McNamara1196cf12005-01-07 04:42:45 +0000392 Py_XINCREF(delimiter);
393 Py_XINCREF(doublequote);
394 Py_XINCREF(escapechar);
395 Py_XINCREF(lineterminator);
396 Py_XINCREF(quotechar);
397 Py_XINCREF(quoting);
398 Py_XINCREF(skipinitialspace);
399 Py_XINCREF(strict);
400 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000401#define DIALECT_GETATTR(v, n) \
402 if (v == NULL) \
403 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000404 DIALECT_GETATTR(delimiter, "delimiter");
405 DIALECT_GETATTR(doublequote, "doublequote");
406 DIALECT_GETATTR(escapechar, "escapechar");
407 DIALECT_GETATTR(lineterminator, "lineterminator");
408 DIALECT_GETATTR(quotechar, "quotechar");
409 DIALECT_GETATTR(quoting, "quoting");
410 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
411 DIALECT_GETATTR(strict, "strict");
412 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000414
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415 /* check types and convert to C values */
416#define DIASET(meth, name, target, src, dflt) \
417 if (meth(name, target, src, dflt)) \
418 goto err
419 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
420 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
421 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
422 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
423 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
424 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
425 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
426 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000427
Andrew McNamara1196cf12005-01-07 04:42:45 +0000428 /* validate options */
429 if (dialect_check_quoting(self->quoting))
430 goto err;
431 if (self->delimiter == 0) {
432 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
433 goto err;
434 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000435 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000436 self->quoting = QUOTE_NONE;
437 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
438 PyErr_SetString(PyExc_TypeError,
439 "quotechar must be set if quoting enabled");
440 goto err;
441 }
442 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000443 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444 goto err;
445 }
446
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000447 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000448 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000450 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000451 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452 Py_XDECREF(delimiter);
453 Py_XDECREF(doublequote);
454 Py_XDECREF(escapechar);
455 Py_XDECREF(lineterminator);
456 Py_XDECREF(quotechar);
457 Py_XDECREF(quoting);
458 Py_XDECREF(skipinitialspace);
459 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000460 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000461}
462
463
464PyDoc_STRVAR(Dialect_Type_doc,
465"CSV dialect\n"
466"\n"
467"The Dialect type records CSV parsing and generation options.\n");
468
469static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000470 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000471 "_csv.Dialect", /* tp_name */
472 sizeof(DialectObj), /* tp_basicsize */
473 0, /* tp_itemsize */
474 /* methods */
475 (destructor)Dialect_dealloc, /* tp_dealloc */
476 (printfunc)0, /* tp_print */
477 (getattrfunc)0, /* tp_getattr */
478 (setattrfunc)0, /* tp_setattr */
479 (cmpfunc)0, /* tp_compare */
480 (reprfunc)0, /* tp_repr */
481 0, /* tp_as_number */
482 0, /* tp_as_sequence */
483 0, /* tp_as_mapping */
484 (hashfunc)0, /* tp_hash */
485 (ternaryfunc)0, /* tp_call */
486 (reprfunc)0, /* tp_str */
487 0, /* tp_getattro */
488 0, /* tp_setattro */
489 0, /* tp_as_buffer */
490 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
491 Dialect_Type_doc, /* tp_doc */
492 0, /* tp_traverse */
493 0, /* tp_clear */
494 0, /* tp_richcompare */
495 0, /* tp_weaklistoffset */
496 0, /* tp_iter */
497 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000498 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000499 Dialect_memberlist, /* tp_members */
500 Dialect_getsetlist, /* tp_getset */
501 0, /* tp_base */
502 0, /* tp_dict */
503 0, /* tp_descr_get */
504 0, /* tp_descr_set */
505 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000506 0, /* tp_init */
507 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000508 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000509 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000510};
511
Andrew McNamara91b97462005-01-11 01:07:23 +0000512/*
513 * Return an instance of the dialect type, given a Python instance or kwarg
514 * description of the dialect
515 */
516static PyObject *
517_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
518{
519 PyObject *ctor_args;
520 PyObject *dialect;
521
522 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
523 if (ctor_args == NULL)
524 return NULL;
525 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
526 Py_DECREF(ctor_args);
527 return dialect;
528}
529
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000530/*
531 * READER
532 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000533static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000534parse_save_field(ReaderObj *self)
535{
536 PyObject *field;
537
Guido van Rossum46264582007-08-06 19:32:18 +0000538 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539 if (field == NULL)
540 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000541 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000542 if (self->numeric_field) {
543 PyObject *tmp;
544
545 self->numeric_field = 0;
546 tmp = PyNumber_Float(field);
547 if (tmp == NULL) {
548 Py_DECREF(field);
549 return -1;
550 }
551 Py_DECREF(field);
552 field = tmp;
553 }
554 PyList_Append(self->fields, field);
555 Py_DECREF(field);
556 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000557}
558
559static int
560parse_grow_buff(ReaderObj *self)
561{
562 if (self->field_size == 0) {
563 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000564 if (self->field != NULL)
565 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000566 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000567 }
568 else {
569 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000570 self->field = PyMem_Resize(self->field, Py_UNICODE,
571 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Guido van Rossum46264582007-08-06 19:32:18 +0000581parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000582{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000583 if (self->field_len >= field_limit) {
584 PyErr_Format(error_obj, "field larger than field limit (%ld)",
585 field_limit);
586 return -1;
587 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000588 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000589 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000594static int
Guido van Rossum46264582007-08-06 19:32:18 +0000595parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000596{
597 DialectObj *dialect = self->dialect;
598
599 switch (self->state) {
600 case START_RECORD:
601 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000602 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000603 /* empty line - return [] */
604 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000605 else if (c == '\n' || c == '\r') {
606 self->state = EAT_CRNL;
607 break;
608 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000609 /* normal character - handle as START_FIELD */
610 self->state = START_FIELD;
611 /* fallthru */
612 case START_FIELD:
613 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000614 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000615 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000616 if (parse_save_field(self) < 0)
617 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000619 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000620 else if (c == dialect->quotechar &&
621 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000622 /* start quoted field */
623 self->state = IN_QUOTED_FIELD;
624 }
625 else if (c == dialect->escapechar) {
626 /* possible escaped character */
627 self->state = ESCAPED_CHAR;
628 }
629 else if (c == ' ' && dialect->skipinitialspace)
630 /* ignore space at start of field */
631 ;
632 else if (c == dialect->delimiter) {
633 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000634 if (parse_save_field(self) < 0)
635 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000636 }
637 else {
638 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000639 if (dialect->quoting == QUOTE_NONNUMERIC)
640 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000641 if (parse_add_char(self, c) < 0)
642 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000643 self->state = IN_FIELD;
644 }
645 break;
646
647 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000648 if (c == '\0')
649 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000650 if (parse_add_char(self, c) < 0)
651 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652 self->state = IN_FIELD;
653 break;
654
655 case IN_FIELD:
656 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000657 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000658 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000659 if (parse_save_field(self) < 0)
660 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000661 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000662 }
663 else if (c == dialect->escapechar) {
664 /* possible escaped character */
665 self->state = ESCAPED_CHAR;
666 }
667 else if (c == dialect->delimiter) {
668 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000669 if (parse_save_field(self) < 0)
670 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000671 self->state = START_FIELD;
672 }
673 else {
674 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000675 if (parse_add_char(self, c) < 0)
676 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677 }
678 break;
679
680 case IN_QUOTED_FIELD:
681 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000682 if (c == '\0')
683 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000684 else if (c == dialect->escapechar) {
685 /* Possible escape character */
686 self->state = ESCAPE_IN_QUOTED_FIELD;
687 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000688 else if (c == dialect->quotechar &&
689 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000690 if (dialect->doublequote) {
691 /* doublequote; " represented by "" */
692 self->state = QUOTE_IN_QUOTED_FIELD;
693 }
694 else {
695 /* end of quote part of field */
696 self->state = IN_FIELD;
697 }
698 }
699 else {
700 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000701 if (parse_add_char(self, c) < 0)
702 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000703 }
704 break;
705
706 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000707 if (c == '\0')
708 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000709 if (parse_add_char(self, c) < 0)
710 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711 self->state = IN_QUOTED_FIELD;
712 break;
713
714 case QUOTE_IN_QUOTED_FIELD:
715 /* doublequote - seen a quote in an quoted field */
716 if (dialect->quoting != QUOTE_NONE &&
717 c == dialect->quotechar) {
718 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000719 if (parse_add_char(self, c) < 0)
720 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000721 self->state = IN_QUOTED_FIELD;
722 }
723 else if (c == dialect->delimiter) {
724 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000725 if (parse_save_field(self) < 0)
726 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000727 self->state = START_FIELD;
728 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000729 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000730 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000731 if (parse_save_field(self) < 0)
732 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000733 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000734 }
735 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000736 if (parse_add_char(self, c) < 0)
737 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000738 self->state = IN_FIELD;
739 }
740 else {
741 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000742 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000743 dialect->delimiter,
744 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000745 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000746 }
747 break;
748
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000749 case EAT_CRNL:
750 if (c == '\n' || c == '\r')
751 ;
752 else if (c == '\0')
753 self->state = START_RECORD;
754 else {
755 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
756 return -1;
757 }
758 break;
759
Skip Montanarob4a04172003-03-20 23:29:12 +0000760 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000761 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000762}
763
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000764static int
765parse_reset(ReaderObj *self)
766{
767 Py_XDECREF(self->fields);
768 self->fields = PyList_New(0);
769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
780 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000782 Py_UNICODE *line, c;
783 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000785 if (parse_reset(self) < 0)
786 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000787 do {
788 lineobj = PyIter_Next(self->input_iter);
789 if (lineobj == NULL) {
790 /* End of input OR exception */
791 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000792 PyErr_Format(error_obj,
793 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000794 return NULL;
795 }
Guido van Rossum46264582007-08-06 19:32:18 +0000796 ++self->line_num;
797 line = PyUnicode_AsUnicode(lineobj);
798 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000799 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000800 Py_DECREF(lineobj);
801 return NULL;
802 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000803 while (linelen--) {
804 c = *line++;
805 if (c == '\0') {
806 Py_DECREF(lineobj);
807 PyErr_Format(error_obj,
808 "line contains NULL byte");
809 goto err;
810 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000811 if (parse_process_char(self, c) < 0) {
812 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000813 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000814 }
815 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000816 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000817 if (parse_process_char(self, 0) < 0)
818 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000819 } while (self->state != START_RECORD);
820
821 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000822 self->fields = NULL;
823err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000824 return fields;
825}
826
827static void
828Reader_dealloc(ReaderObj *self)
829{
Andrew McNamara77ead872005-01-10 02:09:41 +0000830 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000831 Py_XDECREF(self->dialect);
832 Py_XDECREF(self->input_iter);
833 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000834 if (self->field != NULL)
835 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000836 PyObject_GC_Del(self);
837}
838
839static int
840Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
841{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000842 Py_VISIT(self->dialect);
843 Py_VISIT(self->input_iter);
844 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000845 return 0;
846}
847
848static int
849Reader_clear(ReaderObj *self)
850{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000851 Py_CLEAR(self->dialect);
852 Py_CLEAR(self->input_iter);
853 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000854 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000855}
856
857PyDoc_STRVAR(Reader_Type_doc,
858"CSV reader\n"
859"\n"
860"Reader objects are responsible for reading and parsing tabular data\n"
861"in CSV format.\n"
862);
863
864static struct PyMethodDef Reader_methods[] = {
865 { NULL, NULL }
866};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000867#define R_OFF(x) offsetof(ReaderObj, x)
868
869static struct PyMemberDef Reader_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +0000870 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
871 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000872 { NULL }
873};
874
Skip Montanarob4a04172003-03-20 23:29:12 +0000875
876static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000877 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000878 "_csv.reader", /*tp_name*/
879 sizeof(ReaderObj), /*tp_basicsize*/
880 0, /*tp_itemsize*/
881 /* methods */
882 (destructor)Reader_dealloc, /*tp_dealloc*/
883 (printfunc)0, /*tp_print*/
884 (getattrfunc)0, /*tp_getattr*/
885 (setattrfunc)0, /*tp_setattr*/
886 (cmpfunc)0, /*tp_compare*/
887 (reprfunc)0, /*tp_repr*/
888 0, /*tp_as_number*/
889 0, /*tp_as_sequence*/
890 0, /*tp_as_mapping*/
891 (hashfunc)0, /*tp_hash*/
892 (ternaryfunc)0, /*tp_call*/
893 (reprfunc)0, /*tp_str*/
894 0, /*tp_getattro*/
895 0, /*tp_setattro*/
896 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000897 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
898 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000899 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000900 (traverseproc)Reader_traverse, /*tp_traverse*/
901 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000902 0, /*tp_richcompare*/
903 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000904 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000905 (getiterfunc)Reader_iternext, /*tp_iternext*/
906 Reader_methods, /*tp_methods*/
907 Reader_memberlist, /*tp_members*/
908 0, /*tp_getset*/
909
910};
911
912static PyObject *
913csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
914{
Andrew McNamara91b97462005-01-11 01:07:23 +0000915 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000916 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000917
918 if (!self)
919 return NULL;
920
921 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000922 self->fields = NULL;
923 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000924 self->field = NULL;
925 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000926 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000927
928 if (parse_reset(self) < 0) {
929 Py_DECREF(self);
930 return NULL;
931 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000932
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000933 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000934 Py_DECREF(self);
935 return NULL;
936 }
937 self->input_iter = PyObject_GetIter(iterator);
938 if (self->input_iter == NULL) {
939 PyErr_SetString(PyExc_TypeError,
940 "argument 1 must be an iterator");
941 Py_DECREF(self);
942 return NULL;
943 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000944 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000945 if (self->dialect == NULL) {
946 Py_DECREF(self);
947 return NULL;
948 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000949
Andrew McNamara77ead872005-01-10 02:09:41 +0000950 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000951 return (PyObject *)self;
952}
953
954/*
955 * WRITER
956 */
957/* ---------------------------------------------------------------- */
958static void
959join_reset(WriterObj *self)
960{
961 self->rec_len = 0;
962 self->num_fields = 0;
963}
964
965#define MEM_INCR 32768
966
967/* Calculate new record length or append field to record. Return new
968 * record length.
969 */
970static int
Guido van Rossum46264582007-08-06 19:32:18 +0000971join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
972 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000973{
974 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000975 int i;
976 int rec_len;
977 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000978
979#define ADDCH(c) \
980 do {\
981 if (copy_phase) \
982 self->rec[rec_len] = c;\
983 rec_len++;\
984 } while(0)
985
Guido van Rossum46264582007-08-06 19:32:18 +0000986 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000987 if (lineterm == NULL)
988 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000989
990 rec_len = self->rec_len;
991
Andrew McNamarac89f2842005-01-12 07:44:42 +0000992 /* If this is not the first field we need a field separator */
993 if (self->num_fields > 0)
994 ADDCH(dialect->delimiter);
995
996 /* Handle preceding quote */
997 if (copy_phase && *quoted)
998 ADDCH(dialect->quotechar);
999
1000 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +00001001 /* If field is null just pass over */
1002 for (i = 0; field; i++) {
1003 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +00001004 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001005
1006 if (c == '\0')
1007 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001008
Andrew McNamarac89f2842005-01-12 07:44:42 +00001009 if (c == dialect->delimiter ||
1010 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +00001011 c == dialect->quotechar ||
1012 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001013 if (dialect->quoting == QUOTE_NONE)
1014 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001015 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001016 if (c == dialect->quotechar) {
1017 if (dialect->doublequote)
1018 ADDCH(dialect->quotechar);
1019 else
1020 want_escape = 1;
1021 }
1022 if (!want_escape)
1023 *quoted = 1;
1024 }
1025 if (want_escape) {
1026 if (!dialect->escapechar) {
1027 PyErr_Format(error_obj,
1028 "need to escape, but no escapechar set");
1029 return -1;
1030 }
1031 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001032 }
1033 }
1034 /* Copy field character into record buffer.
1035 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001036 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001037 }
1038
1039 /* If field is empty check if it needs to be quoted.
1040 */
1041 if (i == 0 && quote_empty) {
1042 if (dialect->quoting == QUOTE_NONE) {
1043 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001044 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001045 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001046 }
1047 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001048 *quoted = 1;
1049 }
1050
Skip Montanarob4a04172003-03-20 23:29:12 +00001051 if (*quoted) {
1052 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001053 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001054 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001055 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001056 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001057 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001058#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001059}
1060
1061static int
1062join_check_rec_size(WriterObj *self, int rec_len)
1063{
1064 if (rec_len > self->rec_size) {
1065 if (self->rec_size == 0) {
1066 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001067 if (self->rec != NULL)
1068 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001069 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001070 }
1071 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001072 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001073
1074 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001075 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1076 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001077 if (self->rec == NULL)
1078 PyMem_Free(old_rec);
1079 }
1080 if (self->rec == NULL) {
1081 PyErr_NoMemory();
1082 return 0;
1083 }
1084 }
1085 return 1;
1086}
1087
1088static int
Guido van Rossum46264582007-08-06 19:32:18 +00001089join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001090{
1091 int rec_len;
1092
1093 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1094 if (rec_len < 0)
1095 return 0;
1096
1097 /* grow record buffer if necessary */
1098 if (!join_check_rec_size(self, rec_len))
1099 return 0;
1100
1101 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1102 self->num_fields++;
1103
1104 return 1;
1105}
1106
1107static int
1108join_append_lineterminator(WriterObj *self)
1109{
1110 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001111 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001112
Guido van Rossum46264582007-08-06 19:32:18 +00001113 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001114 if (terminator_len == -1)
1115 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001116
1117 /* grow record buffer if necessary */
1118 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1119 return 0;
1120
Guido van Rossum46264582007-08-06 19:32:18 +00001121 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001122 if (terminator == NULL)
1123 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001124 memmove(self->rec + self->rec_len, terminator,
1125 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001126 self->rec_len += terminator_len;
1127
1128 return 1;
1129}
1130
1131PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001132"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001133"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001134"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001135"elements will be converted to string.");
1136
1137static PyObject *
1138csv_writerow(WriterObj *self, PyObject *seq)
1139{
1140 DialectObj *dialect = self->dialect;
1141 int len, i;
1142
1143 if (!PySequence_Check(seq))
1144 return PyErr_Format(error_obj, "sequence expected");
1145
1146 len = PySequence_Length(seq);
1147 if (len < 0)
1148 return NULL;
1149
1150 /* Join all fields in internal buffer.
1151 */
1152 join_reset(self);
1153 for (i = 0; i < len; i++) {
1154 PyObject *field;
1155 int append_ok;
1156 int quoted;
1157
1158 field = PySequence_GetItem(seq, i);
1159 if (field == NULL)
1160 return NULL;
1161
Andrew McNamarac89f2842005-01-12 07:44:42 +00001162 switch (dialect->quoting) {
1163 case QUOTE_NONNUMERIC:
1164 quoted = !PyNumber_Check(field);
1165 break;
1166 case QUOTE_ALL:
1167 quoted = 1;
1168 break;
1169 default:
1170 quoted = 0;
1171 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001172 }
1173
Guido van Rossum46264582007-08-06 19:32:18 +00001174 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001175 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001176 PyUnicode_AS_UNICODE(field),
1177 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001178 Py_DECREF(field);
1179 }
1180 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001181 append_ok = join_append(self, NULL,
1182 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001183 Py_DECREF(field);
1184 }
1185 else {
1186 PyObject *str;
1187
Guido van Rossum46264582007-08-06 19:32:18 +00001188 str = PyObject_Unicode(field);
1189 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001190 if (str == NULL)
1191 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001192 append_ok = join_append(self,
1193 PyUnicode_AS_UNICODE(str),
1194 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001195 Py_DECREF(str);
1196 }
1197 if (!append_ok)
1198 return NULL;
1199 }
1200
1201 /* Add line terminator.
1202 */
1203 if (!join_append_lineterminator(self))
1204 return 0;
1205
Guido van Rossum46264582007-08-06 19:32:18 +00001206 return PyObject_CallFunction(self->writeline,
1207 "(u#)", self->rec,
1208 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001209}
1210
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001211PyDoc_STRVAR(csv_writerows_doc,
1212"writerows(sequence of sequences)\n"
1213"\n"
1214"Construct and write a series of sequences to a csv file. Non-string\n"
1215"elements will be converted to string.");
1216
Skip Montanarob4a04172003-03-20 23:29:12 +00001217static PyObject *
1218csv_writerows(WriterObj *self, PyObject *seqseq)
1219{
1220 PyObject *row_iter, *row_obj, *result;
1221
1222 row_iter = PyObject_GetIter(seqseq);
1223 if (row_iter == NULL) {
1224 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001225 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001226 return NULL;
1227 }
1228 while ((row_obj = PyIter_Next(row_iter))) {
1229 result = csv_writerow(self, row_obj);
1230 Py_DECREF(row_obj);
1231 if (!result) {
1232 Py_DECREF(row_iter);
1233 return NULL;
1234 }
1235 else
1236 Py_DECREF(result);
1237 }
1238 Py_DECREF(row_iter);
1239 if (PyErr_Occurred())
1240 return NULL;
1241 Py_INCREF(Py_None);
1242 return Py_None;
1243}
1244
1245static struct PyMethodDef Writer_methods[] = {
1246 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001247 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001248 { NULL, NULL }
1249};
1250
1251#define W_OFF(x) offsetof(WriterObj, x)
1252
1253static struct PyMemberDef Writer_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001254 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +00001255 { NULL }
1256};
1257
1258static void
1259Writer_dealloc(WriterObj *self)
1260{
Andrew McNamara77ead872005-01-10 02:09:41 +00001261 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001262 Py_XDECREF(self->dialect);
1263 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001264 if (self->rec != NULL)
1265 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001266 PyObject_GC_Del(self);
1267}
1268
1269static int
1270Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1271{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001272 Py_VISIT(self->dialect);
1273 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001274 return 0;
1275}
1276
1277static int
1278Writer_clear(WriterObj *self)
1279{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001280 Py_CLEAR(self->dialect);
1281 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001282 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001283}
1284
1285PyDoc_STRVAR(Writer_Type_doc,
1286"CSV writer\n"
1287"\n"
1288"Writer objects are responsible for generating tabular data\n"
1289"in CSV format from sequence input.\n"
1290);
1291
1292static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001293 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001294 "_csv.writer", /*tp_name*/
1295 sizeof(WriterObj), /*tp_basicsize*/
1296 0, /*tp_itemsize*/
1297 /* methods */
1298 (destructor)Writer_dealloc, /*tp_dealloc*/
1299 (printfunc)0, /*tp_print*/
1300 (getattrfunc)0, /*tp_getattr*/
1301 (setattrfunc)0, /*tp_setattr*/
1302 (cmpfunc)0, /*tp_compare*/
1303 (reprfunc)0, /*tp_repr*/
1304 0, /*tp_as_number*/
1305 0, /*tp_as_sequence*/
1306 0, /*tp_as_mapping*/
1307 (hashfunc)0, /*tp_hash*/
1308 (ternaryfunc)0, /*tp_call*/
1309 (reprfunc)0, /*tp_str*/
1310 0, /*tp_getattro*/
1311 0, /*tp_setattro*/
1312 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001313 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1314 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001315 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001316 (traverseproc)Writer_traverse, /*tp_traverse*/
1317 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001318 0, /*tp_richcompare*/
1319 0, /*tp_weaklistoffset*/
1320 (getiterfunc)0, /*tp_iter*/
1321 (getiterfunc)0, /*tp_iternext*/
1322 Writer_methods, /*tp_methods*/
1323 Writer_memberlist, /*tp_members*/
1324 0, /*tp_getset*/
1325};
1326
1327static PyObject *
1328csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1329{
Andrew McNamara91b97462005-01-11 01:07:23 +00001330 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001331 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001332
1333 if (!self)
1334 return NULL;
1335
1336 self->dialect = NULL;
1337 self->writeline = NULL;
1338
1339 self->rec = NULL;
1340 self->rec_size = 0;
1341 self->rec_len = 0;
1342 self->num_fields = 0;
1343
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001344 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001345 Py_DECREF(self);
1346 return NULL;
1347 }
1348 self->writeline = PyObject_GetAttrString(output_file, "write");
1349 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1350 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001351 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001352 Py_DECREF(self);
1353 return NULL;
1354 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001355 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001356 if (self->dialect == NULL) {
1357 Py_DECREF(self);
1358 return NULL;
1359 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001360 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001361 return (PyObject *)self;
1362}
1363
1364/*
1365 * DIALECT REGISTRY
1366 */
1367static PyObject *
1368csv_list_dialects(PyObject *module, PyObject *args)
1369{
1370 return PyDict_Keys(dialects);
1371}
1372
1373static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001374csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001375{
Andrew McNamara86625972005-01-11 01:28:33 +00001376 PyObject *name_obj, *dialect_obj = NULL;
1377 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378
Andrew McNamara86625972005-01-11 01:28:33 +00001379 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001380 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001381 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001382 PyErr_SetString(PyExc_TypeError,
1383 "dialect name must be a string or unicode");
1384 return NULL;
1385 }
Andrew McNamara86625972005-01-11 01:28:33 +00001386 dialect = _call_dialect(dialect_obj, kwargs);
1387 if (dialect == NULL)
1388 return NULL;
1389 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1390 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001391 return NULL;
1392 }
Andrew McNamara86625972005-01-11 01:28:33 +00001393 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001394 Py_INCREF(Py_None);
1395 return Py_None;
1396}
1397
1398static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001399csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001400{
Skip Montanarob4a04172003-03-20 23:29:12 +00001401 if (PyDict_DelItem(dialects, name_obj) < 0)
1402 return PyErr_Format(error_obj, "unknown dialect");
1403 Py_INCREF(Py_None);
1404 return Py_None;
1405}
1406
1407static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001408csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001409{
Skip Montanarob4a04172003-03-20 23:29:12 +00001410 return get_dialect_from_registry(name_obj);
1411}
1412
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001413static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001414csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001415{
1416 PyObject *new_limit = NULL;
1417 long old_limit = field_limit;
1418
Andrew McNamara31d88962005-01-12 03:45:10 +00001419 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001420 return NULL;
1421 if (new_limit != NULL) {
Guido van Rossumddefaf32007-01-14 03:31:43 +00001422 if (!PyInt_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001423 PyErr_Format(PyExc_TypeError,
1424 "limit must be an integer");
1425 return NULL;
1426 }
1427 field_limit = PyInt_AsLong(new_limit);
1428 }
1429 return PyInt_FromLong(old_limit);
1430}
1431
Skip Montanarob4a04172003-03-20 23:29:12 +00001432/*
1433 * MODULE
1434 */
1435
1436PyDoc_STRVAR(csv_module_doc,
1437"CSV parsing and writing.\n"
1438"\n"
1439"This module provides classes that assist in the reading and writing\n"
1440"of Comma Separated Value (CSV) files, and implements the interface\n"
1441"described by PEP 305. Although many CSV files are simple to parse,\n"
1442"the format is not formally defined by a stable specification and\n"
1443"is subtle enough that parsing lines of a CSV file with something\n"
1444"like line.split(\",\") is bound to fail. The module supports three\n"
1445"basic APIs: reading, writing, and registration of dialects.\n"
1446"\n"
1447"\n"
1448"DIALECT REGISTRATION:\n"
1449"\n"
1450"Readers and writers support a dialect argument, which is a convenient\n"
1451"handle on a group of settings. When the dialect argument is a string,\n"
1452"it identifies one of the dialects previously registered with the module.\n"
1453"If it is a class or instance, the attributes of the argument are used as\n"
1454"the settings for the reader or writer:\n"
1455"\n"
1456" class excel:\n"
1457" delimiter = ','\n"
1458" quotechar = '\"'\n"
1459" escapechar = None\n"
1460" doublequote = True\n"
1461" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001462" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001463" quoting = QUOTE_MINIMAL\n"
1464"\n"
1465"SETTINGS:\n"
1466"\n"
1467" * quotechar - specifies a one-character string to use as the \n"
1468" quoting character. It defaults to '\"'.\n"
1469" * delimiter - specifies a one-character string to use as the \n"
1470" field separator. It defaults to ','.\n"
1471" * skipinitialspace - specifies how to interpret whitespace which\n"
1472" immediately follows a delimiter. It defaults to False, which\n"
1473" means that whitespace immediately following a delimiter is part\n"
1474" of the following field.\n"
1475" * lineterminator - specifies the character sequence which should \n"
1476" terminate rows.\n"
1477" * quoting - controls when quotes should be generated by the writer.\n"
1478" It can take on any of the following module constants:\n"
1479"\n"
1480" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1481" field contains either the quotechar or the delimiter\n"
1482" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1483" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001484" fields which do not parse as integers or floating point\n"
1485" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001486" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1487" * escapechar - specifies a one-character string used to escape \n"
1488" the delimiter when quoting is set to QUOTE_NONE.\n"
1489" * doublequote - controls the handling of quotes inside fields. When\n"
1490" True, two consecutive quotes are interpreted as one during read,\n"
1491" and when writing, each quote character embedded in the data is\n"
1492" written as two quotes\n");
1493
1494PyDoc_STRVAR(csv_reader_doc,
1495" csv_reader = reader(iterable [, dialect='excel']\n"
1496" [optional keyword args])\n"
1497" for row in csv_reader:\n"
1498" process(row)\n"
1499"\n"
1500"The \"iterable\" argument can be any object that returns a line\n"
1501"of input for each iteration, such as a file object or a list. The\n"
1502"optional \"dialect\" parameter is discussed below. The function\n"
1503"also accepts optional keyword arguments which override settings\n"
1504"provided by the dialect.\n"
1505"\n"
1506"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001507"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001508
1509PyDoc_STRVAR(csv_writer_doc,
1510" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1511" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001512" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001513" csv_writer.writerow(row)\n"
1514"\n"
1515" [or]\n"
1516"\n"
1517" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1518" [optional keyword args])\n"
1519" csv_writer.writerows(rows)\n"
1520"\n"
1521"The \"fileobj\" argument can be any object that supports the file API.\n");
1522
1523PyDoc_STRVAR(csv_list_dialects_doc,
1524"Return a list of all know dialect names.\n"
1525" names = csv.list_dialects()");
1526
1527PyDoc_STRVAR(csv_get_dialect_doc,
1528"Return the dialect instance associated with name.\n"
1529" dialect = csv.get_dialect(name)");
1530
1531PyDoc_STRVAR(csv_register_dialect_doc,
1532"Create a mapping from a string name to a dialect class.\n"
1533" dialect = csv.register_dialect(name, dialect)");
1534
1535PyDoc_STRVAR(csv_unregister_dialect_doc,
1536"Delete the name/dialect mapping associated with a string name.\n"
1537" csv.unregister_dialect(name)");
1538
Andrew McNamara31d88962005-01-12 03:45:10 +00001539PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001540"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001541" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001542"\n"
1543"Returns old limit. If limit is not given, no new limit is set and\n"
1544"the old limit is returned");
1545
Skip Montanarob4a04172003-03-20 23:29:12 +00001546static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001547 { "reader", (PyCFunction)csv_reader,
1548 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1549 { "writer", (PyCFunction)csv_writer,
1550 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1551 { "list_dialects", (PyCFunction)csv_list_dialects,
1552 METH_NOARGS, csv_list_dialects_doc},
1553 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001554 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001555 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1556 METH_O, csv_unregister_dialect_doc},
1557 { "get_dialect", (PyCFunction)csv_get_dialect,
1558 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001559 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1560 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001561 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001562};
1563
1564PyMODINIT_FUNC
1565init_csv(void)
1566{
1567 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001568 StyleDesc *style;
1569
1570 if (PyType_Ready(&Dialect_Type) < 0)
1571 return;
1572
1573 if (PyType_Ready(&Reader_Type) < 0)
1574 return;
1575
1576 if (PyType_Ready(&Writer_Type) < 0)
1577 return;
1578
1579 /* Create the module and add the functions */
1580 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1581 if (module == NULL)
1582 return;
1583
1584 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001585 if (PyModule_AddStringConstant(module, "__version__",
1586 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001587 return;
1588
1589 /* Add _dialects dictionary */
1590 dialects = PyDict_New();
1591 if (dialects == NULL)
1592 return;
1593 if (PyModule_AddObject(module, "_dialects", dialects))
1594 return;
1595
1596 /* Add quote styles into dictionary */
1597 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001598 if (PyModule_AddIntConstant(module, style->name,
1599 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001600 return;
1601 }
1602
1603 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001604 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001605 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1606 return;
1607
1608 /* Add the CSV exception object to the module. */
1609 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1610 if (error_obj == NULL)
1611 return;
1612 PyModule_AddObject(module, "Error", error_obj);
1613}