blob: 75cafb26085b024ef145725b70c5efb8ff32a12c [file] [log] [blame]
Skip Montanaroa16b21f2003-03-23 14:32:54 +00001/* csv module */
2
3/*
4
5This module provides the low-level underpinnings of a CSV reading/writing
6module. Users should not use this module directly, but import the csv.py
7module instead.
8
9**** For people modifying this code, please note that as of this writing
Skip Montanarodfa35fa2003-04-11 21:40:01 +000010**** (2003-03-23), it is intended that this code should work with Python
Skip Montanaroa16b21f2003-03-23 14:32:54 +000011**** 2.2.
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
Skip Montanarob4a04172003-03-20 23:29:12 +000021/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/*
 * Docstring helpers for interpreters whose headers do not provide them.
 * PyDoc_STR(str) yields the text itself when WITH_DOC_STRINGS is defined
 * and an empty string otherwise; PyDoc_VAR declares a static char array;
 * PyDoc_STRVAR declares such an array and initialises it via PyDoc_STR.
 */
#  ifdef WITH_DOC_STRINGS
#    define PyDoc_STR(str) str
#  else
#    define PyDoc_STR(str) ""
#  endif
#  define PyDoc_VAR(name) static char name[]
#  define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */
32
#ifndef PyMODINIT_FUNC
/* Return-type/linkage prefix for the module init function: plain "void",
   with C linkage requested when compiled as C++. */
#  ifdef __cplusplus
#    define PyMODINIT_FUNC extern "C" void
#  else
#    define PyMODINIT_FUNC void
#  endif
#endif
Thomas Wouters49fd7fa2006-04-21 10:40:58 +000040
#ifndef Py_CLEAR
/*
 * Fallback Py_CLEAR: if `op` is non-NULL, set it to NULL first and only
 * then drop the reference, so the slot never holds a pointer to an object
 * that is being released.
 */
#define Py_CLEAR(op)                                \
    do {                                            \
        if (op) {                                   \
            PyObject *tmp = (PyObject *)(op);       \
            (op) = NULL;                            \
            Py_DECREF(tmp);                         \
        }                                           \
    } while (0)
#endif
#ifndef Py_VISIT
/*
 * Fallback Py_VISIT for use inside a traverse function: relies on the
 * enclosing function having `visit` and `arg` in scope.  A non-NULL `op`
 * is passed to visit(); a non-zero result is returned from the enclosing
 * function immediately.
 */
#define Py_VISIT(op)                                        \
    do {                                                    \
        if (op) {                                           \
            int vret = visit((PyObject *)(op), arg);        \
            if (vret)                                       \
                return vret;                                \
        }                                                   \
    } while (0)
#endif
61
Skip Montanarob4a04172003-03-20 23:29:12 +000062/* end 2.2 compatibility macros */
63
/* True when `o` is a (unicode) string object. */
#define IS_BASESTRING(o) PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Skip Montanarob4a04172003-03-20 23:29:12 +000067static PyObject *error_obj; /* CSV exception */
68static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,               /* at the start of a record */
    START_FIELD,                /* expecting a field */
    ESCAPED_CHAR,               /* escape character seen outside quotes */
    IN_FIELD,                   /* inside an unquoted field */
    IN_QUOTED_FIELD,            /* inside a quoted field */
    ESCAPE_IN_QUOTED_FIELD,     /* escape character seen inside quotes */
    QUOTE_IN_QUOTED_FIELD,      /* quote character seen inside quotes */
    EAT_CRNL                    /* consuming line-ending characters */
} ParserState;
76
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
80
81typedef struct {
82 QuoteStyle style;
83 char *name;
84} StyleDesc;
85
86static StyleDesc quote_styles[] = {
87 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
88 { QUOTE_ALL, "QUOTE_ALL" },
89 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
90 { QUOTE_NONE, "QUOTE_NONE" },
91 { 0 }
92};
93
94typedef struct {
95 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000096
Skip Montanarob4a04172003-03-20 23:29:12 +000097 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000098 Py_UNICODE delimiter; /* field separator */
99 Py_UNICODE quotechar; /* quote character */
100 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000103 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104
105 int strict; /* raise exception on bad CSV */
106} DialectObj;
107
Neal Norwitz227b5332006-03-22 09:28:35 +0000108static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
111 PyObject_HEAD
112
113 PyObject *input_iter; /* iterate over this for input lines */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +0000119 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +0000120 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +0000121 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000122 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
Neal Norwitz227b5332006-03-22 09:28:35 +0000126static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
/* Exact-type test: true only when v's type is Reader_Type itself. */
#define ReaderObject_Check(v)   (Py_Type(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
131 PyObject_HEAD
132
133 PyObject *writeline; /* write output lines to this file */
134
135 DialectObj *dialect; /* parsing dialect */
136
Guido van Rossum46264582007-08-06 19:32:18 +0000137 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +0000138 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +0000139 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000140 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
Neal Norwitz227b5332006-03-22 09:28:35 +0000143static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 return dialect_obj;
162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
167 Py_XINCREF(str);
168 return str;
169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000172get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000173{
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000179 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
185 return get_string(self->lineterminator);
186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000189Dialect_get_delimiter(DialectObj *self)
190{
191 return get_nullchar_as_None(self->delimiter);
192}
193
194static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000195Dialect_get_escapechar(DialectObj *self)
196{
197 return get_nullchar_as_None(self->escapechar);
198}
199
Andrew McNamara1196cf12005-01-07 04:42:45 +0000200static PyObject *
201Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000202{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static PyObject *
207Dialect_get_quoting(DialectObj *self)
208{
209 return PyInt_FromLong(self->quoting);
210}
211
212static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000213_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000214{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000215 if (src == NULL)
216 *target = dflt;
217 else
218 *target = PyObject_IsTrue(src);
219 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000220}
221
Andrew McNamara1196cf12005-01-07 04:42:45 +0000222static int
223_set_int(const char *name, int *target, PyObject *src, int dflt)
224{
225 if (src == NULL)
226 *target = dflt;
227 else {
Guido van Rossumddefaf32007-01-14 03:31:43 +0000228 if (!PyInt_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229 PyErr_Format(PyExc_TypeError,
230 "\"%s\" must be an integer", name);
231 return -1;
232 }
233 *target = PyInt_AsLong(src);
234 }
235 return 0;
236}
237
238static int
Guido van Rossum46264582007-08-06 19:32:18 +0000239_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000240{
241 if (src == NULL)
242 *target = dflt;
243 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000244 *target = '\0';
245 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000246 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000247 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000248 buf = PyUnicode_AsUnicode(src);
249 len = PyUnicode_GetSize(src);
250 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000251 PyErr_Format(PyExc_TypeError,
252 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000253 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000254 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000255 }
256 if (len > 0)
257 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000258 }
259 }
260 return 0;
261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
266 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000268 else {
269 if (src == Py_None)
270 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000271 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000272 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000273 "\"%s\" must be a string", name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000274 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000275 }
276 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000277 Py_XDECREF(*target);
278 Py_INCREF(src);
279 *target = src;
280 }
281 }
282 return 0;
283}
284
285static int
286dialect_check_quoting(int quoting)
287{
288 StyleDesc *qs = quote_styles;
289
290 for (qs = quote_styles; qs->name; qs++) {
291 if (qs->style == quoting)
292 return 0;
293 }
294 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
295 return -1;
296}
Skip Montanarob4a04172003-03-20 23:29:12 +0000297
298#define D_OFF(x) offsetof(DialectObj, x)
299
300static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
302 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
303 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000304 { NULL }
305};
306
307static PyGetSetDef Dialect_getsetlist[] = {
Guido van Rossuma9769c22007-08-07 23:59:30 +0000308 { "delimiter", (getter)Dialect_get_delimiter},
Andrew McNamara1196cf12005-01-07 04:42:45 +0000309 { "escapechar", (getter)Dialect_get_escapechar},
310 { "lineterminator", (getter)Dialect_get_lineterminator},
311 { "quotechar", (getter)Dialect_get_quotechar},
312 { "quoting", (getter)Dialect_get_quoting},
313 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000314};
315
316static void
317Dialect_dealloc(DialectObj *self)
318{
319 Py_XDECREF(self->lineterminator);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000320 Py_Type(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000321}
322
/* Keyword names for dialect_new, in the order of its "O" format units. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter",
    "doublequote",
    "escapechar",
    "lineterminator",
    "quotechar",
    "quoting",
    "skipinitialspace",
    "strict",
    NULL    /* sentinel */
};
335
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000336static PyObject *
337dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000338{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000339 DialectObj *self;
340 PyObject *ret = NULL;
341 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000342 PyObject *delimiter = NULL;
343 PyObject *doublequote = NULL;
344 PyObject *escapechar = NULL;
345 PyObject *lineterminator = NULL;
346 PyObject *quotechar = NULL;
347 PyObject *quoting = NULL;
348 PyObject *skipinitialspace = NULL;
349 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000350
Andrew McNamara1196cf12005-01-07 04:42:45 +0000351 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
352 "|OOOOOOOOO", dialect_kws,
353 &dialect,
354 &delimiter,
355 &doublequote,
356 &escapechar,
357 &lineterminator,
358 &quotechar,
359 &quoting,
360 &skipinitialspace,
361 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000362 return NULL;
363
364 if (dialect != NULL) {
365 if (IS_BASESTRING(dialect)) {
366 dialect = get_dialect_from_registry(dialect);
367 if (dialect == NULL)
368 return NULL;
369 }
370 else
371 Py_INCREF(dialect);
372 /* Can we reuse this instance? */
373 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
374 delimiter == 0 &&
375 doublequote == 0 &&
376 escapechar == 0 &&
377 lineterminator == 0 &&
378 quotechar == 0 &&
379 quoting == 0 &&
380 skipinitialspace == 0 &&
381 strict == 0)
382 return dialect;
383 }
384
385 self = (DialectObj *)type->tp_alloc(type, 0);
386 if (self == NULL) {
387 Py_XDECREF(dialect);
388 return NULL;
389 }
390 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000391
Andrew McNamara1196cf12005-01-07 04:42:45 +0000392 Py_XINCREF(delimiter);
393 Py_XINCREF(doublequote);
394 Py_XINCREF(escapechar);
395 Py_XINCREF(lineterminator);
396 Py_XINCREF(quotechar);
397 Py_XINCREF(quoting);
398 Py_XINCREF(skipinitialspace);
399 Py_XINCREF(strict);
400 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000401#define DIALECT_GETATTR(v, n) \
402 if (v == NULL) \
403 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000404 DIALECT_GETATTR(delimiter, "delimiter");
405 DIALECT_GETATTR(doublequote, "doublequote");
406 DIALECT_GETATTR(escapechar, "escapechar");
407 DIALECT_GETATTR(lineterminator, "lineterminator");
408 DIALECT_GETATTR(quotechar, "quotechar");
409 DIALECT_GETATTR(quoting, "quoting");
410 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
411 DIALECT_GETATTR(strict, "strict");
412 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000414
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415 /* check types and convert to C values */
416#define DIASET(meth, name, target, src, dflt) \
417 if (meth(name, target, src, dflt)) \
418 goto err
419 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
420 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
421 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
422 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
423 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
424 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
425 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
426 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000427
Andrew McNamara1196cf12005-01-07 04:42:45 +0000428 /* validate options */
429 if (dialect_check_quoting(self->quoting))
430 goto err;
431 if (self->delimiter == 0) {
432 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
433 goto err;
434 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000435 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000436 self->quoting = QUOTE_NONE;
437 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
438 PyErr_SetString(PyExc_TypeError,
439 "quotechar must be set if quoting enabled");
440 goto err;
441 }
442 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000443 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444 goto err;
445 }
446
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000447 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000448 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000450 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000451 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452 Py_XDECREF(delimiter);
453 Py_XDECREF(doublequote);
454 Py_XDECREF(escapechar);
455 Py_XDECREF(lineterminator);
456 Py_XDECREF(quotechar);
457 Py_XDECREF(quoting);
458 Py_XDECREF(skipinitialspace);
459 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000460 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000461}
462
463
464PyDoc_STRVAR(Dialect_Type_doc,
465"CSV dialect\n"
466"\n"
467"The Dialect type records CSV parsing and generation options.\n");
468
469static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000470 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000471 "_csv.Dialect", /* tp_name */
472 sizeof(DialectObj), /* tp_basicsize */
473 0, /* tp_itemsize */
474 /* methods */
475 (destructor)Dialect_dealloc, /* tp_dealloc */
476 (printfunc)0, /* tp_print */
477 (getattrfunc)0, /* tp_getattr */
478 (setattrfunc)0, /* tp_setattr */
479 (cmpfunc)0, /* tp_compare */
480 (reprfunc)0, /* tp_repr */
481 0, /* tp_as_number */
482 0, /* tp_as_sequence */
483 0, /* tp_as_mapping */
484 (hashfunc)0, /* tp_hash */
485 (ternaryfunc)0, /* tp_call */
486 (reprfunc)0, /* tp_str */
487 0, /* tp_getattro */
488 0, /* tp_setattro */
489 0, /* tp_as_buffer */
490 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
491 Dialect_Type_doc, /* tp_doc */
492 0, /* tp_traverse */
493 0, /* tp_clear */
494 0, /* tp_richcompare */
495 0, /* tp_weaklistoffset */
496 0, /* tp_iter */
497 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000498 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000499 Dialect_memberlist, /* tp_members */
500 Dialect_getsetlist, /* tp_getset */
501 0, /* tp_base */
502 0, /* tp_dict */
503 0, /* tp_descr_get */
504 0, /* tp_descr_set */
505 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000506 0, /* tp_init */
507 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000508 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000509 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000510};
511
Andrew McNamara91b97462005-01-11 01:07:23 +0000512/*
513 * Return an instance of the dialect type, given a Python instance or kwarg
514 * description of the dialect
515 */
516static PyObject *
517_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
518{
519 PyObject *ctor_args;
520 PyObject *dialect;
521
522 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
523 if (ctor_args == NULL)
524 return NULL;
525 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
526 Py_DECREF(ctor_args);
527 return dialect;
528}
529
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000530/*
531 * READER
532 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000533static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000534parse_save_field(ReaderObj *self)
535{
536 PyObject *field;
537
Guido van Rossum46264582007-08-06 19:32:18 +0000538 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539 if (field == NULL)
540 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000541 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000542 if (self->numeric_field) {
543 PyObject *tmp;
544
545 self->numeric_field = 0;
546 tmp = PyNumber_Float(field);
547 if (tmp == NULL) {
548 Py_DECREF(field);
549 return -1;
550 }
551 Py_DECREF(field);
552 field = tmp;
553 }
554 PyList_Append(self->fields, field);
555 Py_DECREF(field);
556 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000557}
558
559static int
560parse_grow_buff(ReaderObj *self)
561{
562 if (self->field_size == 0) {
563 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000564 if (self->field != NULL)
565 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000566 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000567 }
568 else {
569 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000570 self->field = PyMem_Resize(self->field, Py_UNICODE,
571 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Guido van Rossum46264582007-08-06 19:32:18 +0000581parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000582{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000583 if (self->field_len >= field_limit) {
584 PyErr_Format(error_obj, "field larger than field limit (%ld)",
585 field_limit);
586 return -1;
587 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000588 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000589 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000594static int
Guido van Rossum46264582007-08-06 19:32:18 +0000595parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000596{
597 DialectObj *dialect = self->dialect;
598
599 switch (self->state) {
600 case START_RECORD:
601 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000602 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000603 /* empty line - return [] */
604 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000605 else if (c == '\n' || c == '\r') {
606 self->state = EAT_CRNL;
607 break;
608 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000609 /* normal character - handle as START_FIELD */
610 self->state = START_FIELD;
611 /* fallthru */
612 case START_FIELD:
613 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000614 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000615 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000616 if (parse_save_field(self) < 0)
617 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000619 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000620 else if (c == dialect->quotechar &&
621 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000622 /* start quoted field */
623 self->state = IN_QUOTED_FIELD;
624 }
625 else if (c == dialect->escapechar) {
626 /* possible escaped character */
627 self->state = ESCAPED_CHAR;
628 }
629 else if (c == ' ' && dialect->skipinitialspace)
630 /* ignore space at start of field */
631 ;
632 else if (c == dialect->delimiter) {
633 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000634 if (parse_save_field(self) < 0)
635 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000636 }
637 else {
638 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000639 if (dialect->quoting == QUOTE_NONNUMERIC)
640 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000641 if (parse_add_char(self, c) < 0)
642 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000643 self->state = IN_FIELD;
644 }
645 break;
646
647 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000648 if (c == '\0')
649 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000650 if (parse_add_char(self, c) < 0)
651 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652 self->state = IN_FIELD;
653 break;
654
655 case IN_FIELD:
656 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000657 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000658 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000659 if (parse_save_field(self) < 0)
660 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000661 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000662 }
663 else if (c == dialect->escapechar) {
664 /* possible escaped character */
665 self->state = ESCAPED_CHAR;
666 }
667 else if (c == dialect->delimiter) {
668 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000669 if (parse_save_field(self) < 0)
670 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000671 self->state = START_FIELD;
672 }
673 else {
674 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000675 if (parse_add_char(self, c) < 0)
676 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677 }
678 break;
679
680 case IN_QUOTED_FIELD:
681 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000682 if (c == '\0')
683 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000684 else if (c == dialect->escapechar) {
685 /* Possible escape character */
686 self->state = ESCAPE_IN_QUOTED_FIELD;
687 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000688 else if (c == dialect->quotechar &&
689 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000690 if (dialect->doublequote) {
691 /* doublequote; " represented by "" */
692 self->state = QUOTE_IN_QUOTED_FIELD;
693 }
694 else {
695 /* end of quote part of field */
696 self->state = IN_FIELD;
697 }
698 }
699 else {
700 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000701 if (parse_add_char(self, c) < 0)
702 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000703 }
704 break;
705
706 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000707 if (c == '\0')
708 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000709 if (parse_add_char(self, c) < 0)
710 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711 self->state = IN_QUOTED_FIELD;
712 break;
713
714 case QUOTE_IN_QUOTED_FIELD:
715 /* doublequote - seen a quote in an quoted field */
716 if (dialect->quoting != QUOTE_NONE &&
717 c == dialect->quotechar) {
718 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000719 if (parse_add_char(self, c) < 0)
720 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000721 self->state = IN_QUOTED_FIELD;
722 }
723 else if (c == dialect->delimiter) {
724 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000725 if (parse_save_field(self) < 0)
726 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000727 self->state = START_FIELD;
728 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000729 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000730 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000731 if (parse_save_field(self) < 0)
732 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000733 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000734 }
735 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000736 if (parse_add_char(self, c) < 0)
737 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000738 self->state = IN_FIELD;
739 }
740 else {
741 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000742 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000743 dialect->delimiter,
744 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000745 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000746 }
747 break;
748
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000749 case EAT_CRNL:
750 if (c == '\n' || c == '\r')
751 ;
752 else if (c == '\0')
753 self->state = START_RECORD;
754 else {
755 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
756 return -1;
757 }
758 break;
759
Skip Montanarob4a04172003-03-20 23:29:12 +0000760 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000761 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000762}
763
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000764static int
765parse_reset(ReaderObj *self)
766{
767 Py_XDECREF(self->fields);
768 self->fields = PyList_New(0);
769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
780 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000782 Py_UNICODE *line, c;
783 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000785 if (parse_reset(self) < 0)
786 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000787 do {
788 lineobj = PyIter_Next(self->input_iter);
789 if (lineobj == NULL) {
790 /* End of input OR exception */
791 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000792 PyErr_Format(error_obj,
793 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000794 return NULL;
795 }
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000796 if (!PyUnicode_Check(lineobj))
797 {
798 PyErr_Format(error_obj,
799 "Iterator should return strings, "
800 "not %.200s "
801 "(did you open the file in text mode?)",
802 lineobj->ob_type->tp_name
803 );
804 return NULL;
805 }
Guido van Rossum46264582007-08-06 19:32:18 +0000806 ++self->line_num;
807 line = PyUnicode_AsUnicode(lineobj);
808 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000809 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000810 Py_DECREF(lineobj);
811 return NULL;
812 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000813 while (linelen--) {
814 c = *line++;
815 if (c == '\0') {
816 Py_DECREF(lineobj);
817 PyErr_Format(error_obj,
818 "line contains NULL byte");
819 goto err;
820 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000821 if (parse_process_char(self, c) < 0) {
822 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000823 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000824 }
825 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000826 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000827 if (parse_process_char(self, 0) < 0)
828 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000829 } while (self->state != START_RECORD);
830
831 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000832 self->fields = NULL;
833err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000834 return fields;
835}
836
837static void
838Reader_dealloc(ReaderObj *self)
839{
Andrew McNamara77ead872005-01-10 02:09:41 +0000840 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000841 Py_XDECREF(self->dialect);
842 Py_XDECREF(self->input_iter);
843 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000844 if (self->field != NULL)
845 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000846 PyObject_GC_Del(self);
847}
848
849static int
850Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
851{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000852 Py_VISIT(self->dialect);
853 Py_VISIT(self->input_iter);
854 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000855 return 0;
856}
857
858static int
859Reader_clear(ReaderObj *self)
860{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000861 Py_CLEAR(self->dialect);
862 Py_CLEAR(self->input_iter);
863 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000864 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000865}
866
867PyDoc_STRVAR(Reader_Type_doc,
868"CSV reader\n"
869"\n"
870"Reader objects are responsible for reading and parsing tabular data\n"
871"in CSV format.\n"
872);
873
874static struct PyMethodDef Reader_methods[] = {
875 { NULL, NULL }
876};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000877#define R_OFF(x) offsetof(ReaderObj, x)
878
879static struct PyMemberDef Reader_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +0000880 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
881 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000882 { NULL }
883};
884
Skip Montanarob4a04172003-03-20 23:29:12 +0000885
886static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000887 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000888 "_csv.reader", /*tp_name*/
889 sizeof(ReaderObj), /*tp_basicsize*/
890 0, /*tp_itemsize*/
891 /* methods */
892 (destructor)Reader_dealloc, /*tp_dealloc*/
893 (printfunc)0, /*tp_print*/
894 (getattrfunc)0, /*tp_getattr*/
895 (setattrfunc)0, /*tp_setattr*/
896 (cmpfunc)0, /*tp_compare*/
897 (reprfunc)0, /*tp_repr*/
898 0, /*tp_as_number*/
899 0, /*tp_as_sequence*/
900 0, /*tp_as_mapping*/
901 (hashfunc)0, /*tp_hash*/
902 (ternaryfunc)0, /*tp_call*/
903 (reprfunc)0, /*tp_str*/
904 0, /*tp_getattro*/
905 0, /*tp_setattro*/
906 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000907 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
908 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000909 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000910 (traverseproc)Reader_traverse, /*tp_traverse*/
911 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000912 0, /*tp_richcompare*/
913 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000914 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000915 (getiterfunc)Reader_iternext, /*tp_iternext*/
916 Reader_methods, /*tp_methods*/
917 Reader_memberlist, /*tp_members*/
918 0, /*tp_getset*/
919
920};
921
922static PyObject *
923csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
924{
Andrew McNamara91b97462005-01-11 01:07:23 +0000925 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000926 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000927
928 if (!self)
929 return NULL;
930
931 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000932 self->fields = NULL;
933 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000934 self->field = NULL;
935 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000936 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000937
938 if (parse_reset(self) < 0) {
939 Py_DECREF(self);
940 return NULL;
941 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000942
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000943 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000944 Py_DECREF(self);
945 return NULL;
946 }
947 self->input_iter = PyObject_GetIter(iterator);
948 if (self->input_iter == NULL) {
949 PyErr_SetString(PyExc_TypeError,
950 "argument 1 must be an iterator");
951 Py_DECREF(self);
952 return NULL;
953 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000954 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000955 if (self->dialect == NULL) {
956 Py_DECREF(self);
957 return NULL;
958 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000959
Andrew McNamara77ead872005-01-10 02:09:41 +0000960 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000961 return (PyObject *)self;
962}
963
964/*
965 * WRITER
966 */
967/* ---------------------------------------------------------------- */
968static void
969join_reset(WriterObj *self)
970{
971 self->rec_len = 0;
972 self->num_fields = 0;
973}
974
975#define MEM_INCR 32768
976
977/* Calculate new record length or append field to record. Return new
978 * record length.
979 */
980static int
Guido van Rossum46264582007-08-06 19:32:18 +0000981join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
982 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000983{
984 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000985 int i;
986 int rec_len;
987 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000988
989#define ADDCH(c) \
990 do {\
991 if (copy_phase) \
992 self->rec[rec_len] = c;\
993 rec_len++;\
994 } while(0)
995
Guido van Rossum46264582007-08-06 19:32:18 +0000996 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000997 if (lineterm == NULL)
998 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000999
1000 rec_len = self->rec_len;
1001
Andrew McNamarac89f2842005-01-12 07:44:42 +00001002 /* If this is not the first field we need a field separator */
1003 if (self->num_fields > 0)
1004 ADDCH(dialect->delimiter);
1005
1006 /* Handle preceding quote */
1007 if (copy_phase && *quoted)
1008 ADDCH(dialect->quotechar);
1009
1010 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +00001011 /* If field is null just pass over */
1012 for (i = 0; field; i++) {
1013 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +00001014 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001015
1016 if (c == '\0')
1017 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001018
Andrew McNamarac89f2842005-01-12 07:44:42 +00001019 if (c == dialect->delimiter ||
1020 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +00001021 c == dialect->quotechar ||
1022 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001023 if (dialect->quoting == QUOTE_NONE)
1024 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001025 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001026 if (c == dialect->quotechar) {
1027 if (dialect->doublequote)
1028 ADDCH(dialect->quotechar);
1029 else
1030 want_escape = 1;
1031 }
1032 if (!want_escape)
1033 *quoted = 1;
1034 }
1035 if (want_escape) {
1036 if (!dialect->escapechar) {
1037 PyErr_Format(error_obj,
1038 "need to escape, but no escapechar set");
1039 return -1;
1040 }
1041 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001042 }
1043 }
1044 /* Copy field character into record buffer.
1045 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001046 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001047 }
1048
1049 /* If field is empty check if it needs to be quoted.
1050 */
1051 if (i == 0 && quote_empty) {
1052 if (dialect->quoting == QUOTE_NONE) {
1053 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001054 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001055 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001056 }
1057 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001058 *quoted = 1;
1059 }
1060
Skip Montanarob4a04172003-03-20 23:29:12 +00001061 if (*quoted) {
1062 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001063 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001064 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001065 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001066 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001067 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001068#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001069}
1070
1071static int
1072join_check_rec_size(WriterObj *self, int rec_len)
1073{
1074 if (rec_len > self->rec_size) {
1075 if (self->rec_size == 0) {
1076 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001077 if (self->rec != NULL)
1078 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001079 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001080 }
1081 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001082 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001083
1084 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001085 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1086 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001087 if (self->rec == NULL)
1088 PyMem_Free(old_rec);
1089 }
1090 if (self->rec == NULL) {
1091 PyErr_NoMemory();
1092 return 0;
1093 }
1094 }
1095 return 1;
1096}
1097
1098static int
Guido van Rossum46264582007-08-06 19:32:18 +00001099join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001100{
1101 int rec_len;
1102
1103 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1104 if (rec_len < 0)
1105 return 0;
1106
1107 /* grow record buffer if necessary */
1108 if (!join_check_rec_size(self, rec_len))
1109 return 0;
1110
1111 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1112 self->num_fields++;
1113
1114 return 1;
1115}
1116
1117static int
1118join_append_lineterminator(WriterObj *self)
1119{
1120 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001121 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001122
Guido van Rossum46264582007-08-06 19:32:18 +00001123 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001124 if (terminator_len == -1)
1125 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001126
1127 /* grow record buffer if necessary */
1128 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1129 return 0;
1130
Guido van Rossum46264582007-08-06 19:32:18 +00001131 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001132 if (terminator == NULL)
1133 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001134 memmove(self->rec + self->rec_len, terminator,
1135 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001136 self->rec_len += terminator_len;
1137
1138 return 1;
1139}
1140
1141PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001142"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001143"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001144"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001145"elements will be converted to string.");
1146
1147static PyObject *
1148csv_writerow(WriterObj *self, PyObject *seq)
1149{
1150 DialectObj *dialect = self->dialect;
1151 int len, i;
1152
1153 if (!PySequence_Check(seq))
1154 return PyErr_Format(error_obj, "sequence expected");
1155
1156 len = PySequence_Length(seq);
1157 if (len < 0)
1158 return NULL;
1159
1160 /* Join all fields in internal buffer.
1161 */
1162 join_reset(self);
1163 for (i = 0; i < len; i++) {
1164 PyObject *field;
1165 int append_ok;
1166 int quoted;
1167
1168 field = PySequence_GetItem(seq, i);
1169 if (field == NULL)
1170 return NULL;
1171
Andrew McNamarac89f2842005-01-12 07:44:42 +00001172 switch (dialect->quoting) {
1173 case QUOTE_NONNUMERIC:
1174 quoted = !PyNumber_Check(field);
1175 break;
1176 case QUOTE_ALL:
1177 quoted = 1;
1178 break;
1179 default:
1180 quoted = 0;
1181 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182 }
1183
Guido van Rossum46264582007-08-06 19:32:18 +00001184 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001185 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001186 PyUnicode_AS_UNICODE(field),
1187 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001188 Py_DECREF(field);
1189 }
1190 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001191 append_ok = join_append(self, NULL,
1192 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001193 Py_DECREF(field);
1194 }
1195 else {
1196 PyObject *str;
1197
Thomas Heller519a0422007-11-15 20:48:54 +00001198 str = PyObject_Str(field);
Guido van Rossum46264582007-08-06 19:32:18 +00001199 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001200 if (str == NULL)
1201 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001202 append_ok = join_append(self,
1203 PyUnicode_AS_UNICODE(str),
1204 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001205 Py_DECREF(str);
1206 }
1207 if (!append_ok)
1208 return NULL;
1209 }
1210
1211 /* Add line terminator.
1212 */
1213 if (!join_append_lineterminator(self))
1214 return 0;
1215
Guido van Rossum46264582007-08-06 19:32:18 +00001216 return PyObject_CallFunction(self->writeline,
1217 "(u#)", self->rec,
1218 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001219}
1220
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001221PyDoc_STRVAR(csv_writerows_doc,
1222"writerows(sequence of sequences)\n"
1223"\n"
1224"Construct and write a series of sequences to a csv file. Non-string\n"
1225"elements will be converted to string.");
1226
Skip Montanarob4a04172003-03-20 23:29:12 +00001227static PyObject *
1228csv_writerows(WriterObj *self, PyObject *seqseq)
1229{
1230 PyObject *row_iter, *row_obj, *result;
1231
1232 row_iter = PyObject_GetIter(seqseq);
1233 if (row_iter == NULL) {
1234 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001235 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001236 return NULL;
1237 }
1238 while ((row_obj = PyIter_Next(row_iter))) {
1239 result = csv_writerow(self, row_obj);
1240 Py_DECREF(row_obj);
1241 if (!result) {
1242 Py_DECREF(row_iter);
1243 return NULL;
1244 }
1245 else
1246 Py_DECREF(result);
1247 }
1248 Py_DECREF(row_iter);
1249 if (PyErr_Occurred())
1250 return NULL;
1251 Py_INCREF(Py_None);
1252 return Py_None;
1253}
1254
1255static struct PyMethodDef Writer_methods[] = {
1256 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001257 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001258 { NULL, NULL }
1259};
1260
1261#define W_OFF(x) offsetof(WriterObj, x)
1262
1263static struct PyMemberDef Writer_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001264 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +00001265 { NULL }
1266};
1267
1268static void
1269Writer_dealloc(WriterObj *self)
1270{
Andrew McNamara77ead872005-01-10 02:09:41 +00001271 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001272 Py_XDECREF(self->dialect);
1273 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001274 if (self->rec != NULL)
1275 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001276 PyObject_GC_Del(self);
1277}
1278
1279static int
1280Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1281{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001282 Py_VISIT(self->dialect);
1283 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001284 return 0;
1285}
1286
1287static int
1288Writer_clear(WriterObj *self)
1289{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001290 Py_CLEAR(self->dialect);
1291 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001292 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001293}
1294
1295PyDoc_STRVAR(Writer_Type_doc,
1296"CSV writer\n"
1297"\n"
1298"Writer objects are responsible for generating tabular data\n"
1299"in CSV format from sequence input.\n"
1300);
1301
1302static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001303 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001304 "_csv.writer", /*tp_name*/
1305 sizeof(WriterObj), /*tp_basicsize*/
1306 0, /*tp_itemsize*/
1307 /* methods */
1308 (destructor)Writer_dealloc, /*tp_dealloc*/
1309 (printfunc)0, /*tp_print*/
1310 (getattrfunc)0, /*tp_getattr*/
1311 (setattrfunc)0, /*tp_setattr*/
1312 (cmpfunc)0, /*tp_compare*/
1313 (reprfunc)0, /*tp_repr*/
1314 0, /*tp_as_number*/
1315 0, /*tp_as_sequence*/
1316 0, /*tp_as_mapping*/
1317 (hashfunc)0, /*tp_hash*/
1318 (ternaryfunc)0, /*tp_call*/
1319 (reprfunc)0, /*tp_str*/
1320 0, /*tp_getattro*/
1321 0, /*tp_setattro*/
1322 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001323 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1324 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001325 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001326 (traverseproc)Writer_traverse, /*tp_traverse*/
1327 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001328 0, /*tp_richcompare*/
1329 0, /*tp_weaklistoffset*/
1330 (getiterfunc)0, /*tp_iter*/
1331 (getiterfunc)0, /*tp_iternext*/
1332 Writer_methods, /*tp_methods*/
1333 Writer_memberlist, /*tp_members*/
1334 0, /*tp_getset*/
1335};
1336
1337static PyObject *
1338csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1339{
Andrew McNamara91b97462005-01-11 01:07:23 +00001340 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001341 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001342
1343 if (!self)
1344 return NULL;
1345
1346 self->dialect = NULL;
1347 self->writeline = NULL;
1348
1349 self->rec = NULL;
1350 self->rec_size = 0;
1351 self->rec_len = 0;
1352 self->num_fields = 0;
1353
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001354 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001355 Py_DECREF(self);
1356 return NULL;
1357 }
1358 self->writeline = PyObject_GetAttrString(output_file, "write");
1359 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1360 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001361 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001362 Py_DECREF(self);
1363 return NULL;
1364 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001365 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001366 if (self->dialect == NULL) {
1367 Py_DECREF(self);
1368 return NULL;
1369 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001370 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001371 return (PyObject *)self;
1372}
1373
1374/*
1375 * DIALECT REGISTRY
1376 */
1377static PyObject *
1378csv_list_dialects(PyObject *module, PyObject *args)
1379{
1380 return PyDict_Keys(dialects);
1381}
1382
1383static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001384csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001385{
Andrew McNamara86625972005-01-11 01:28:33 +00001386 PyObject *name_obj, *dialect_obj = NULL;
1387 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001388
Andrew McNamara86625972005-01-11 01:28:33 +00001389 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001390 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001391 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001392 PyErr_SetString(PyExc_TypeError,
1393 "dialect name must be a string or unicode");
1394 return NULL;
1395 }
Andrew McNamara86625972005-01-11 01:28:33 +00001396 dialect = _call_dialect(dialect_obj, kwargs);
1397 if (dialect == NULL)
1398 return NULL;
1399 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1400 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001401 return NULL;
1402 }
Andrew McNamara86625972005-01-11 01:28:33 +00001403 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001404 Py_INCREF(Py_None);
1405 return Py_None;
1406}
1407
1408static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001409csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001410{
Skip Montanarob4a04172003-03-20 23:29:12 +00001411 if (PyDict_DelItem(dialects, name_obj) < 0)
1412 return PyErr_Format(error_obj, "unknown dialect");
1413 Py_INCREF(Py_None);
1414 return Py_None;
1415}
1416
1417static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001418csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001419{
Skip Montanarob4a04172003-03-20 23:29:12 +00001420 return get_dialect_from_registry(name_obj);
1421}
1422
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001423static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001424csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001425{
1426 PyObject *new_limit = NULL;
1427 long old_limit = field_limit;
1428
Andrew McNamara31d88962005-01-12 03:45:10 +00001429 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001430 return NULL;
1431 if (new_limit != NULL) {
Guido van Rossumddefaf32007-01-14 03:31:43 +00001432 if (!PyInt_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001433 PyErr_Format(PyExc_TypeError,
1434 "limit must be an integer");
1435 return NULL;
1436 }
1437 field_limit = PyInt_AsLong(new_limit);
1438 }
1439 return PyInt_FromLong(old_limit);
1440}
1441
Skip Montanarob4a04172003-03-20 23:29:12 +00001442/*
1443 * MODULE
1444 */
1445
1446PyDoc_STRVAR(csv_module_doc,
1447"CSV parsing and writing.\n"
1448"\n"
1449"This module provides classes that assist in the reading and writing\n"
1450"of Comma Separated Value (CSV) files, and implements the interface\n"
1451"described by PEP 305. Although many CSV files are simple to parse,\n"
1452"the format is not formally defined by a stable specification and\n"
1453"is subtle enough that parsing lines of a CSV file with something\n"
1454"like line.split(\",\") is bound to fail. The module supports three\n"
1455"basic APIs: reading, writing, and registration of dialects.\n"
1456"\n"
1457"\n"
1458"DIALECT REGISTRATION:\n"
1459"\n"
1460"Readers and writers support a dialect argument, which is a convenient\n"
1461"handle on a group of settings. When the dialect argument is a string,\n"
1462"it identifies one of the dialects previously registered with the module.\n"
1463"If it is a class or instance, the attributes of the argument are used as\n"
1464"the settings for the reader or writer:\n"
1465"\n"
1466" class excel:\n"
1467" delimiter = ','\n"
1468" quotechar = '\"'\n"
1469" escapechar = None\n"
1470" doublequote = True\n"
1471" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001472" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001473" quoting = QUOTE_MINIMAL\n"
1474"\n"
1475"SETTINGS:\n"
1476"\n"
1477" * quotechar - specifies a one-character string to use as the \n"
1478" quoting character. It defaults to '\"'.\n"
1479" * delimiter - specifies a one-character string to use as the \n"
1480" field separator. It defaults to ','.\n"
1481" * skipinitialspace - specifies how to interpret whitespace which\n"
1482" immediately follows a delimiter. It defaults to False, which\n"
1483" means that whitespace immediately following a delimiter is part\n"
1484" of the following field.\n"
1485" * lineterminator - specifies the character sequence which should \n"
1486" terminate rows.\n"
1487" * quoting - controls when quotes should be generated by the writer.\n"
1488" It can take on any of the following module constants:\n"
1489"\n"
1490" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1491" field contains either the quotechar or the delimiter\n"
1492" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1493" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001494" fields which do not parse as integers or floating point\n"
1495" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001496" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1497" * escapechar - specifies a one-character string used to escape \n"
1498" the delimiter when quoting is set to QUOTE_NONE.\n"
1499" * doublequote - controls the handling of quotes inside fields. When\n"
1500" True, two consecutive quotes are interpreted as one during read,\n"
1501" and when writing, each quote character embedded in the data is\n"
1502" written as two quotes\n");
1503
1504PyDoc_STRVAR(csv_reader_doc,
1505" csv_reader = reader(iterable [, dialect='excel']\n"
1506" [optional keyword args])\n"
1507" for row in csv_reader:\n"
1508" process(row)\n"
1509"\n"
1510"The \"iterable\" argument can be any object that returns a line\n"
1511"of input for each iteration, such as a file object or a list. The\n"
1512"optional \"dialect\" parameter is discussed below. The function\n"
1513"also accepts optional keyword arguments which override settings\n"
1514"provided by the dialect.\n"
1515"\n"
1516"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001517"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001518
1519PyDoc_STRVAR(csv_writer_doc,
1520" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1521" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001522" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001523" csv_writer.writerow(row)\n"
1524"\n"
1525" [or]\n"
1526"\n"
1527" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1528" [optional keyword args])\n"
1529" csv_writer.writerows(rows)\n"
1530"\n"
1531"The \"fileobj\" argument can be any object that supports the file API.\n");
1532
1533PyDoc_STRVAR(csv_list_dialects_doc,
"Return a list of all known dialect names.\n"
1535" names = csv.list_dialects()");
1536
1537PyDoc_STRVAR(csv_get_dialect_doc,
1538"Return the dialect instance associated with name.\n"
1539" dialect = csv.get_dialect(name)");
1540
1541PyDoc_STRVAR(csv_register_dialect_doc,
1542"Create a mapping from a string name to a dialect class.\n"
1543" dialect = csv.register_dialect(name, dialect)");
1544
1545PyDoc_STRVAR(csv_unregister_dialect_doc,
1546"Delete the name/dialect mapping associated with a string name.\n"
1547" csv.unregister_dialect(name)");
1548
Andrew McNamara31d88962005-01-12 03:45:10 +00001549PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001550"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001551" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001552"\n"
1553"Returns old limit. If limit is not given, no new limit is set and\n"
1554"the old limit is returned");
1555
Skip Montanarob4a04172003-03-20 23:29:12 +00001556static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001557 { "reader", (PyCFunction)csv_reader,
1558 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1559 { "writer", (PyCFunction)csv_writer,
1560 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1561 { "list_dialects", (PyCFunction)csv_list_dialects,
1562 METH_NOARGS, csv_list_dialects_doc},
1563 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001564 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001565 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1566 METH_O, csv_unregister_dialect_doc},
1567 { "get_dialect", (PyCFunction)csv_get_dialect,
1568 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001569 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1570 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001571 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001572};
1573
1574PyMODINIT_FUNC
1575init_csv(void)
1576{
1577 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001578 StyleDesc *style;
1579
1580 if (PyType_Ready(&Dialect_Type) < 0)
1581 return;
1582
1583 if (PyType_Ready(&Reader_Type) < 0)
1584 return;
1585
1586 if (PyType_Ready(&Writer_Type) < 0)
1587 return;
1588
1589 /* Create the module and add the functions */
1590 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1591 if (module == NULL)
1592 return;
1593
1594 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001595 if (PyModule_AddStringConstant(module, "__version__",
1596 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001597 return;
1598
1599 /* Add _dialects dictionary */
1600 dialects = PyDict_New();
1601 if (dialects == NULL)
1602 return;
1603 if (PyModule_AddObject(module, "_dialects", dialects))
1604 return;
1605
1606 /* Add quote styles into dictionary */
1607 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001608 if (PyModule_AddIntConstant(module, style->name,
1609 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001610 return;
1611 }
1612
1613 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001614 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001615 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1616 return;
1617
1618 /* Add the CSV exception object to the module. */
1619 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1620 if (error_obj == NULL)
1621 return;
1622 PyModule_AddObject(module, "Error", error_obj);
1623}