blob: afa7810b566b02d0c5802186635645c39e3475e5 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

**** For people modifying this code, please note that as of this writing
**** (2003-03-23), it is intended that this code should work with Python
**** 2.2.

*/
14
Skip Montanaro7b01a832003-04-12 19:23:46 +000015#define MODULE_VERSION "1.0"
16
Skip Montanarob4a04172003-03-20 23:29:12 +000017#include "Python.h"
18#include "structmember.h"
19
Skip Montanaroa16b21f2003-03-23 14:32:54 +000020
/* begin 2.2 compatibility macros */
#ifndef PyDoc_STRVAR
/* Docstring helpers, supplied only when the Python headers lack them. */
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
/* Return type (and linkage) of the module init function. */
#ifdef __cplusplus
#define PyMODINIT_FUNC extern "C" void
#else
#define PyMODINIT_FUNC void
#endif
#endif

#ifndef Py_CLEAR
/* Null out the slot first, then drop the reference it used to hold. */
#define Py_CLEAR(op)                                    \
    do {                                                \
        if (op) {                                       \
            PyObject *old_ref = (PyObject *)(op);       \
            (op) = NULL;                                \
            Py_DECREF(old_ref);                         \
        }                                               \
    } while (0)
#endif

#ifndef Py_VISIT
/* For use inside a traverse function whose parameters are named
 * "visit" and "arg": visit op if set, returning early on non-zero. */
#define Py_VISIT(op)                                    \
    do {                                                \
        if (op) {                                       \
            int visit_rc = visit((PyObject *)(op), arg); \
            if (visit_rc)                               \
                return visit_rc;                        \
        }                                               \
    } while (0)
#endif

/* end 2.2 compatibility macros */
63
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000064#define IS_BASESTRING(o) \
Guido van Rossum3172c5d2007-10-16 18:12:55 +000065 PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000066
Skip Montanarob4a04172003-03-20 23:29:12 +000067static PyObject *error_obj; /* CSV exception */
68static PyObject *dialects; /* Dialect registry */
Andrew McNamarae4d05c42005-01-11 07:32:02 +000069static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000070
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
76
/* Quoting policies selectable through a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;

/* Pairs a quoting policy with its symbolic name. */
typedef struct {
    QuoteStyle style;
    char *name;
} StyleDesc;

/* Every valid quoting policy; the entry with a NULL name ends the table. */
static StyleDesc quote_styles[] = {
    { QUOTE_MINIMAL,    "QUOTE_MINIMAL" },
    { QUOTE_ALL,        "QUOTE_ALL" },
    { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
    { QUOTE_NONE,       "QUOTE_NONE" },
    { 0, NULL }
};
93
94typedef struct {
95 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000096
Skip Montanarob4a04172003-03-20 23:29:12 +000097 int doublequote; /* is " represented by ""? */
Guido van Rossum46264582007-08-06 19:32:18 +000098 Py_UNICODE delimiter; /* field separator */
99 Py_UNICODE quotechar; /* quote character */
100 Py_UNICODE escapechar; /* escape character */
Skip Montanarob4a04172003-03-20 23:29:12 +0000101 int skipinitialspace; /* ignore spaces following delimiter? */
102 PyObject *lineterminator; /* string to write between records */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000103 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104
105 int strict; /* raise exception on bad CSV */
106} DialectObj;
107
Neal Norwitz227b5332006-03-22 09:28:35 +0000108static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
111 PyObject_HEAD
112
113 PyObject *input_iter; /* iterate over this for input lines */
114
115 DialectObj *dialect; /* parsing dialect */
116
117 PyObject *fields; /* field list for current record */
118 ParserState state; /* current CSV parse state */
Guido van Rossum46264582007-08-06 19:32:18 +0000119 Py_UNICODE *field; /* build current field in here */
Skip Montanarob4a04172003-03-20 23:29:12 +0000120 int field_size; /* size of allocated buffer */
Guido van Rossum46264582007-08-06 19:32:18 +0000121 Py_ssize_t field_len; /* length of current field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000122 int numeric_field; /* treat field as numeric */
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000123 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000124} ReaderObj;
125
Neal Norwitz227b5332006-03-22 09:28:35 +0000126static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000127
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000128#define ReaderObject_Check(v) (Py_Type(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000129
130typedef struct {
131 PyObject_HEAD
132
133 PyObject *writeline; /* write output lines to this file */
134
135 DialectObj *dialect; /* parsing dialect */
136
Guido van Rossum46264582007-08-06 19:32:18 +0000137 Py_UNICODE *rec; /* buffer for parser.join */
Skip Montanarob4a04172003-03-20 23:29:12 +0000138 int rec_size; /* size of allocated record */
Guido van Rossum46264582007-08-06 19:32:18 +0000139 Py_ssize_t rec_len; /* length of record */
Skip Montanarob4a04172003-03-20 23:29:12 +0000140 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000141} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142
Neal Norwitz227b5332006-03-22 09:28:35 +0000143static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000144
145/*
146 * DIALECT class
147 */
148
149static PyObject *
150get_dialect_from_registry(PyObject * name_obj)
151{
152 PyObject *dialect_obj;
153
154 dialect_obj = PyDict_GetItem(dialects, name_obj);
Andrew McNamaradbce2612005-01-10 23:17:35 +0000155 if (dialect_obj == NULL) {
156 if (!PyErr_Occurred())
157 PyErr_Format(error_obj, "unknown dialect");
158 }
159 else
160 Py_INCREF(dialect_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 return dialect_obj;
162}
163
Skip Montanarob4a04172003-03-20 23:29:12 +0000164static PyObject *
165get_string(PyObject *str)
166{
167 Py_XINCREF(str);
168 return str;
169}
170
Skip Montanarob4a04172003-03-20 23:29:12 +0000171static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000172get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000173{
174 if (c == '\0') {
175 Py_INCREF(Py_None);
176 return Py_None;
177 }
178 else
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000179 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000180}
181
Skip Montanarob4a04172003-03-20 23:29:12 +0000182static PyObject *
183Dialect_get_lineterminator(DialectObj *self)
184{
185 return get_string(self->lineterminator);
186}
187
Skip Montanarob4a04172003-03-20 23:29:12 +0000188static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000189Dialect_get_delimiter(DialectObj *self)
190{
191 return get_nullchar_as_None(self->delimiter);
192}
193
194static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000195Dialect_get_escapechar(DialectObj *self)
196{
197 return get_nullchar_as_None(self->escapechar);
198}
199
Andrew McNamara1196cf12005-01-07 04:42:45 +0000200static PyObject *
201Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000202{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
206static PyObject *
207Dialect_get_quoting(DialectObj *self)
208{
209 return PyInt_FromLong(self->quoting);
210}
211
212static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000213_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000214{
Andrew McNamara1196cf12005-01-07 04:42:45 +0000215 if (src == NULL)
216 *target = dflt;
217 else
218 *target = PyObject_IsTrue(src);
219 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000220}
221
Andrew McNamara1196cf12005-01-07 04:42:45 +0000222static int
223_set_int(const char *name, int *target, PyObject *src, int dflt)
224{
225 if (src == NULL)
226 *target = dflt;
227 else {
Guido van Rossumddefaf32007-01-14 03:31:43 +0000228 if (!PyInt_CheckExact(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000229 PyErr_Format(PyExc_TypeError,
230 "\"%s\" must be an integer", name);
231 return -1;
232 }
233 *target = PyInt_AsLong(src);
234 }
235 return 0;
236}
237
238static int
Guido van Rossum46264582007-08-06 19:32:18 +0000239_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000240{
241 if (src == NULL)
242 *target = dflt;
243 else {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000244 *target = '\0';
245 if (src != Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +0000246 Py_UNICODE *buf;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000247 Py_ssize_t len;
Guido van Rossum46264582007-08-06 19:32:18 +0000248 buf = PyUnicode_AsUnicode(src);
249 len = PyUnicode_GetSize(src);
250 if (buf == NULL || len > 1) {
Guido van Rossumbce56a62007-05-10 18:04:33 +0000251 PyErr_Format(PyExc_TypeError,
252 "\"%s\" must be an 1-character string",
Guido van Rossum46264582007-08-06 19:32:18 +0000253 name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000254 return -1;
Guido van Rossumbce56a62007-05-10 18:04:33 +0000255 }
256 if (len > 0)
257 *target = buf[0];
Andrew McNamara1196cf12005-01-07 04:42:45 +0000258 }
259 }
260 return 0;
261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
266 if (src == NULL)
Guido van Rossum46264582007-08-06 19:32:18 +0000267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000268 else {
269 if (src == Py_None)
270 *target = NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +0000271 else if (!IS_BASESTRING(src)) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000272 PyErr_Format(PyExc_TypeError,
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000273 "\"%s\" must be a string", name);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000274 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +0000275 }
276 else {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000277 Py_XDECREF(*target);
278 Py_INCREF(src);
279 *target = src;
280 }
281 }
282 return 0;
283}
284
285static int
286dialect_check_quoting(int quoting)
287{
288 StyleDesc *qs = quote_styles;
289
290 for (qs = quote_styles; qs->name; qs++) {
291 if (qs->style == quoting)
292 return 0;
293 }
294 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
295 return -1;
296}
Skip Montanarob4a04172003-03-20 23:29:12 +0000297
298#define D_OFF(x) offsetof(DialectObj, x)
299
300static struct PyMemberDef Dialect_memberlist[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
302 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
303 { "strict", T_INT, D_OFF(strict), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +0000304 { NULL }
305};
306
307static PyGetSetDef Dialect_getsetlist[] = {
Guido van Rossuma9769c22007-08-07 23:59:30 +0000308 { "delimiter", (getter)Dialect_get_delimiter},
Andrew McNamara1196cf12005-01-07 04:42:45 +0000309 { "escapechar", (getter)Dialect_get_escapechar},
310 { "lineterminator", (getter)Dialect_get_lineterminator},
311 { "quotechar", (getter)Dialect_get_quotechar},
312 { "quoting", (getter)Dialect_get_quoting},
313 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000314};
315
316static void
317Dialect_dealloc(DialectObj *self)
318{
319 Py_XDECREF(self->lineterminator);
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000320 Py_Type(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000321}
322
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000323static char *dialect_kws[] = {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000324 "dialect",
325 "delimiter",
326 "doublequote",
327 "escapechar",
328 "lineterminator",
329 "quotechar",
330 "quoting",
331 "skipinitialspace",
332 "strict",
333 NULL
334};
335
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000336static PyObject *
337dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000338{
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000339 DialectObj *self;
340 PyObject *ret = NULL;
341 PyObject *dialect = NULL;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000342 PyObject *delimiter = NULL;
343 PyObject *doublequote = NULL;
344 PyObject *escapechar = NULL;
345 PyObject *lineterminator = NULL;
346 PyObject *quotechar = NULL;
347 PyObject *quoting = NULL;
348 PyObject *skipinitialspace = NULL;
349 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000350
Andrew McNamara1196cf12005-01-07 04:42:45 +0000351 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
352 "|OOOOOOOOO", dialect_kws,
353 &dialect,
354 &delimiter,
355 &doublequote,
356 &escapechar,
357 &lineterminator,
358 &quotechar,
359 &quoting,
360 &skipinitialspace,
361 &strict))
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000362 return NULL;
363
364 if (dialect != NULL) {
365 if (IS_BASESTRING(dialect)) {
366 dialect = get_dialect_from_registry(dialect);
367 if (dialect == NULL)
368 return NULL;
369 }
370 else
371 Py_INCREF(dialect);
372 /* Can we reuse this instance? */
373 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
374 delimiter == 0 &&
375 doublequote == 0 &&
376 escapechar == 0 &&
377 lineterminator == 0 &&
378 quotechar == 0 &&
379 quoting == 0 &&
380 skipinitialspace == 0 &&
381 strict == 0)
382 return dialect;
383 }
384
385 self = (DialectObj *)type->tp_alloc(type, 0);
386 if (self == NULL) {
387 Py_XDECREF(dialect);
388 return NULL;
389 }
390 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000391
Andrew McNamara1196cf12005-01-07 04:42:45 +0000392 Py_XINCREF(delimiter);
393 Py_XINCREF(doublequote);
394 Py_XINCREF(escapechar);
395 Py_XINCREF(lineterminator);
396 Py_XINCREF(quotechar);
397 Py_XINCREF(quoting);
398 Py_XINCREF(skipinitialspace);
399 Py_XINCREF(strict);
400 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000401#define DIALECT_GETATTR(v, n) \
402 if (v == NULL) \
403 v = PyObject_GetAttrString(dialect, n)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000404 DIALECT_GETATTR(delimiter, "delimiter");
405 DIALECT_GETATTR(doublequote, "doublequote");
406 DIALECT_GETATTR(escapechar, "escapechar");
407 DIALECT_GETATTR(lineterminator, "lineterminator");
408 DIALECT_GETATTR(quotechar, "quotechar");
409 DIALECT_GETATTR(quoting, "quoting");
410 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
411 DIALECT_GETATTR(strict, "strict");
412 PyErr_Clear();
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000414
Andrew McNamara1196cf12005-01-07 04:42:45 +0000415 /* check types and convert to C values */
416#define DIASET(meth, name, target, src, dflt) \
417 if (meth(name, target, src, dflt)) \
418 goto err
419 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
420 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
421 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
422 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
423 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
424 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
425 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
426 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000427
Andrew McNamara1196cf12005-01-07 04:42:45 +0000428 /* validate options */
429 if (dialect_check_quoting(self->quoting))
430 goto err;
431 if (self->delimiter == 0) {
432 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
433 goto err;
434 }
Andrew McNamara5d45a8d2005-01-12 08:16:17 +0000435 if (quotechar == Py_None && quoting == NULL)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000436 self->quoting = QUOTE_NONE;
437 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
438 PyErr_SetString(PyExc_TypeError,
439 "quotechar must be set if quoting enabled");
440 goto err;
441 }
442 if (self->lineterminator == 0) {
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000443 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
Andrew McNamara1196cf12005-01-07 04:42:45 +0000444 goto err;
445 }
446
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000447 ret = (PyObject *)self;
Skip Montanarod60fbd42005-06-15 01:33:30 +0000448 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449err:
Skip Montanarod60fbd42005-06-15 01:33:30 +0000450 Py_XDECREF(self);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000451 Py_XDECREF(dialect);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452 Py_XDECREF(delimiter);
453 Py_XDECREF(doublequote);
454 Py_XDECREF(escapechar);
455 Py_XDECREF(lineterminator);
456 Py_XDECREF(quotechar);
457 Py_XDECREF(quoting);
458 Py_XDECREF(skipinitialspace);
459 Py_XDECREF(strict);
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000460 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000461}
462
463
464PyDoc_STRVAR(Dialect_Type_doc,
465"CSV dialect\n"
466"\n"
467"The Dialect type records CSV parsing and generation options.\n");
468
469static PyTypeObject Dialect_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000470 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000471 "_csv.Dialect", /* tp_name */
472 sizeof(DialectObj), /* tp_basicsize */
473 0, /* tp_itemsize */
474 /* methods */
475 (destructor)Dialect_dealloc, /* tp_dealloc */
476 (printfunc)0, /* tp_print */
477 (getattrfunc)0, /* tp_getattr */
478 (setattrfunc)0, /* tp_setattr */
479 (cmpfunc)0, /* tp_compare */
480 (reprfunc)0, /* tp_repr */
481 0, /* tp_as_number */
482 0, /* tp_as_sequence */
483 0, /* tp_as_mapping */
484 (hashfunc)0, /* tp_hash */
485 (ternaryfunc)0, /* tp_call */
486 (reprfunc)0, /* tp_str */
487 0, /* tp_getattro */
488 0, /* tp_setattro */
489 0, /* tp_as_buffer */
490 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
491 Dialect_Type_doc, /* tp_doc */
492 0, /* tp_traverse */
493 0, /* tp_clear */
494 0, /* tp_richcompare */
495 0, /* tp_weaklistoffset */
496 0, /* tp_iter */
497 0, /* tp_iternext */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000498 0, /* tp_methods */
Skip Montanarob4a04172003-03-20 23:29:12 +0000499 Dialect_memberlist, /* tp_members */
500 Dialect_getsetlist, /* tp_getset */
501 0, /* tp_base */
502 0, /* tp_dict */
503 0, /* tp_descr_get */
504 0, /* tp_descr_set */
505 0, /* tp_dictoffset */
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000506 0, /* tp_init */
507 0, /* tp_alloc */
Skip Montanarob4a04172003-03-20 23:29:12 +0000508 dialect_new, /* tp_new */
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000509 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000510};
511
Andrew McNamara91b97462005-01-11 01:07:23 +0000512/*
513 * Return an instance of the dialect type, given a Python instance or kwarg
514 * description of the dialect
515 */
516static PyObject *
517_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
518{
519 PyObject *ctor_args;
520 PyObject *dialect;
521
522 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
523 if (ctor_args == NULL)
524 return NULL;
525 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
526 Py_DECREF(ctor_args);
527 return dialect;
528}
529
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000530/*
531 * READER
532 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000533static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000534parse_save_field(ReaderObj *self)
535{
536 PyObject *field;
537
Guido van Rossum46264582007-08-06 19:32:18 +0000538 field = PyUnicode_FromUnicode(self->field, self->field_len);
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000539 if (field == NULL)
540 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000541 self->field_len = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000542 if (self->numeric_field) {
543 PyObject *tmp;
544
545 self->numeric_field = 0;
546 tmp = PyNumber_Float(field);
547 if (tmp == NULL) {
548 Py_DECREF(field);
549 return -1;
550 }
551 Py_DECREF(field);
552 field = tmp;
553 }
554 PyList_Append(self->fields, field);
555 Py_DECREF(field);
556 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000557}
558
559static int
560parse_grow_buff(ReaderObj *self)
561{
562 if (self->field_size == 0) {
563 self->field_size = 4096;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000564 if (self->field != NULL)
565 PyMem_Free(self->field);
Guido van Rossum46264582007-08-06 19:32:18 +0000566 self->field = PyMem_New(Py_UNICODE, self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000567 }
568 else {
569 self->field_size *= 2;
Guido van Rossum46264582007-08-06 19:32:18 +0000570 self->field = PyMem_Resize(self->field, Py_UNICODE,
571 self->field_size);
Skip Montanarob4a04172003-03-20 23:29:12 +0000572 }
573 if (self->field == NULL) {
574 PyErr_NoMemory();
575 return 0;
576 }
577 return 1;
578}
579
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000580static int
Guido van Rossum46264582007-08-06 19:32:18 +0000581parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000582{
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000583 if (self->field_len >= field_limit) {
584 PyErr_Format(error_obj, "field larger than field limit (%ld)",
585 field_limit);
586 return -1;
587 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000588 if (self->field_len == self->field_size && !parse_grow_buff(self))
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000589 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000590 self->field[self->field_len++] = c;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000591 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000592}
593
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000594static int
Guido van Rossum46264582007-08-06 19:32:18 +0000595parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000596{
597 DialectObj *dialect = self->dialect;
598
599 switch (self->state) {
600 case START_RECORD:
601 /* start of record */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000602 if (c == '\0')
Skip Montanarob4a04172003-03-20 23:29:12 +0000603 /* empty line - return [] */
604 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000605 else if (c == '\n' || c == '\r') {
606 self->state = EAT_CRNL;
607 break;
608 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000609 /* normal character - handle as START_FIELD */
610 self->state = START_FIELD;
611 /* fallthru */
612 case START_FIELD:
613 /* expecting field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000614 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000615 /* save empty field - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000616 if (parse_save_field(self) < 0)
617 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000618 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000619 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000620 else if (c == dialect->quotechar &&
621 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000622 /* start quoted field */
623 self->state = IN_QUOTED_FIELD;
624 }
625 else if (c == dialect->escapechar) {
626 /* possible escaped character */
627 self->state = ESCAPED_CHAR;
628 }
629 else if (c == ' ' && dialect->skipinitialspace)
630 /* ignore space at start of field */
631 ;
632 else if (c == dialect->delimiter) {
633 /* save empty field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000634 if (parse_save_field(self) < 0)
635 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000636 }
637 else {
638 /* begin new unquoted field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000639 if (dialect->quoting == QUOTE_NONNUMERIC)
640 self->numeric_field = 1;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000641 if (parse_add_char(self, c) < 0)
642 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000643 self->state = IN_FIELD;
644 }
645 break;
646
647 case ESCAPED_CHAR:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000648 if (c == '\0')
649 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000650 if (parse_add_char(self, c) < 0)
651 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652 self->state = IN_FIELD;
653 break;
654
655 case IN_FIELD:
656 /* in unquoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000657 if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000658 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000659 if (parse_save_field(self) < 0)
660 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000661 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000662 }
663 else if (c == dialect->escapechar) {
664 /* possible escaped character */
665 self->state = ESCAPED_CHAR;
666 }
667 else if (c == dialect->delimiter) {
668 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000669 if (parse_save_field(self) < 0)
670 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000671 self->state = START_FIELD;
672 }
673 else {
674 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000675 if (parse_add_char(self, c) < 0)
676 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000677 }
678 break;
679
680 case IN_QUOTED_FIELD:
681 /* in quoted field */
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000682 if (c == '\0')
683 ;
Skip Montanarob4a04172003-03-20 23:29:12 +0000684 else if (c == dialect->escapechar) {
685 /* Possible escape character */
686 self->state = ESCAPE_IN_QUOTED_FIELD;
687 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000688 else if (c == dialect->quotechar &&
689 dialect->quoting != QUOTE_NONE) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000690 if (dialect->doublequote) {
691 /* doublequote; " represented by "" */
692 self->state = QUOTE_IN_QUOTED_FIELD;
693 }
694 else {
695 /* end of quote part of field */
696 self->state = IN_FIELD;
697 }
698 }
699 else {
700 /* normal character - save in field */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000701 if (parse_add_char(self, c) < 0)
702 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000703 }
704 break;
705
706 case ESCAPE_IN_QUOTED_FIELD:
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000707 if (c == '\0')
708 c = '\n';
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000709 if (parse_add_char(self, c) < 0)
710 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711 self->state = IN_QUOTED_FIELD;
712 break;
713
714 case QUOTE_IN_QUOTED_FIELD:
715 /* doublequote - seen a quote in an quoted field */
716 if (dialect->quoting != QUOTE_NONE &&
717 c == dialect->quotechar) {
718 /* save "" as " */
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000719 if (parse_add_char(self, c) < 0)
720 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000721 self->state = IN_QUOTED_FIELD;
722 }
723 else if (c == dialect->delimiter) {
724 /* save field - wait for new field */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000725 if (parse_save_field(self) < 0)
726 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000727 self->state = START_FIELD;
728 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000729 else if (c == '\n' || c == '\r' || c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000730 /* end of line - return [fields] */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000731 if (parse_save_field(self) < 0)
732 return -1;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000733 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
Skip Montanarob4a04172003-03-20 23:29:12 +0000734 }
735 else if (!dialect->strict) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000736 if (parse_add_char(self, c) < 0)
737 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000738 self->state = IN_FIELD;
739 }
740 else {
741 /* illegal */
Andrew McNamara5cfd8372005-01-12 11:39:50 +0000742 PyErr_Format(error_obj, "'%c' expected after '%c'",
Skip Montanarob4a04172003-03-20 23:29:12 +0000743 dialect->delimiter,
744 dialect->quotechar);
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000745 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000746 }
747 break;
748
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000749 case EAT_CRNL:
750 if (c == '\n' || c == '\r')
751 ;
752 else if (c == '\0')
753 self->state = START_RECORD;
754 else {
755 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
756 return -1;
757 }
758 break;
759
Skip Montanarob4a04172003-03-20 23:29:12 +0000760 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000761 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000762}
763
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000764static int
765parse_reset(ReaderObj *self)
766{
767 Py_XDECREF(self->fields);
768 self->fields = PyList_New(0);
769 if (self->fields == NULL)
770 return -1;
771 self->field_len = 0;
772 self->state = START_RECORD;
773 self->numeric_field = 0;
774 return 0;
775}
Skip Montanarob4a04172003-03-20 23:29:12 +0000776
777static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000778Reader_iternext(ReaderObj *self)
779{
780 PyObject *lineobj;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781 PyObject *fields = NULL;
Guido van Rossum46264582007-08-06 19:32:18 +0000782 Py_UNICODE *line, c;
783 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000784
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000785 if (parse_reset(self) < 0)
786 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000787 do {
788 lineobj = PyIter_Next(self->input_iter);
789 if (lineobj == NULL) {
790 /* End of input OR exception */
791 if (!PyErr_Occurred() && self->field_len != 0)
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000792 PyErr_Format(error_obj,
793 "newline inside string");
Skip Montanarob4a04172003-03-20 23:29:12 +0000794 return NULL;
795 }
Georg Brandlf5192612007-11-25 00:52:21 +0000796 if (!PyUnicode_Check(lineobj)) {
797 PyErr_Format(error_obj, "iterator should return "
798 "strings, not %.200s (did you open "
799 "the file in text mode?)",
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000800 lineobj->ob_type->tp_name
801 );
Georg Brandlf5192612007-11-25 00:52:21 +0000802 Py_DECREF(lineobj);
Amaury Forgeot d'Arc10c476d2007-11-19 21:20:21 +0000803 return NULL;
804 }
Guido van Rossum46264582007-08-06 19:32:18 +0000805 ++self->line_num;
806 line = PyUnicode_AsUnicode(lineobj);
807 linelen = PyUnicode_GetSize(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000808 if (line == NULL || linelen < 0) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000809 Py_DECREF(lineobj);
810 return NULL;
811 }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000812 while (linelen--) {
813 c = *line++;
814 if (c == '\0') {
815 Py_DECREF(lineobj);
816 PyErr_Format(error_obj,
817 "line contains NULL byte");
818 goto err;
819 }
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000820 if (parse_process_char(self, c) < 0) {
821 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000822 goto err;
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000823 }
824 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000825 Py_DECREF(lineobj);
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000826 if (parse_process_char(self, 0) < 0)
827 goto err;
Skip Montanarob4a04172003-03-20 23:29:12 +0000828 } while (self->state != START_RECORD);
829
830 fields = self->fields;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000831 self->fields = NULL;
832err:
Skip Montanarob4a04172003-03-20 23:29:12 +0000833 return fields;
834}
835
836static void
837Reader_dealloc(ReaderObj *self)
838{
Andrew McNamara77ead872005-01-10 02:09:41 +0000839 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000840 Py_XDECREF(self->dialect);
841 Py_XDECREF(self->input_iter);
842 Py_XDECREF(self->fields);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +0000843 if (self->field != NULL)
844 PyMem_Free(self->field);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000845 PyObject_GC_Del(self);
846}
847
848static int
849Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
850{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000851 Py_VISIT(self->dialect);
852 Py_VISIT(self->input_iter);
853 Py_VISIT(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000854 return 0;
855}
856
857static int
858Reader_clear(ReaderObj *self)
859{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +0000860 Py_CLEAR(self->dialect);
861 Py_CLEAR(self->input_iter);
862 Py_CLEAR(self->fields);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000863 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000864}
865
866PyDoc_STRVAR(Reader_Type_doc,
867"CSV reader\n"
868"\n"
869"Reader objects are responsible for reading and parsing tabular data\n"
870"in CSV format.\n"
871);
872
873static struct PyMethodDef Reader_methods[] = {
874 { NULL, NULL }
875};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000876#define R_OFF(x) offsetof(ReaderObj, x)
877
878static struct PyMemberDef Reader_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +0000879 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
880 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000881 { NULL }
882};
883
Skip Montanarob4a04172003-03-20 23:29:12 +0000884
885static PyTypeObject Reader_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +0000886 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000887 "_csv.reader", /*tp_name*/
888 sizeof(ReaderObj), /*tp_basicsize*/
889 0, /*tp_itemsize*/
890 /* methods */
891 (destructor)Reader_dealloc, /*tp_dealloc*/
892 (printfunc)0, /*tp_print*/
893 (getattrfunc)0, /*tp_getattr*/
894 (setattrfunc)0, /*tp_setattr*/
895 (cmpfunc)0, /*tp_compare*/
896 (reprfunc)0, /*tp_repr*/
897 0, /*tp_as_number*/
898 0, /*tp_as_sequence*/
899 0, /*tp_as_mapping*/
900 (hashfunc)0, /*tp_hash*/
901 (ternaryfunc)0, /*tp_call*/
902 (reprfunc)0, /*tp_str*/
903 0, /*tp_getattro*/
904 0, /*tp_setattro*/
905 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000906 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
907 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000908 Reader_Type_doc, /*tp_doc*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000909 (traverseproc)Reader_traverse, /*tp_traverse*/
910 (inquiry)Reader_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000911 0, /*tp_richcompare*/
912 0, /*tp_weaklistoffset*/
Andrew McNamara575a00b2005-01-06 02:25:41 +0000913 PyObject_SelfIter, /*tp_iter*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000914 (getiterfunc)Reader_iternext, /*tp_iternext*/
915 Reader_methods, /*tp_methods*/
916 Reader_memberlist, /*tp_members*/
917 0, /*tp_getset*/
918
919};
920
921static PyObject *
922csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
923{
Andrew McNamara91b97462005-01-11 01:07:23 +0000924 PyObject * iterator, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000925 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000926
927 if (!self)
928 return NULL;
929
930 self->dialect = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000931 self->fields = NULL;
932 self->input_iter = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000933 self->field = NULL;
934 self->field_size = 0;
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000935 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000936
937 if (parse_reset(self) < 0) {
938 Py_DECREF(self);
939 return NULL;
940 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000941
Raymond Hettinger1761a7c2004-06-20 04:23:19 +0000942 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +0000943 Py_DECREF(self);
944 return NULL;
945 }
946 self->input_iter = PyObject_GetIter(iterator);
947 if (self->input_iter == NULL) {
948 PyErr_SetString(PyExc_TypeError,
949 "argument 1 must be an iterator");
950 Py_DECREF(self);
951 return NULL;
952 }
Andrew McNamara91b97462005-01-11 01:07:23 +0000953 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +0000954 if (self->dialect == NULL) {
955 Py_DECREF(self);
956 return NULL;
957 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000958
Andrew McNamara77ead872005-01-10 02:09:41 +0000959 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000960 return (PyObject *)self;
961}
962
963/*
964 * WRITER
965 */
966/* ---------------------------------------------------------------- */
967static void
968join_reset(WriterObj *self)
969{
970 self->rec_len = 0;
971 self->num_fields = 0;
972}
973
/* Granularity, in characters, used when sizing the writer's record
 * buffer (see join_check_rec_size).
 */
#define MEM_INCR 32768
975
976/* Calculate new record length or append field to record. Return new
977 * record length.
978 */
979static int
Guido van Rossum46264582007-08-06 19:32:18 +0000980join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
981 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000982{
983 DialectObj *dialect = self->dialect;
Guido van Rossum46264582007-08-06 19:32:18 +0000984 int i;
985 int rec_len;
986 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000987
988#define ADDCH(c) \
989 do {\
990 if (copy_phase) \
991 self->rec[rec_len] = c;\
992 rec_len++;\
993 } while(0)
994
Guido van Rossum46264582007-08-06 19:32:18 +0000995 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000996 if (lineterm == NULL)
997 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000998
999 rec_len = self->rec_len;
1000
Andrew McNamarac89f2842005-01-12 07:44:42 +00001001 /* If this is not the first field we need a field separator */
1002 if (self->num_fields > 0)
1003 ADDCH(dialect->delimiter);
1004
1005 /* Handle preceding quote */
1006 if (copy_phase && *quoted)
1007 ADDCH(dialect->quotechar);
1008
1009 /* Copy/count field data */
Guido van Rossum46264582007-08-06 19:32:18 +00001010 /* If field is null just pass over */
1011 for (i = 0; field; i++) {
1012 Py_UNICODE c = field[i];
Andrew McNamarac89f2842005-01-12 07:44:42 +00001013 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001014
1015 if (c == '\0')
1016 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001017
Andrew McNamarac89f2842005-01-12 07:44:42 +00001018 if (c == dialect->delimiter ||
1019 c == dialect->escapechar ||
Guido van Rossum46264582007-08-06 19:32:18 +00001020 c == dialect->quotechar ||
1021 Py_UNICODE_strchr(lineterm, c)) {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001022 if (dialect->quoting == QUOTE_NONE)
1023 want_escape = 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001024 else {
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025 if (c == dialect->quotechar) {
1026 if (dialect->doublequote)
1027 ADDCH(dialect->quotechar);
1028 else
1029 want_escape = 1;
1030 }
1031 if (!want_escape)
1032 *quoted = 1;
1033 }
1034 if (want_escape) {
1035 if (!dialect->escapechar) {
1036 PyErr_Format(error_obj,
1037 "need to escape, but no escapechar set");
1038 return -1;
1039 }
1040 ADDCH(dialect->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001041 }
1042 }
1043 /* Copy field character into record buffer.
1044 */
Andrew McNamarac89f2842005-01-12 07:44:42 +00001045 ADDCH(c);
Skip Montanarob4a04172003-03-20 23:29:12 +00001046 }
1047
1048 /* If field is empty check if it needs to be quoted.
1049 */
1050 if (i == 0 && quote_empty) {
1051 if (dialect->quoting == QUOTE_NONE) {
1052 PyErr_Format(error_obj,
Guido van Rossum46264582007-08-06 19:32:18 +00001053 "single empty field record must be quoted");
Skip Montanarob4a04172003-03-20 23:29:12 +00001054 return -1;
Andrew McNamaradd3e6cb2005-01-07 06:46:50 +00001055 }
1056 else
Skip Montanarob4a04172003-03-20 23:29:12 +00001057 *quoted = 1;
1058 }
1059
Skip Montanarob4a04172003-03-20 23:29:12 +00001060 if (*quoted) {
1061 if (copy_phase)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001062 ADDCH(dialect->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +00001063 else
Andrew McNamarac89f2842005-01-12 07:44:42 +00001064 rec_len += 2;
Skip Montanarob4a04172003-03-20 23:29:12 +00001065 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001066 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001067#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001068}
1069
1070static int
1071join_check_rec_size(WriterObj *self, int rec_len)
1072{
1073 if (rec_len > self->rec_size) {
1074 if (self->rec_size == 0) {
1075 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001076 if (self->rec != NULL)
1077 PyMem_Free(self->rec);
Guido van Rossum46264582007-08-06 19:32:18 +00001078 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001079 }
1080 else {
Guido van Rossum46264582007-08-06 19:32:18 +00001081 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001082
1083 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Guido van Rossum46264582007-08-06 19:32:18 +00001084 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1085 self->rec_size);
Skip Montanarob4a04172003-03-20 23:29:12 +00001086 if (self->rec == NULL)
1087 PyMem_Free(old_rec);
1088 }
1089 if (self->rec == NULL) {
1090 PyErr_NoMemory();
1091 return 0;
1092 }
1093 }
1094 return 1;
1095}
1096
1097static int
Guido van Rossum46264582007-08-06 19:32:18 +00001098join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001099{
1100 int rec_len;
1101
1102 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1103 if (rec_len < 0)
1104 return 0;
1105
1106 /* grow record buffer if necessary */
1107 if (!join_check_rec_size(self, rec_len))
1108 return 0;
1109
1110 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1111 self->num_fields++;
1112
1113 return 1;
1114}
1115
1116static int
1117join_append_lineterminator(WriterObj *self)
1118{
1119 int terminator_len;
Guido van Rossum46264582007-08-06 19:32:18 +00001120 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001121
Guido van Rossum46264582007-08-06 19:32:18 +00001122 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
Thomas Wouters477c8d52006-05-27 19:21:47 +00001123 if (terminator_len == -1)
1124 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
1126 /* grow record buffer if necessary */
1127 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1128 return 0;
1129
Guido van Rossum46264582007-08-06 19:32:18 +00001130 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
Andrew McNamaracf0fd5a2005-01-12 01:16:35 +00001131 if (terminator == NULL)
1132 return 0;
Guido van Rossum46264582007-08-06 19:32:18 +00001133 memmove(self->rec + self->rec_len, terminator,
1134 sizeof(Py_UNICODE)*terminator_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001135 self->rec_len += terminator_len;
1136
1137 return 1;
1138}
1139
1140PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001141"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001142"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001143"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001144"elements will be converted to string.");
1145
1146static PyObject *
1147csv_writerow(WriterObj *self, PyObject *seq)
1148{
1149 DialectObj *dialect = self->dialect;
1150 int len, i;
1151
1152 if (!PySequence_Check(seq))
1153 return PyErr_Format(error_obj, "sequence expected");
1154
1155 len = PySequence_Length(seq);
1156 if (len < 0)
1157 return NULL;
1158
1159 /* Join all fields in internal buffer.
1160 */
1161 join_reset(self);
1162 for (i = 0; i < len; i++) {
1163 PyObject *field;
1164 int append_ok;
1165 int quoted;
1166
1167 field = PySequence_GetItem(seq, i);
1168 if (field == NULL)
1169 return NULL;
1170
Andrew McNamarac89f2842005-01-12 07:44:42 +00001171 switch (dialect->quoting) {
1172 case QUOTE_NONNUMERIC:
1173 quoted = !PyNumber_Check(field);
1174 break;
1175 case QUOTE_ALL:
1176 quoted = 1;
1177 break;
1178 default:
1179 quoted = 0;
1180 break;
Skip Montanarob4a04172003-03-20 23:29:12 +00001181 }
1182
Guido van Rossum46264582007-08-06 19:32:18 +00001183 if (PyUnicode_Check(field)) {
Skip Montanaro577c7a72003-04-12 19:17:14 +00001184 append_ok = join_append(self,
Guido van Rossum46264582007-08-06 19:32:18 +00001185 PyUnicode_AS_UNICODE(field),
1186 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001187 Py_DECREF(field);
1188 }
1189 else if (field == Py_None) {
Guido van Rossum46264582007-08-06 19:32:18 +00001190 append_ok = join_append(self, NULL,
1191 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001192 Py_DECREF(field);
1193 }
1194 else {
1195 PyObject *str;
1196
Thomas Heller519a0422007-11-15 20:48:54 +00001197 str = PyObject_Str(field);
Guido van Rossum46264582007-08-06 19:32:18 +00001198 Py_DECREF(field);
Skip Montanarob4a04172003-03-20 23:29:12 +00001199 if (str == NULL)
1200 return NULL;
Guido van Rossum46264582007-08-06 19:32:18 +00001201 append_ok = join_append(self,
1202 PyUnicode_AS_UNICODE(str),
1203 &quoted, len == 1);
Skip Montanarob4a04172003-03-20 23:29:12 +00001204 Py_DECREF(str);
1205 }
1206 if (!append_ok)
1207 return NULL;
1208 }
1209
1210 /* Add line terminator.
1211 */
1212 if (!join_append_lineterminator(self))
1213 return 0;
1214
Guido van Rossum46264582007-08-06 19:32:18 +00001215 return PyObject_CallFunction(self->writeline,
1216 "(u#)", self->rec,
1217 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001218}
1219
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001220PyDoc_STRVAR(csv_writerows_doc,
1221"writerows(sequence of sequences)\n"
1222"\n"
1223"Construct and write a series of sequences to a csv file. Non-string\n"
1224"elements will be converted to string.");
1225
Skip Montanarob4a04172003-03-20 23:29:12 +00001226static PyObject *
1227csv_writerows(WriterObj *self, PyObject *seqseq)
1228{
1229 PyObject *row_iter, *row_obj, *result;
1230
1231 row_iter = PyObject_GetIter(seqseq);
1232 if (row_iter == NULL) {
1233 PyErr_SetString(PyExc_TypeError,
Skip Montanaro98f16e02003-04-11 23:10:13 +00001234 "writerows() argument must be iterable");
Skip Montanarob4a04172003-03-20 23:29:12 +00001235 return NULL;
1236 }
1237 while ((row_obj = PyIter_Next(row_iter))) {
1238 result = csv_writerow(self, row_obj);
1239 Py_DECREF(row_obj);
1240 if (!result) {
1241 Py_DECREF(row_iter);
1242 return NULL;
1243 }
1244 else
1245 Py_DECREF(result);
1246 }
1247 Py_DECREF(row_iter);
1248 if (PyErr_Occurred())
1249 return NULL;
1250 Py_INCREF(Py_None);
1251 return Py_None;
1252}
1253
1254static struct PyMethodDef Writer_methods[] = {
1255 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001256 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
Skip Montanarob4a04172003-03-20 23:29:12 +00001257 { NULL, NULL }
1258};
1259
1260#define W_OFF(x) offsetof(WriterObj, x)
1261
1262static struct PyMemberDef Writer_memberlist[] = {
Guido van Rossum33d26892007-08-05 15:29:28 +00001263 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
Skip Montanarob4a04172003-03-20 23:29:12 +00001264 { NULL }
1265};
1266
1267static void
1268Writer_dealloc(WriterObj *self)
1269{
Andrew McNamara77ead872005-01-10 02:09:41 +00001270 PyObject_GC_UnTrack(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001271 Py_XDECREF(self->dialect);
1272 Py_XDECREF(self->writeline);
Andrew McNamaradcfb38c2003-06-09 05:59:23 +00001273 if (self->rec != NULL)
1274 PyMem_Free(self->rec);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001275 PyObject_GC_Del(self);
1276}
1277
1278static int
1279Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1280{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001281 Py_VISIT(self->dialect);
1282 Py_VISIT(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001283 return 0;
1284}
1285
1286static int
1287Writer_clear(WriterObj *self)
1288{
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001289 Py_CLEAR(self->dialect);
1290 Py_CLEAR(self->writeline);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001291 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001292}
1293
1294PyDoc_STRVAR(Writer_Type_doc,
1295"CSV writer\n"
1296"\n"
1297"Writer objects are responsible for generating tabular data\n"
1298"in CSV format from sequence input.\n"
1299);
1300
1301static PyTypeObject Writer_Type = {
Martin v. Löwis9f2e3462007-07-21 17:22:18 +00001302 PyVarObject_HEAD_INIT(NULL, 0)
Skip Montanarob4a04172003-03-20 23:29:12 +00001303 "_csv.writer", /*tp_name*/
1304 sizeof(WriterObj), /*tp_basicsize*/
1305 0, /*tp_itemsize*/
1306 /* methods */
1307 (destructor)Writer_dealloc, /*tp_dealloc*/
1308 (printfunc)0, /*tp_print*/
1309 (getattrfunc)0, /*tp_getattr*/
1310 (setattrfunc)0, /*tp_setattr*/
1311 (cmpfunc)0, /*tp_compare*/
1312 (reprfunc)0, /*tp_repr*/
1313 0, /*tp_as_number*/
1314 0, /*tp_as_sequence*/
1315 0, /*tp_as_mapping*/
1316 (hashfunc)0, /*tp_hash*/
1317 (ternaryfunc)0, /*tp_call*/
1318 (reprfunc)0, /*tp_str*/
1319 0, /*tp_getattro*/
1320 0, /*tp_setattro*/
1321 0, /*tp_as_buffer*/
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001322 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1323 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001324 Writer_Type_doc,
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001325 (traverseproc)Writer_traverse, /*tp_traverse*/
1326 (inquiry)Writer_clear, /*tp_clear*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001327 0, /*tp_richcompare*/
1328 0, /*tp_weaklistoffset*/
1329 (getiterfunc)0, /*tp_iter*/
1330 (getiterfunc)0, /*tp_iternext*/
1331 Writer_methods, /*tp_methods*/
1332 Writer_memberlist, /*tp_members*/
1333 0, /*tp_getset*/
1334};
1335
1336static PyObject *
1337csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1338{
Andrew McNamara91b97462005-01-11 01:07:23 +00001339 PyObject * output_file, * dialect = NULL;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001340 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001341
1342 if (!self)
1343 return NULL;
1344
1345 self->dialect = NULL;
1346 self->writeline = NULL;
1347
1348 self->rec = NULL;
1349 self->rec_size = 0;
1350 self->rec_len = 0;
1351 self->num_fields = 0;
1352
Raymond Hettinger1761a7c2004-06-20 04:23:19 +00001353 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001354 Py_DECREF(self);
1355 return NULL;
1356 }
1357 self->writeline = PyObject_GetAttrString(output_file, "write");
1358 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1359 PyErr_SetString(PyExc_TypeError,
Andrew McNamara5cfd8372005-01-12 11:39:50 +00001360 "argument 1 must have a \"write\" method");
Skip Montanarob4a04172003-03-20 23:29:12 +00001361 Py_DECREF(self);
1362 return NULL;
1363 }
Andrew McNamara91b97462005-01-11 01:07:23 +00001364 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
Skip Montanarob4a04172003-03-20 23:29:12 +00001365 if (self->dialect == NULL) {
1366 Py_DECREF(self);
1367 return NULL;
1368 }
Andrew McNamara77ead872005-01-10 02:09:41 +00001369 PyObject_GC_Track(self);
Skip Montanarob4a04172003-03-20 23:29:12 +00001370 return (PyObject *)self;
1371}
1372
1373/*
1374 * DIALECT REGISTRY
1375 */
1376static PyObject *
1377csv_list_dialects(PyObject *module, PyObject *args)
1378{
1379 return PyDict_Keys(dialects);
1380}
1381
1382static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001383csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001384{
Andrew McNamara86625972005-01-11 01:28:33 +00001385 PyObject *name_obj, *dialect_obj = NULL;
1386 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001387
Andrew McNamara86625972005-01-11 01:28:33 +00001388 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
Skip Montanarob4a04172003-03-20 23:29:12 +00001389 return NULL;
Andrew McNamara37d2bdf2005-01-10 12:22:48 +00001390 if (!IS_BASESTRING(name_obj)) {
Skip Montanarob4a04172003-03-20 23:29:12 +00001391 PyErr_SetString(PyExc_TypeError,
1392 "dialect name must be a string or unicode");
1393 return NULL;
1394 }
Andrew McNamara86625972005-01-11 01:28:33 +00001395 dialect = _call_dialect(dialect_obj, kwargs);
1396 if (dialect == NULL)
1397 return NULL;
1398 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1399 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001400 return NULL;
1401 }
Andrew McNamara86625972005-01-11 01:28:33 +00001402 Py_DECREF(dialect);
Skip Montanarob4a04172003-03-20 23:29:12 +00001403 Py_INCREF(Py_None);
1404 return Py_None;
1405}
1406
1407static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001408csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001409{
Skip Montanarob4a04172003-03-20 23:29:12 +00001410 if (PyDict_DelItem(dialects, name_obj) < 0)
1411 return PyErr_Format(error_obj, "unknown dialect");
1412 Py_INCREF(Py_None);
1413 return Py_None;
1414}
1415
1416static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001417csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001418{
Skip Montanarob4a04172003-03-20 23:29:12 +00001419 return get_dialect_from_registry(name_obj);
1420}
1421
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001422static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001423csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001424{
1425 PyObject *new_limit = NULL;
1426 long old_limit = field_limit;
1427
Andrew McNamara31d88962005-01-12 03:45:10 +00001428 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001429 return NULL;
1430 if (new_limit != NULL) {
Guido van Rossumddefaf32007-01-14 03:31:43 +00001431 if (!PyInt_CheckExact(new_limit)) {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001432 PyErr_Format(PyExc_TypeError,
1433 "limit must be an integer");
1434 return NULL;
1435 }
1436 field_limit = PyInt_AsLong(new_limit);
1437 }
1438 return PyInt_FromLong(old_limit);
1439}
1440
Skip Montanarob4a04172003-03-20 23:29:12 +00001441/*
1442 * MODULE
1443 */
1444
1445PyDoc_STRVAR(csv_module_doc,
1446"CSV parsing and writing.\n"
1447"\n"
1448"This module provides classes that assist in the reading and writing\n"
1449"of Comma Separated Value (CSV) files, and implements the interface\n"
1450"described by PEP 305. Although many CSV files are simple to parse,\n"
1451"the format is not formally defined by a stable specification and\n"
1452"is subtle enough that parsing lines of a CSV file with something\n"
1453"like line.split(\",\") is bound to fail. The module supports three\n"
1454"basic APIs: reading, writing, and registration of dialects.\n"
1455"\n"
1456"\n"
1457"DIALECT REGISTRATION:\n"
1458"\n"
1459"Readers and writers support a dialect argument, which is a convenient\n"
1460"handle on a group of settings. When the dialect argument is a string,\n"
1461"it identifies one of the dialects previously registered with the module.\n"
1462"If it is a class or instance, the attributes of the argument are used as\n"
1463"the settings for the reader or writer:\n"
1464"\n"
1465" class excel:\n"
1466" delimiter = ','\n"
1467" quotechar = '\"'\n"
1468" escapechar = None\n"
1469" doublequote = True\n"
1470" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001471" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001472" quoting = QUOTE_MINIMAL\n"
1473"\n"
1474"SETTINGS:\n"
1475"\n"
1476" * quotechar - specifies a one-character string to use as the \n"
1477" quoting character. It defaults to '\"'.\n"
1478" * delimiter - specifies a one-character string to use as the \n"
1479" field separator. It defaults to ','.\n"
1480" * skipinitialspace - specifies how to interpret whitespace which\n"
1481" immediately follows a delimiter. It defaults to False, which\n"
1482" means that whitespace immediately following a delimiter is part\n"
1483" of the following field.\n"
1484" * lineterminator - specifies the character sequence which should \n"
1485" terminate rows.\n"
1486" * quoting - controls when quotes should be generated by the writer.\n"
1487" It can take on any of the following module constants:\n"
1488"\n"
1489" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1490" field contains either the quotechar or the delimiter\n"
1491" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1492" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001493" fields which do not parse as integers or floating point\n"
1494" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001495" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1496" * escapechar - specifies a one-character string used to escape \n"
1497" the delimiter when quoting is set to QUOTE_NONE.\n"
1498" * doublequote - controls the handling of quotes inside fields. When\n"
1499" True, two consecutive quotes are interpreted as one during read,\n"
1500" and when writing, each quote character embedded in the data is\n"
1501" written as two quotes\n");
1502
1503PyDoc_STRVAR(csv_reader_doc,
1504" csv_reader = reader(iterable [, dialect='excel']\n"
1505" [optional keyword args])\n"
1506" for row in csv_reader:\n"
1507" process(row)\n"
1508"\n"
1509"The \"iterable\" argument can be any object that returns a line\n"
1510"of input for each iteration, such as a file object or a list. The\n"
1511"optional \"dialect\" parameter is discussed below. The function\n"
1512"also accepts optional keyword arguments which override settings\n"
1513"provided by the dialect.\n"
1514"\n"
1515"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001516"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001517
1518PyDoc_STRVAR(csv_writer_doc,
1519" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1520" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001521" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001522" csv_writer.writerow(row)\n"
1523"\n"
1524" [or]\n"
1525"\n"
1526" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1527" [optional keyword args])\n"
1528" csv_writer.writerows(rows)\n"
1529"\n"
1530"The \"fileobj\" argument can be any object that supports the file API.\n");
1531
1532PyDoc_STRVAR(csv_list_dialects_doc,
1533"Return a list of all know dialect names.\n"
1534" names = csv.list_dialects()");
1535
1536PyDoc_STRVAR(csv_get_dialect_doc,
1537"Return the dialect instance associated with name.\n"
1538" dialect = csv.get_dialect(name)");
1539
1540PyDoc_STRVAR(csv_register_dialect_doc,
1541"Create a mapping from a string name to a dialect class.\n"
1542" dialect = csv.register_dialect(name, dialect)");
1543
1544PyDoc_STRVAR(csv_unregister_dialect_doc,
1545"Delete the name/dialect mapping associated with a string name.\n"
1546" csv.unregister_dialect(name)");
1547
Andrew McNamara31d88962005-01-12 03:45:10 +00001548PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001549"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001550" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001551"\n"
1552"Returns old limit. If limit is not given, no new limit is set and\n"
1553"the old limit is returned");
1554
Skip Montanarob4a04172003-03-20 23:29:12 +00001555static struct PyMethodDef csv_methods[] = {
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001556 { "reader", (PyCFunction)csv_reader,
1557 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1558 { "writer", (PyCFunction)csv_writer,
1559 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1560 { "list_dialects", (PyCFunction)csv_list_dialects,
1561 METH_NOARGS, csv_list_dialects_doc},
1562 { "register_dialect", (PyCFunction)csv_register_dialect,
Andrew McNamara86625972005-01-11 01:28:33 +00001563 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001564 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1565 METH_O, csv_unregister_dialect_doc},
1566 { "get_dialect", (PyCFunction)csv_get_dialect,
1567 METH_O, csv_get_dialect_doc},
Andrew McNamara31d88962005-01-12 03:45:10 +00001568 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1569 METH_VARARGS, csv_field_size_limit_doc},
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001570 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001571};
1572
1573PyMODINIT_FUNC
1574init_csv(void)
1575{
1576 PyObject *module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001577 StyleDesc *style;
1578
1579 if (PyType_Ready(&Dialect_Type) < 0)
1580 return;
1581
1582 if (PyType_Ready(&Reader_Type) < 0)
1583 return;
1584
1585 if (PyType_Ready(&Writer_Type) < 0)
1586 return;
1587
1588 /* Create the module and add the functions */
1589 module = Py_InitModule3("_csv", csv_methods, csv_module_doc);
1590 if (module == NULL)
1591 return;
1592
1593 /* Add version to the module. */
Skip Montanaro7b01a832003-04-12 19:23:46 +00001594 if (PyModule_AddStringConstant(module, "__version__",
1595 MODULE_VERSION) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001596 return;
1597
1598 /* Add _dialects dictionary */
1599 dialects = PyDict_New();
1600 if (dialects == NULL)
1601 return;
1602 if (PyModule_AddObject(module, "_dialects", dialects))
1603 return;
1604
1605 /* Add quote styles into dictionary */
1606 for (style = quote_styles; style->name; style++) {
Skip Montanaro7b01a832003-04-12 19:23:46 +00001607 if (PyModule_AddIntConstant(module, style->name,
1608 style->style) == -1)
Skip Montanarob4a04172003-03-20 23:29:12 +00001609 return;
1610 }
1611
1612 /* Add the Dialect type */
Skip Montanaro32c5d422005-06-15 13:35:08 +00001613 Py_INCREF(&Dialect_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001614 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1615 return;
1616
1617 /* Add the CSV exception object to the module. */
1618 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1619 if (error_obj == NULL)
1620 return;
1621 PyModule_AddObject(module, "Error", error_obj);
1622}