blob: 6c564d727bbbb7acf2176a0351eff5b63dbd6baa [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
/* Version string of this extension module. */
#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
/* True when `o` is a str object (or an instance of a str subclass). */
#define IS_BASESTRING(o) PyUnicode_Check(o)
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000018
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000019static PyObject *error_obj; /* CSV exception */
Skip Montanarob4a04172003-03-20 23:29:12 +000020static PyObject *dialects; /* Dialect registry */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000021static long field_limit = 128 * 1024; /* max parsed field size */
Skip Montanarob4a04172003-03-20 23:29:12 +000022
/* States of the reader's character-at-a-time parser. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
28
/* Quoting policies a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
32
33typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000034 QuoteStyle style;
35 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000036} StyleDesc;
37
38static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000039 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
40 { QUOTE_ALL, "QUOTE_ALL" },
41 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
42 { QUOTE_NONE, "QUOTE_NONE" },
43 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000044};
45
46typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000047 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000048
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000049 int doublequote; /* is " represented by ""? */
50 Py_UNICODE delimiter; /* field separator */
51 Py_UNICODE quotechar; /* quote character */
52 Py_UNICODE escapechar; /* escape character */
53 int skipinitialspace; /* ignore spaces following delimiter? */
54 PyObject *lineterminator; /* string to write between records */
55 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000056
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000057 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000058} DialectObj;
59
Neal Norwitz227b5332006-03-22 09:28:35 +000060static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000061
62typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000063 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000064
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000065 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000066
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000068
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000069 PyObject *fields; /* field list for current record */
70 ParserState state; /* current CSV parse state */
71 Py_UNICODE *field; /* build current field in here */
Antoine Pitrou40455752010-08-15 18:51:10 +000072 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000073 Py_ssize_t field_len; /* length of current field */
74 int numeric_field; /* treat field as numeric */
75 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +000076} ReaderObj;
77
Neal Norwitz227b5332006-03-22 09:28:35 +000078static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000079
/* Exact-type test: true only when `v`'s type is Reader_Type itself. */
#define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +000081
82typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000083 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +000086
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000087 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000088
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000089 Py_UNICODE *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +000090 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 Py_ssize_t rec_len; /* length of record */
92 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +000093} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Neal Norwitz227b5332006-03-22 09:28:35 +000095static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000096
/*
 * DIALECT class
 */
100
101static PyObject *
102get_dialect_from_registry(PyObject * name_obj)
103{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000104 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000106 dialect_obj = PyDict_GetItem(dialects, name_obj);
107 if (dialect_obj == NULL) {
108 if (!PyErr_Occurred())
109 PyErr_Format(error_obj, "unknown dialect");
110 }
111 else
112 Py_INCREF(dialect_obj);
113 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000114}
115
Skip Montanarob4a04172003-03-20 23:29:12 +0000116static PyObject *
117get_string(PyObject *str)
118{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_XINCREF(str);
120 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000121}
122
Skip Montanarob4a04172003-03-20 23:29:12 +0000123static PyObject *
Skip Montanaroe3b10f42007-08-06 20:55:47 +0000124get_nullchar_as_None(Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000125{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000126 if (c == '\0') {
127 Py_INCREF(Py_None);
128 return Py_None;
129 }
130 else
131 return PyUnicode_FromUnicode((Py_UNICODE *)&c, 1);
Skip Montanarob4a04172003-03-20 23:29:12 +0000132}
133
Skip Montanarob4a04172003-03-20 23:29:12 +0000134static PyObject *
135Dialect_get_lineterminator(DialectObj *self)
136{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000137 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000138}
139
Skip Montanarob4a04172003-03-20 23:29:12 +0000140static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000141Dialect_get_delimiter(DialectObj *self)
142{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000143 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000144}
145
146static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000147Dialect_get_escapechar(DialectObj *self)
148{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000149 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000150}
151
Andrew McNamara1196cf12005-01-07 04:42:45 +0000152static PyObject *
153Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000154{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000155 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000156}
157
158static PyObject *
159Dialect_get_quoting(DialectObj *self)
160{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000161 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000162}
163
164static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000165_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000166{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000167 if (src == NULL)
168 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200169 else {
170 int b = PyObject_IsTrue(src);
171 if (b < 0)
172 return -1;
173 *target = b;
174 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000175 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000176}
177
Andrew McNamara1196cf12005-01-07 04:42:45 +0000178static int
179_set_int(const char *name, int *target, PyObject *src, int dflt)
180{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000181 if (src == NULL)
182 *target = dflt;
183 else {
184 long value;
185 if (!PyLong_CheckExact(src)) {
186 PyErr_Format(PyExc_TypeError,
187 "\"%s\" must be an integer", name);
188 return -1;
189 }
190 value = PyLong_AsLong(src);
191 if (value == -1 && PyErr_Occurred())
192 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000193#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000194 if (value > INT_MAX || value < INT_MIN) {
195 PyErr_Format(PyExc_ValueError,
196 "integer out of range for \"%s\"", name);
197 return -1;
198 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000199#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000200 *target = (int)value;
201 }
202 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000203}
204
205static int
Guido van Rossum46264582007-08-06 19:32:18 +0000206_set_char(const char *name, Py_UNICODE *target, PyObject *src, Py_UNICODE dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000207{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000208 if (src == NULL)
209 *target = dflt;
210 else {
211 *target = '\0';
212 if (src != Py_None) {
213 Py_UNICODE *buf;
214 Py_ssize_t len;
215 buf = PyUnicode_AsUnicode(src);
216 len = PyUnicode_GetSize(src);
217 if (buf == NULL || len > 1) {
218 PyErr_Format(PyExc_TypeError,
219 "\"%s\" must be an 1-character string",
220 name);
221 return -1;
222 }
223 if (len > 0)
224 *target = buf[0];
225 }
226 }
227 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000228}
229
230static int
231_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
232{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000233 if (src == NULL)
234 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
235 else {
236 if (src == Py_None)
237 *target = NULL;
238 else if (!IS_BASESTRING(src)) {
239 PyErr_Format(PyExc_TypeError,
240 "\"%s\" must be a string", name);
241 return -1;
242 }
243 else {
244 Py_XDECREF(*target);
245 Py_INCREF(src);
246 *target = src;
247 }
248 }
249 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000250}
251
252static int
253dialect_check_quoting(int quoting)
254{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000255 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000256
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000257 for (qs = quote_styles; qs->name; qs++) {
258 if (qs->style == quoting)
259 return 0;
260 }
261 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
262 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000263}
Skip Montanarob4a04172003-03-20 23:29:12 +0000264
265#define D_OFF(x) offsetof(DialectObj, x)
266
267static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000268 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
269 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
270 { "strict", T_INT, D_OFF(strict), READONLY },
271 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000272};
273
274static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000275 { "delimiter", (getter)Dialect_get_delimiter},
276 { "escapechar", (getter)Dialect_get_escapechar},
277 { "lineterminator", (getter)Dialect_get_lineterminator},
278 { "quotechar", (getter)Dialect_get_quotechar},
279 { "quoting", (getter)Dialect_get_quoting},
280 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000281};
282
283static void
284Dialect_dealloc(DialectObj *self)
285{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 Py_XDECREF(self->lineterminator);
287 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000288}
289
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000290static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000291 "dialect",
292 "delimiter",
293 "doublequote",
294 "escapechar",
295 "lineterminator",
296 "quotechar",
297 "quoting",
298 "skipinitialspace",
299 "strict",
300 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000301};
302
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000303static PyObject *
304dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000305{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000306 DialectObj *self;
307 PyObject *ret = NULL;
308 PyObject *dialect = NULL;
309 PyObject *delimiter = NULL;
310 PyObject *doublequote = NULL;
311 PyObject *escapechar = NULL;
312 PyObject *lineterminator = NULL;
313 PyObject *quotechar = NULL;
314 PyObject *quoting = NULL;
315 PyObject *skipinitialspace = NULL;
316 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000317
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000318 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
319 "|OOOOOOOOO", dialect_kws,
320 &dialect,
321 &delimiter,
322 &doublequote,
323 &escapechar,
324 &lineterminator,
325 &quotechar,
326 &quoting,
327 &skipinitialspace,
328 &strict))
329 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000331 if (dialect != NULL) {
332 if (IS_BASESTRING(dialect)) {
333 dialect = get_dialect_from_registry(dialect);
334 if (dialect == NULL)
335 return NULL;
336 }
337 else
338 Py_INCREF(dialect);
339 /* Can we reuse this instance? */
340 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
341 delimiter == 0 &&
342 doublequote == 0 &&
343 escapechar == 0 &&
344 lineterminator == 0 &&
345 quotechar == 0 &&
346 quoting == 0 &&
347 skipinitialspace == 0 &&
348 strict == 0)
349 return dialect;
350 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000351
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000352 self = (DialectObj *)type->tp_alloc(type, 0);
353 if (self == NULL) {
354 Py_XDECREF(dialect);
355 return NULL;
356 }
357 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000358
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000359 Py_XINCREF(delimiter);
360 Py_XINCREF(doublequote);
361 Py_XINCREF(escapechar);
362 Py_XINCREF(lineterminator);
363 Py_XINCREF(quotechar);
364 Py_XINCREF(quoting);
365 Py_XINCREF(skipinitialspace);
366 Py_XINCREF(strict);
367 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000368#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000369 if (v == NULL) \
370 v = PyObject_GetAttrString(dialect, n)
371 DIALECT_GETATTR(delimiter, "delimiter");
372 DIALECT_GETATTR(doublequote, "doublequote");
373 DIALECT_GETATTR(escapechar, "escapechar");
374 DIALECT_GETATTR(lineterminator, "lineterminator");
375 DIALECT_GETATTR(quotechar, "quotechar");
376 DIALECT_GETATTR(quoting, "quoting");
377 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
378 DIALECT_GETATTR(strict, "strict");
379 PyErr_Clear();
380 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000382 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000383#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000384 if (meth(name, target, src, dflt)) \
385 goto err
386 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
387 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
388 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
389 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
390 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
391 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
392 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
393 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000394
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000395 /* validate options */
396 if (dialect_check_quoting(self->quoting))
397 goto err;
398 if (self->delimiter == 0) {
399 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
400 goto err;
401 }
402 if (quotechar == Py_None && quoting == NULL)
403 self->quoting = QUOTE_NONE;
404 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
405 PyErr_SetString(PyExc_TypeError,
406 "quotechar must be set if quoting enabled");
407 goto err;
408 }
409 if (self->lineterminator == 0) {
410 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
411 goto err;
412 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000413
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000414 ret = (PyObject *)self;
415 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000416err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 Py_XDECREF(self);
418 Py_XDECREF(dialect);
419 Py_XDECREF(delimiter);
420 Py_XDECREF(doublequote);
421 Py_XDECREF(escapechar);
422 Py_XDECREF(lineterminator);
423 Py_XDECREF(quotechar);
424 Py_XDECREF(quoting);
425 Py_XDECREF(skipinitialspace);
426 Py_XDECREF(strict);
427 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000428}
429
430
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000431PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000432"CSV dialect\n"
433"\n"
434"The Dialect type records CSV parsing and generation options.\n");
435
436static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000437 PyVarObject_HEAD_INIT(NULL, 0)
438 "_csv.Dialect", /* tp_name */
439 sizeof(DialectObj), /* tp_basicsize */
440 0, /* tp_itemsize */
441 /* methods */
442 (destructor)Dialect_dealloc, /* tp_dealloc */
443 (printfunc)0, /* tp_print */
444 (getattrfunc)0, /* tp_getattr */
445 (setattrfunc)0, /* tp_setattr */
446 0, /* tp_reserved */
447 (reprfunc)0, /* tp_repr */
448 0, /* tp_as_number */
449 0, /* tp_as_sequence */
450 0, /* tp_as_mapping */
451 (hashfunc)0, /* tp_hash */
452 (ternaryfunc)0, /* tp_call */
453 (reprfunc)0, /* tp_str */
454 0, /* tp_getattro */
455 0, /* tp_setattro */
456 0, /* tp_as_buffer */
457 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
458 Dialect_Type_doc, /* tp_doc */
459 0, /* tp_traverse */
460 0, /* tp_clear */
461 0, /* tp_richcompare */
462 0, /* tp_weaklistoffset */
463 0, /* tp_iter */
464 0, /* tp_iternext */
465 0, /* tp_methods */
466 Dialect_memberlist, /* tp_members */
467 Dialect_getsetlist, /* tp_getset */
468 0, /* tp_base */
469 0, /* tp_dict */
470 0, /* tp_descr_get */
471 0, /* tp_descr_set */
472 0, /* tp_dictoffset */
473 0, /* tp_init */
474 0, /* tp_alloc */
475 dialect_new, /* tp_new */
476 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000477};
478
Andrew McNamara91b97462005-01-11 01:07:23 +0000479/*
480 * Return an instance of the dialect type, given a Python instance or kwarg
481 * description of the dialect
482 */
483static PyObject *
484_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
485{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000486 PyObject *ctor_args;
487 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000488
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000489 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
490 if (ctor_args == NULL)
491 return NULL;
492 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
493 Py_DECREF(ctor_args);
494 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000495}
496
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000500static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000501parse_save_field(ReaderObj *self)
502{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000503 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000504
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000505 field = PyUnicode_FromUnicode(self->field, self->field_len);
506 if (field == NULL)
507 return -1;
508 self->field_len = 0;
509 if (self->numeric_field) {
510 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000511
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000512 self->numeric_field = 0;
513 tmp = PyNumber_Float(field);
514 if (tmp == NULL) {
515 Py_DECREF(field);
516 return -1;
517 }
518 Py_DECREF(field);
519 field = tmp;
520 }
521 PyList_Append(self->fields, field);
522 Py_DECREF(field);
523 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000524}
525
526static int
527parse_grow_buff(ReaderObj *self)
528{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000529 if (self->field_size == 0) {
530 self->field_size = 4096;
531 if (self->field != NULL)
532 PyMem_Free(self->field);
533 self->field = PyMem_New(Py_UNICODE, self->field_size);
534 }
535 else {
Antoine Pitrou40455752010-08-15 18:51:10 +0000536 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000537 PyErr_NoMemory();
538 return 0;
539 }
540 self->field_size *= 2;
541 self->field = PyMem_Resize(self->field, Py_UNICODE,
542 self->field_size);
543 }
544 if (self->field == NULL) {
545 PyErr_NoMemory();
546 return 0;
547 }
548 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000549}
550
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000551static int
Guido van Rossum46264582007-08-06 19:32:18 +0000552parse_add_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000553{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 if (self->field_len >= field_limit) {
555 PyErr_Format(error_obj, "field larger than field limit (%ld)",
556 field_limit);
557 return -1;
558 }
559 if (self->field_len == self->field_size && !parse_grow_buff(self))
560 return -1;
561 self->field[self->field_len++] = c;
562 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000563}
564
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000565static int
Guido van Rossum46264582007-08-06 19:32:18 +0000566parse_process_char(ReaderObj *self, Py_UNICODE c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000567{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000568 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000569
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000570 switch (self->state) {
571 case START_RECORD:
572 /* start of record */
573 if (c == '\0')
574 /* empty line - return [] */
575 break;
576 else if (c == '\n' || c == '\r') {
577 self->state = EAT_CRNL;
578 break;
579 }
580 /* normal character - handle as START_FIELD */
581 self->state = START_FIELD;
582 /* fallthru */
583 case START_FIELD:
584 /* expecting field */
585 if (c == '\n' || c == '\r' || c == '\0') {
586 /* save empty field - return [fields] */
587 if (parse_save_field(self) < 0)
588 return -1;
589 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
590 }
591 else if (c == dialect->quotechar &&
592 dialect->quoting != QUOTE_NONE) {
593 /* start quoted field */
594 self->state = IN_QUOTED_FIELD;
595 }
596 else if (c == dialect->escapechar) {
597 /* possible escaped character */
598 self->state = ESCAPED_CHAR;
599 }
600 else if (c == ' ' && dialect->skipinitialspace)
601 /* ignore space at start of field */
602 ;
603 else if (c == dialect->delimiter) {
604 /* save empty field */
605 if (parse_save_field(self) < 0)
606 return -1;
607 }
608 else {
609 /* begin new unquoted field */
610 if (dialect->quoting == QUOTE_NONNUMERIC)
611 self->numeric_field = 1;
612 if (parse_add_char(self, c) < 0)
613 return -1;
614 self->state = IN_FIELD;
615 }
616 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000617
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000618 case ESCAPED_CHAR:
619 if (c == '\0')
620 c = '\n';
621 if (parse_add_char(self, c) < 0)
622 return -1;
623 self->state = IN_FIELD;
624 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000625
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000626 case IN_FIELD:
627 /* in unquoted field */
628 if (c == '\n' || c == '\r' || c == '\0') {
629 /* end of line - return [fields] */
630 if (parse_save_field(self) < 0)
631 return -1;
632 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
633 }
634 else if (c == dialect->escapechar) {
635 /* possible escaped character */
636 self->state = ESCAPED_CHAR;
637 }
638 else if (c == dialect->delimiter) {
639 /* save field - wait for new field */
640 if (parse_save_field(self) < 0)
641 return -1;
642 self->state = START_FIELD;
643 }
644 else {
645 /* normal character - save in field */
646 if (parse_add_char(self, c) < 0)
647 return -1;
648 }
649 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000651 case IN_QUOTED_FIELD:
652 /* in quoted field */
653 if (c == '\0')
654 ;
655 else if (c == dialect->escapechar) {
656 /* Possible escape character */
657 self->state = ESCAPE_IN_QUOTED_FIELD;
658 }
659 else if (c == dialect->quotechar &&
660 dialect->quoting != QUOTE_NONE) {
661 if (dialect->doublequote) {
662 /* doublequote; " represented by "" */
663 self->state = QUOTE_IN_QUOTED_FIELD;
664 }
665 else {
666 /* end of quote part of field */
667 self->state = IN_FIELD;
668 }
669 }
670 else {
671 /* normal character - save in field */
672 if (parse_add_char(self, c) < 0)
673 return -1;
674 }
675 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000676
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000677 case ESCAPE_IN_QUOTED_FIELD:
678 if (c == '\0')
679 c = '\n';
680 if (parse_add_char(self, c) < 0)
681 return -1;
682 self->state = IN_QUOTED_FIELD;
683 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000685 case QUOTE_IN_QUOTED_FIELD:
686 /* doublequote - seen a quote in an quoted field */
687 if (dialect->quoting != QUOTE_NONE &&
688 c == dialect->quotechar) {
689 /* save "" as " */
690 if (parse_add_char(self, c) < 0)
691 return -1;
692 self->state = IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->delimiter) {
695 /* save field - wait for new field */
696 if (parse_save_field(self) < 0)
697 return -1;
698 self->state = START_FIELD;
699 }
700 else if (c == '\n' || c == '\r' || c == '\0') {
701 /* end of line - return [fields] */
702 if (parse_save_field(self) < 0)
703 return -1;
704 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
705 }
706 else if (!dialect->strict) {
707 if (parse_add_char(self, c) < 0)
708 return -1;
709 self->state = IN_FIELD;
710 }
711 else {
712 /* illegal */
713 PyErr_Format(error_obj, "'%c' expected after '%c'",
714 dialect->delimiter,
715 dialect->quotechar);
716 return -1;
717 }
718 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 case EAT_CRNL:
721 if (c == '\n' || c == '\r')
722 ;
723 else if (c == '\0')
724 self->state = START_RECORD;
725 else {
726 PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
727 return -1;
728 }
729 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000730
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000731 }
732 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000733}
734
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000735static int
736parse_reset(ReaderObj *self)
737{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000738 Py_XDECREF(self->fields);
739 self->fields = PyList_New(0);
740 if (self->fields == NULL)
741 return -1;
742 self->field_len = 0;
743 self->state = START_RECORD;
744 self->numeric_field = 0;
745 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000746}
Skip Montanarob4a04172003-03-20 23:29:12 +0000747
748static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000749Reader_iternext(ReaderObj *self)
750{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000751 PyObject *lineobj;
752 PyObject *fields = NULL;
753 Py_UNICODE *line, c;
754 Py_ssize_t linelen;
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000756 if (parse_reset(self) < 0)
757 return NULL;
758 do {
759 lineobj = PyIter_Next(self->input_iter);
760 if (lineobj == NULL) {
761 /* End of input OR exception */
762 if (!PyErr_Occurred() && self->field_len != 0)
763 PyErr_Format(error_obj,
764 "newline inside string");
765 return NULL;
766 }
767 if (!PyUnicode_Check(lineobj)) {
768 PyErr_Format(error_obj,
769 "iterator should return strings, "
770 "not %.200s "
771 "(did you open the file in text mode?)",
772 lineobj->ob_type->tp_name
773 );
774 Py_DECREF(lineobj);
775 return NULL;
776 }
777 ++self->line_num;
778 line = PyUnicode_AsUnicode(lineobj);
779 linelen = PyUnicode_GetSize(lineobj);
780 if (line == NULL || linelen < 0) {
781 Py_DECREF(lineobj);
782 return NULL;
783 }
784 while (linelen--) {
785 c = *line++;
786 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000787 Py_DECREF(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000788 PyErr_Format(error_obj,
789 "line contains NULL byte");
790 goto err;
791 }
792 if (parse_process_char(self, c) < 0) {
793 Py_DECREF(lineobj);
794 goto err;
795 }
796 }
797 Py_DECREF(lineobj);
798 if (parse_process_char(self, 0) < 0)
799 goto err;
800 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000801
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000802 fields = self->fields;
803 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000804err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000805 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000806}
807
808static void
809Reader_dealloc(ReaderObj *self)
810{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 PyObject_GC_UnTrack(self);
812 Py_XDECREF(self->dialect);
813 Py_XDECREF(self->input_iter);
814 Py_XDECREF(self->fields);
815 if (self->field != NULL)
816 PyMem_Free(self->field);
817 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000818}
819
820static int
821Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
822{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 Py_VISIT(self->dialect);
824 Py_VISIT(self->input_iter);
825 Py_VISIT(self->fields);
826 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000827}
828
829static int
830Reader_clear(ReaderObj *self)
831{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 Py_CLEAR(self->dialect);
833 Py_CLEAR(self->input_iter);
834 Py_CLEAR(self->fields);
835 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000836}
837
838PyDoc_STRVAR(Reader_Type_doc,
839"CSV reader\n"
840"\n"
841"Reader objects are responsible for reading and parsing tabular data\n"
842"in CSV format.\n"
843);
844
845static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000846 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000847};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000848#define R_OFF(x) offsetof(ReaderObj, x)
849
850static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
852 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
853 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000854};
855
Skip Montanarob4a04172003-03-20 23:29:12 +0000856
857static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000858 PyVarObject_HEAD_INIT(NULL, 0)
859 "_csv.reader", /*tp_name*/
860 sizeof(ReaderObj), /*tp_basicsize*/
861 0, /*tp_itemsize*/
862 /* methods */
863 (destructor)Reader_dealloc, /*tp_dealloc*/
864 (printfunc)0, /*tp_print*/
865 (getattrfunc)0, /*tp_getattr*/
866 (setattrfunc)0, /*tp_setattr*/
867 0, /*tp_reserved*/
868 (reprfunc)0, /*tp_repr*/
869 0, /*tp_as_number*/
870 0, /*tp_as_sequence*/
871 0, /*tp_as_mapping*/
872 (hashfunc)0, /*tp_hash*/
873 (ternaryfunc)0, /*tp_call*/
874 (reprfunc)0, /*tp_str*/
875 0, /*tp_getattro*/
876 0, /*tp_setattro*/
877 0, /*tp_as_buffer*/
878 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
879 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
880 Reader_Type_doc, /*tp_doc*/
881 (traverseproc)Reader_traverse, /*tp_traverse*/
882 (inquiry)Reader_clear, /*tp_clear*/
883 0, /*tp_richcompare*/
884 0, /*tp_weaklistoffset*/
885 PyObject_SelfIter, /*tp_iter*/
886 (getiterfunc)Reader_iternext, /*tp_iternext*/
887 Reader_methods, /*tp_methods*/
888 Reader_memberlist, /*tp_members*/
889 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000890
891};
892
893static PyObject *
894csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
895{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 PyObject * iterator, * dialect = NULL;
897 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000898
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000899 if (!self)
900 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000901
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000902 self->dialect = NULL;
903 self->fields = NULL;
904 self->input_iter = NULL;
905 self->field = NULL;
906 self->field_size = 0;
907 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000908
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000909 if (parse_reset(self) < 0) {
910 Py_DECREF(self);
911 return NULL;
912 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000913
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000914 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
915 Py_DECREF(self);
916 return NULL;
917 }
918 self->input_iter = PyObject_GetIter(iterator);
919 if (self->input_iter == NULL) {
920 PyErr_SetString(PyExc_TypeError,
921 "argument 1 must be an iterator");
922 Py_DECREF(self);
923 return NULL;
924 }
925 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
926 if (self->dialect == NULL) {
927 Py_DECREF(self);
928 return NULL;
929 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000930
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000931 PyObject_GC_Track(self);
932 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000933}
934
935/*
936 * WRITER
937 */
938/* ---------------------------------------------------------------- */
939static void
940join_reset(WriterObj *self)
941{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000942 self->rec_len = 0;
943 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000944}
945
946#define MEM_INCR 32768
947
948/* Calculate new record length or append field to record. Return new
949 * record length.
950 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000951static Py_ssize_t
Guido van Rossum46264582007-08-06 19:32:18 +0000952join_append_data(WriterObj *self, Py_UNICODE *field, int quote_empty,
953 int *quoted, int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +0000954{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000955 DialectObj *dialect = self->dialect;
956 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +0000957 Py_ssize_t rec_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 Py_UNICODE *lineterm;
Andrew McNamarac89f2842005-01-12 07:44:42 +0000959
960#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000961 do {\
962 if (copy_phase) \
963 self->rec[rec_len] = c;\
964 rec_len++;\
965 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000966
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000967 lineterm = PyUnicode_AsUnicode(dialect->lineterminator);
968 if (lineterm == NULL)
969 return -1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000970
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000971 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +0000972
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000973 /* If this is not the first field we need a field separator */
974 if (self->num_fields > 0)
975 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000976
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000977 /* Handle preceding quote */
978 if (copy_phase && *quoted)
979 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +0000980
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000981 /* Copy/count field data */
982 /* If field is null just pass over */
983 for (i = 0; field; i++) {
984 Py_UNICODE c = field[i];
985 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000986
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 if (c == '\0')
988 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000989
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000990 if (c == dialect->delimiter ||
991 c == dialect->escapechar ||
992 c == dialect->quotechar ||
993 Py_UNICODE_strchr(lineterm, c)) {
994 if (dialect->quoting == QUOTE_NONE)
995 want_escape = 1;
996 else {
997 if (c == dialect->quotechar) {
998 if (dialect->doublequote)
999 ADDCH(dialect->quotechar);
1000 else
1001 want_escape = 1;
1002 }
1003 if (!want_escape)
1004 *quoted = 1;
1005 }
1006 if (want_escape) {
1007 if (!dialect->escapechar) {
1008 PyErr_Format(error_obj,
1009 "need to escape, but no escapechar set");
1010 return -1;
1011 }
1012 ADDCH(dialect->escapechar);
1013 }
1014 }
1015 /* Copy field character into record buffer.
1016 */
1017 ADDCH(c);
1018 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 /* If field is empty check if it needs to be quoted.
1021 */
1022 if (i == 0 && quote_empty) {
1023 if (dialect->quoting == QUOTE_NONE) {
1024 PyErr_Format(error_obj,
1025 "single empty field record must be quoted");
1026 return -1;
1027 }
1028 else
1029 *quoted = 1;
1030 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 if (*quoted) {
1033 if (copy_phase)
1034 ADDCH(dialect->quotechar);
1035 else
1036 rec_len += 2;
1037 }
1038 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001039#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001040}
1041
1042static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001043join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001044{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001045
Antoine Pitrou40455752010-08-15 18:51:10 +00001046 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001047 PyErr_NoMemory();
1048 return 0;
1049 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001050
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001051 if (rec_len > self->rec_size) {
1052 if (self->rec_size == 0) {
1053 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1054 if (self->rec != NULL)
1055 PyMem_Free(self->rec);
1056 self->rec = PyMem_New(Py_UNICODE, self->rec_size);
1057 }
1058 else {
1059 Py_UNICODE* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001060
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001061 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1062 self->rec = PyMem_Resize(self->rec, Py_UNICODE,
1063 self->rec_size);
1064 if (self->rec == NULL)
1065 PyMem_Free(old_rec);
1066 }
1067 if (self->rec == NULL) {
1068 PyErr_NoMemory();
1069 return 0;
1070 }
1071 }
1072 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001073}
1074
1075static int
Guido van Rossum46264582007-08-06 19:32:18 +00001076join_append(WriterObj *self, Py_UNICODE *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001077{
Antoine Pitrou40455752010-08-15 18:51:10 +00001078 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 rec_len = join_append_data(self, field, quote_empty, quoted, 0);
1081 if (rec_len < 0)
1082 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001083
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001084 /* grow record buffer if necessary */
1085 if (!join_check_rec_size(self, rec_len))
1086 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001087
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001088 self->rec_len = join_append_data(self, field, quote_empty, quoted, 1);
1089 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001090
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001092}
1093
1094static int
1095join_append_lineterminator(WriterObj *self)
1096{
Antoine Pitrou40455752010-08-15 18:51:10 +00001097 Py_ssize_t terminator_len;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001098 Py_UNICODE *terminator;
Skip Montanarob4a04172003-03-20 23:29:12 +00001099
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001100 terminator_len = PyUnicode_GetSize(self->dialect->lineterminator);
1101 if (terminator_len == -1)
1102 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001103
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001104 /* grow record buffer if necessary */
1105 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1106 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001107
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001108 terminator = PyUnicode_AsUnicode(self->dialect->lineterminator);
1109 if (terminator == NULL)
1110 return 0;
1111 memmove(self->rec + self->rec_len, terminator,
1112 sizeof(Py_UNICODE)*terminator_len);
1113 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001115 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001116}
1117
1118PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001119"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001120"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001121"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001122"elements will be converted to string.");
1123
1124static PyObject *
1125csv_writerow(WriterObj *self, PyObject *seq)
1126{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001127 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001128 Py_ssize_t len, i;
Skip Montanarob4a04172003-03-20 23:29:12 +00001129
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001130 if (!PySequence_Check(seq))
1131 return PyErr_Format(error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001132
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001133 len = PySequence_Length(seq);
1134 if (len < 0)
1135 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001137 /* Join all fields in internal buffer.
1138 */
1139 join_reset(self);
1140 for (i = 0; i < len; i++) {
1141 PyObject *field;
1142 int append_ok;
1143 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001144
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001145 field = PySequence_GetItem(seq, i);
1146 if (field == NULL)
1147 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 switch (dialect->quoting) {
1150 case QUOTE_NONNUMERIC:
1151 quoted = !PyNumber_Check(field);
1152 break;
1153 case QUOTE_ALL:
1154 quoted = 1;
1155 break;
1156 default:
1157 quoted = 0;
1158 break;
1159 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001160
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001161 if (PyUnicode_Check(field)) {
1162 append_ok = join_append(self,
1163 PyUnicode_AS_UNICODE(field),
Guido van Rossum46264582007-08-06 19:32:18 +00001164 &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001165 Py_DECREF(field);
1166 }
1167 else if (field == Py_None) {
1168 append_ok = join_append(self, NULL,
1169 &quoted, len == 1);
1170 Py_DECREF(field);
1171 }
1172 else {
1173 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001175 str = PyObject_Str(field);
1176 Py_DECREF(field);
1177 if (str == NULL)
1178 return NULL;
1179 append_ok = join_append(self,
1180 PyUnicode_AS_UNICODE(str),
1181 &quoted, len == 1);
1182 Py_DECREF(str);
1183 }
1184 if (!append_ok)
1185 return NULL;
1186 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001187
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001188 /* Add line terminator.
1189 */
1190 if (!join_append_lineterminator(self))
1191 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 return PyObject_CallFunction(self->writeline,
1194 "(u#)", self->rec,
1195 self->rec_len);
Skip Montanarob4a04172003-03-20 23:29:12 +00001196}
1197
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001198PyDoc_STRVAR(csv_writerows_doc,
1199"writerows(sequence of sequences)\n"
1200"\n"
1201"Construct and write a series of sequences to a csv file. Non-string\n"
1202"elements will be converted to string.");
1203
Skip Montanarob4a04172003-03-20 23:29:12 +00001204static PyObject *
1205csv_writerows(WriterObj *self, PyObject *seqseq)
1206{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001207 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001208
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001209 row_iter = PyObject_GetIter(seqseq);
1210 if (row_iter == NULL) {
1211 PyErr_SetString(PyExc_TypeError,
1212 "writerows() argument must be iterable");
1213 return NULL;
1214 }
1215 while ((row_obj = PyIter_Next(row_iter))) {
1216 result = csv_writerow(self, row_obj);
1217 Py_DECREF(row_obj);
1218 if (!result) {
1219 Py_DECREF(row_iter);
1220 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001221 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001222 else
1223 Py_DECREF(result);
1224 }
1225 Py_DECREF(row_iter);
1226 if (PyErr_Occurred())
1227 return NULL;
1228 Py_INCREF(Py_None);
1229 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001230}
1231
1232static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1234 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1235 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001236};
1237
1238#define W_OFF(x) offsetof(WriterObj, x)
1239
1240static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001241 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1242 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001243};
1244
1245static void
1246Writer_dealloc(WriterObj *self)
1247{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001248 PyObject_GC_UnTrack(self);
1249 Py_XDECREF(self->dialect);
1250 Py_XDECREF(self->writeline);
1251 if (self->rec != NULL)
1252 PyMem_Free(self->rec);
1253 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001254}
1255
1256static int
1257Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1258{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001259 Py_VISIT(self->dialect);
1260 Py_VISIT(self->writeline);
1261 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001262}
1263
1264static int
1265Writer_clear(WriterObj *self)
1266{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001267 Py_CLEAR(self->dialect);
1268 Py_CLEAR(self->writeline);
1269 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001270}
1271
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001272PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001273"CSV writer\n"
1274"\n"
1275"Writer objects are responsible for generating tabular data\n"
1276"in CSV format from sequence input.\n"
1277);
1278
1279static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001280 PyVarObject_HEAD_INIT(NULL, 0)
1281 "_csv.writer", /*tp_name*/
1282 sizeof(WriterObj), /*tp_basicsize*/
1283 0, /*tp_itemsize*/
1284 /* methods */
1285 (destructor)Writer_dealloc, /*tp_dealloc*/
1286 (printfunc)0, /*tp_print*/
1287 (getattrfunc)0, /*tp_getattr*/
1288 (setattrfunc)0, /*tp_setattr*/
1289 0, /*tp_reserved*/
1290 (reprfunc)0, /*tp_repr*/
1291 0, /*tp_as_number*/
1292 0, /*tp_as_sequence*/
1293 0, /*tp_as_mapping*/
1294 (hashfunc)0, /*tp_hash*/
1295 (ternaryfunc)0, /*tp_call*/
1296 (reprfunc)0, /*tp_str*/
1297 0, /*tp_getattro*/
1298 0, /*tp_setattro*/
1299 0, /*tp_as_buffer*/
1300 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1301 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1302 Writer_Type_doc,
1303 (traverseproc)Writer_traverse, /*tp_traverse*/
1304 (inquiry)Writer_clear, /*tp_clear*/
1305 0, /*tp_richcompare*/
1306 0, /*tp_weaklistoffset*/
1307 (getiterfunc)0, /*tp_iter*/
1308 (getiterfunc)0, /*tp_iternext*/
1309 Writer_methods, /*tp_methods*/
1310 Writer_memberlist, /*tp_members*/
1311 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001312};
1313
1314static PyObject *
1315csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1316{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001317 PyObject * output_file, * dialect = NULL;
1318 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +00001319
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001320 if (!self)
1321 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001322
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001323 self->dialect = NULL;
1324 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001325
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001326 self->rec = NULL;
1327 self->rec_size = 0;
1328 self->rec_len = 0;
1329 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001330
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001331 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1332 Py_DECREF(self);
1333 return NULL;
1334 }
1335 self->writeline = PyObject_GetAttrString(output_file, "write");
1336 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1337 PyErr_SetString(PyExc_TypeError,
1338 "argument 1 must have a \"write\" method");
1339 Py_DECREF(self);
1340 return NULL;
1341 }
1342 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1343 if (self->dialect == NULL) {
1344 Py_DECREF(self);
1345 return NULL;
1346 }
1347 PyObject_GC_Track(self);
1348 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001349}
1350
1351/*
1352 * DIALECT REGISTRY
1353 */
1354static PyObject *
1355csv_list_dialects(PyObject *module, PyObject *args)
1356{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001357 return PyDict_Keys(dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001358}
1359
1360static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001361csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001362{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001363 PyObject *name_obj, *dialect_obj = NULL;
1364 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001366 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1367 return NULL;
1368 if (!IS_BASESTRING(name_obj)) {
1369 PyErr_SetString(PyExc_TypeError,
1370 "dialect name must be a string or unicode");
1371 return NULL;
1372 }
1373 dialect = _call_dialect(dialect_obj, kwargs);
1374 if (dialect == NULL)
1375 return NULL;
1376 if (PyDict_SetItem(dialects, name_obj, dialect) < 0) {
1377 Py_DECREF(dialect);
1378 return NULL;
1379 }
1380 Py_DECREF(dialect);
1381 Py_INCREF(Py_None);
1382 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001383}
1384
1385static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001386csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001387{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001388 if (PyDict_DelItem(dialects, name_obj) < 0)
1389 return PyErr_Format(error_obj, "unknown dialect");
1390 Py_INCREF(Py_None);
1391 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001392}
1393
1394static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001395csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001396{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001397 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001398}
1399
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001400static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001401csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001402{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 PyObject *new_limit = NULL;
1404 long old_limit = field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001405
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001406 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1407 return NULL;
1408 if (new_limit != NULL) {
1409 if (!PyLong_CheckExact(new_limit)) {
1410 PyErr_Format(PyExc_TypeError,
1411 "limit must be an integer");
1412 return NULL;
1413 }
1414 field_limit = PyLong_AsLong(new_limit);
1415 if (field_limit == -1 && PyErr_Occurred()) {
1416 field_limit = old_limit;
1417 return NULL;
1418 }
1419 }
1420 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001421}
1422
Skip Montanarob4a04172003-03-20 23:29:12 +00001423/*
1424 * MODULE
1425 */
1426
1427PyDoc_STRVAR(csv_module_doc,
1428"CSV parsing and writing.\n"
1429"\n"
1430"This module provides classes that assist in the reading and writing\n"
1431"of Comma Separated Value (CSV) files, and implements the interface\n"
1432"described by PEP 305. Although many CSV files are simple to parse,\n"
1433"the format is not formally defined by a stable specification and\n"
1434"is subtle enough that parsing lines of a CSV file with something\n"
1435"like line.split(\",\") is bound to fail. The module supports three\n"
1436"basic APIs: reading, writing, and registration of dialects.\n"
1437"\n"
1438"\n"
1439"DIALECT REGISTRATION:\n"
1440"\n"
1441"Readers and writers support a dialect argument, which is a convenient\n"
1442"handle on a group of settings. When the dialect argument is a string,\n"
1443"it identifies one of the dialects previously registered with the module.\n"
1444"If it is a class or instance, the attributes of the argument are used as\n"
1445"the settings for the reader or writer:\n"
1446"\n"
1447" class excel:\n"
1448" delimiter = ','\n"
1449" quotechar = '\"'\n"
1450" escapechar = None\n"
1451" doublequote = True\n"
1452" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001453" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001454" quoting = QUOTE_MINIMAL\n"
1455"\n"
1456"SETTINGS:\n"
1457"\n"
1458" * quotechar - specifies a one-character string to use as the \n"
1459" quoting character. It defaults to '\"'.\n"
1460" * delimiter - specifies a one-character string to use as the \n"
1461" field separator. It defaults to ','.\n"
1462" * skipinitialspace - specifies how to interpret whitespace which\n"
1463" immediately follows a delimiter. It defaults to False, which\n"
1464" means that whitespace immediately following a delimiter is part\n"
1465" of the following field.\n"
1466" * lineterminator - specifies the character sequence which should \n"
1467" terminate rows.\n"
1468" * quoting - controls when quotes should be generated by the writer.\n"
1469" It can take on any of the following module constants:\n"
1470"\n"
1471" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1472" field contains either the quotechar or the delimiter\n"
1473" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1474" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001475" fields which do not parse as integers or floating point\n"
1476" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001477" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1478" * escapechar - specifies a one-character string used to escape \n"
1479" the delimiter when quoting is set to QUOTE_NONE.\n"
1480" * doublequote - controls the handling of quotes inside fields. When\n"
1481" True, two consecutive quotes are interpreted as one during read,\n"
1482" and when writing, each quote character embedded in the data is\n"
1483" written as two quotes\n");
1484
1485PyDoc_STRVAR(csv_reader_doc,
1486" csv_reader = reader(iterable [, dialect='excel']\n"
1487" [optional keyword args])\n"
1488" for row in csv_reader:\n"
1489" process(row)\n"
1490"\n"
1491"The \"iterable\" argument can be any object that returns a line\n"
1492"of input for each iteration, such as a file object or a list. The\n"
1493"optional \"dialect\" parameter is discussed below. The function\n"
1494"also accepts optional keyword arguments which override settings\n"
1495"provided by the dialect.\n"
1496"\n"
1497"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001498"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001499
1500PyDoc_STRVAR(csv_writer_doc,
1501" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1502" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001503" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001504" csv_writer.writerow(row)\n"
1505"\n"
1506" [or]\n"
1507"\n"
1508" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1509" [optional keyword args])\n"
1510" csv_writer.writerows(rows)\n"
1511"\n"
1512"The \"fileobj\" argument can be any object that supports the file API.\n");
1513
1514PyDoc_STRVAR(csv_list_dialects_doc,
1515"Return a list of all know dialect names.\n"
1516" names = csv.list_dialects()");
1517
1518PyDoc_STRVAR(csv_get_dialect_doc,
1519"Return the dialect instance associated with name.\n"
1520" dialect = csv.get_dialect(name)");
1521
1522PyDoc_STRVAR(csv_register_dialect_doc,
1523"Create a mapping from a string name to a dialect class.\n"
1524" dialect = csv.register_dialect(name, dialect)");
1525
1526PyDoc_STRVAR(csv_unregister_dialect_doc,
1527"Delete the name/dialect mapping associated with a string name.\n"
1528" csv.unregister_dialect(name)");
1529
Andrew McNamara31d88962005-01-12 03:45:10 +00001530PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001531"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001532" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001533"\n"
1534"Returns old limit. If limit is not given, no new limit is set and\n"
1535"the old limit is returned");
1536
Skip Montanarob4a04172003-03-20 23:29:12 +00001537static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001538 { "reader", (PyCFunction)csv_reader,
1539 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1540 { "writer", (PyCFunction)csv_writer,
1541 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1542 { "list_dialects", (PyCFunction)csv_list_dialects,
1543 METH_NOARGS, csv_list_dialects_doc},
1544 { "register_dialect", (PyCFunction)csv_register_dialect,
1545 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1546 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1547 METH_O, csv_unregister_dialect_doc},
1548 { "get_dialect", (PyCFunction)csv_get_dialect,
1549 METH_O, csv_get_dialect_doc},
1550 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1551 METH_VARARGS, csv_field_size_limit_doc},
1552 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001553};
1554
Martin v. Löwis1a214512008-06-11 05:26:20 +00001555
1556static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001557 PyModuleDef_HEAD_INIT,
1558 "_csv",
1559 csv_module_doc,
1560 -1,
1561 csv_methods,
1562 NULL,
1563 NULL,
1564 NULL,
1565 NULL
Martin v. Löwis1a214512008-06-11 05:26:20 +00001566};
1567
Skip Montanarob4a04172003-03-20 23:29:12 +00001568PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001569PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001570{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001571 PyObject *module;
1572 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001573
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001574 if (PyType_Ready(&Dialect_Type) < 0)
1575 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001576
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001577 if (PyType_Ready(&Reader_Type) < 0)
1578 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001579
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001580 if (PyType_Ready(&Writer_Type) < 0)
1581 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001582
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001583 /* Create the module and add the functions */
1584 module = PyModule_Create(&_csvmodule);
1585 if (module == NULL)
1586 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001587
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001588 /* Add version to the module. */
1589 if (PyModule_AddStringConstant(module, "__version__",
1590 MODULE_VERSION) == -1)
1591 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001592
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001593 /* Add _dialects dictionary */
1594 dialects = PyDict_New();
1595 if (dialects == NULL)
1596 return NULL;
1597 if (PyModule_AddObject(module, "_dialects", dialects))
1598 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001599
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001600 /* Add quote styles into dictionary */
1601 for (style = quote_styles; style->name; style++) {
1602 if (PyModule_AddIntConstant(module, style->name,
1603 style->style) == -1)
1604 return NULL;
1605 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001606
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 /* Add the Dialect type */
1608 Py_INCREF(&Dialect_Type);
1609 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1610 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001611
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001612 /* Add the CSV exception object to the module. */
1613 error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1614 if (error_obj == NULL)
1615 return NULL;
1616 PyModule_AddObject(module, "Error", error_obj);
1617 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001618}