blob: dcb671e40a81e15f369a8bbc56cfbd7b0661d547 [file] [log] [blame]
/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's character-at-a-time parser (see
 * parse_process_char()). */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
56
/* Quoting styles a dialect may select. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
63 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
66static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200242 if (!PyUnicode_Check(src)) {
243 PyErr_Format(PyExc_TypeError,
244 "\"%s\" must be string, not %.200s", name,
245 src->ob_type->tp_name);
246 return -1;
247 }
Victor Stinner9e30aa52011-11-21 02:49:52 +0100248 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200249 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 PyErr_Format(PyExc_TypeError,
251 "\"%s\" must be an 1-character string",
252 name);
253 return -1;
254 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100255 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000256 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200257 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000258 }
259 }
260 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000261}
262
263static int
264_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
265{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 if (src == NULL)
267 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
268 else {
269 if (src == Py_None)
270 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000272 PyErr_Format(PyExc_TypeError,
273 "\"%s\" must be a string", name);
274 return -1;
275 }
276 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100277 if (PyUnicode_READY(src) == -1)
278 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000279 Py_XDECREF(*target);
280 Py_INCREF(src);
281 *target = src;
282 }
283 }
284 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000285}
286
287static int
288dialect_check_quoting(int quoting)
289{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000290 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000291
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000292 for (qs = quote_styles; qs->name; qs++) {
293 if (qs->style == quoting)
294 return 0;
295 }
296 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
297 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000298}
Skip Montanarob4a04172003-03-20 23:29:12 +0000299
300#define D_OFF(x) offsetof(DialectObj, x)
301
302static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000303 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
304 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
305 { "strict", T_INT, D_OFF(strict), READONLY },
306 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000307};
308
309static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000310 { "delimiter", (getter)Dialect_get_delimiter},
311 { "escapechar", (getter)Dialect_get_escapechar},
312 { "lineterminator", (getter)Dialect_get_lineterminator},
313 { "quotechar", (getter)Dialect_get_quotechar},
314 { "quoting", (getter)Dialect_get_quoting},
315 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000316};
317
318static void
319Dialect_dealloc(DialectObj *self)
320{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000321 Py_XDECREF(self->lineterminator);
322 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000323}
324
/* Keyword names accepted by dialect_new(), in the order of the "O" slots of
 * its argument format string. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
337
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000338static PyObject *
339dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000340{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000341 DialectObj *self;
342 PyObject *ret = NULL;
343 PyObject *dialect = NULL;
344 PyObject *delimiter = NULL;
345 PyObject *doublequote = NULL;
346 PyObject *escapechar = NULL;
347 PyObject *lineterminator = NULL;
348 PyObject *quotechar = NULL;
349 PyObject *quoting = NULL;
350 PyObject *skipinitialspace = NULL;
351 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000353 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
354 "|OOOOOOOOO", dialect_kws,
355 &dialect,
356 &delimiter,
357 &doublequote,
358 &escapechar,
359 &lineterminator,
360 &quotechar,
361 &quoting,
362 &skipinitialspace,
363 &strict))
364 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000365
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000366 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100367 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000368 dialect = get_dialect_from_registry(dialect);
369 if (dialect == NULL)
370 return NULL;
371 }
372 else
373 Py_INCREF(dialect);
374 /* Can we reuse this instance? */
375 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
376 delimiter == 0 &&
377 doublequote == 0 &&
378 escapechar == 0 &&
379 lineterminator == 0 &&
380 quotechar == 0 &&
381 quoting == 0 &&
382 skipinitialspace == 0 &&
383 strict == 0)
384 return dialect;
385 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000387 self = (DialectObj *)type->tp_alloc(type, 0);
388 if (self == NULL) {
389 Py_XDECREF(dialect);
390 return NULL;
391 }
392 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000393
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000394 Py_XINCREF(delimiter);
395 Py_XINCREF(doublequote);
396 Py_XINCREF(escapechar);
397 Py_XINCREF(lineterminator);
398 Py_XINCREF(quotechar);
399 Py_XINCREF(quoting);
400 Py_XINCREF(skipinitialspace);
401 Py_XINCREF(strict);
402 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000403#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000404 if (v == NULL) \
405 v = PyObject_GetAttrString(dialect, n)
406 DIALECT_GETATTR(delimiter, "delimiter");
407 DIALECT_GETATTR(doublequote, "doublequote");
408 DIALECT_GETATTR(escapechar, "escapechar");
409 DIALECT_GETATTR(lineterminator, "lineterminator");
410 DIALECT_GETATTR(quotechar, "quotechar");
411 DIALECT_GETATTR(quoting, "quoting");
412 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
413 DIALECT_GETATTR(strict, "strict");
414 PyErr_Clear();
415 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000416
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000417 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000418#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000419 if (meth(name, target, src, dflt)) \
420 goto err
421 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
422 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
423 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
424 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
425 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
426 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
427 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
428 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000429
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000430 /* validate options */
431 if (dialect_check_quoting(self->quoting))
432 goto err;
433 if (self->delimiter == 0) {
Serhiy Storchakacac23a52013-12-19 16:27:18 +0200434 PyErr_SetString(PyExc_TypeError,
435 "\"delimiter\" must be an 1-character string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000436 goto err;
437 }
438 if (quotechar == Py_None && quoting == NULL)
439 self->quoting = QUOTE_NONE;
440 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
441 PyErr_SetString(PyExc_TypeError,
442 "quotechar must be set if quoting enabled");
443 goto err;
444 }
445 if (self->lineterminator == 0) {
446 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
447 goto err;
448 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000449
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000450 ret = (PyObject *)self;
451 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000452err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000453 Py_XDECREF(self);
454 Py_XDECREF(dialect);
455 Py_XDECREF(delimiter);
456 Py_XDECREF(doublequote);
457 Py_XDECREF(escapechar);
458 Py_XDECREF(lineterminator);
459 Py_XDECREF(quotechar);
460 Py_XDECREF(quoting);
461 Py_XDECREF(skipinitialspace);
462 Py_XDECREF(strict);
463 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000464}
465
466
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000467PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000468"CSV dialect\n"
469"\n"
470"The Dialect type records CSV parsing and generation options.\n");
471
472static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000473 PyVarObject_HEAD_INIT(NULL, 0)
474 "_csv.Dialect", /* tp_name */
475 sizeof(DialectObj), /* tp_basicsize */
476 0, /* tp_itemsize */
477 /* methods */
478 (destructor)Dialect_dealloc, /* tp_dealloc */
479 (printfunc)0, /* tp_print */
480 (getattrfunc)0, /* tp_getattr */
481 (setattrfunc)0, /* tp_setattr */
482 0, /* tp_reserved */
483 (reprfunc)0, /* tp_repr */
484 0, /* tp_as_number */
485 0, /* tp_as_sequence */
486 0, /* tp_as_mapping */
487 (hashfunc)0, /* tp_hash */
488 (ternaryfunc)0, /* tp_call */
489 (reprfunc)0, /* tp_str */
490 0, /* tp_getattro */
491 0, /* tp_setattro */
492 0, /* tp_as_buffer */
493 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
494 Dialect_Type_doc, /* tp_doc */
495 0, /* tp_traverse */
496 0, /* tp_clear */
497 0, /* tp_richcompare */
498 0, /* tp_weaklistoffset */
499 0, /* tp_iter */
500 0, /* tp_iternext */
501 0, /* tp_methods */
502 Dialect_memberlist, /* tp_members */
503 Dialect_getsetlist, /* tp_getset */
504 0, /* tp_base */
505 0, /* tp_dict */
506 0, /* tp_descr_get */
507 0, /* tp_descr_set */
508 0, /* tp_dictoffset */
509 0, /* tp_init */
510 0, /* tp_alloc */
511 dialect_new, /* tp_new */
512 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000513};
514
Andrew McNamara91b97462005-01-11 01:07:23 +0000515/*
516 * Return an instance of the dialect type, given a Python instance or kwarg
517 * description of the dialect
518 */
519static PyObject *
520_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
521{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000522 PyObject *ctor_args;
523 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000524
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000525 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
526 if (ctor_args == NULL)
527 return NULL;
528 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
529 Py_DECREF(ctor_args);
530 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000531}
532
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000536static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000537parse_save_field(ReaderObj *self)
538{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000539 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000540
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200541 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
542 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000543 if (field == NULL)
544 return -1;
545 self->field_len = 0;
546 if (self->numeric_field) {
547 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000548
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000549 self->numeric_field = 0;
550 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000551 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200552 if (tmp == NULL)
553 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000554 field = tmp;
555 }
556 PyList_Append(self->fields, field);
557 Py_DECREF(field);
558 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000559}
560
561static int
562parse_grow_buff(ReaderObj *self)
563{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000564 if (self->field_size == 0) {
565 self->field_size = 4096;
566 if (self->field != NULL)
567 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200568 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000569 }
570 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200571 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000572 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000573 PyErr_NoMemory();
574 return 0;
575 }
576 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200577 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000578 }
579 if (self->field == NULL) {
580 PyErr_NoMemory();
581 return 0;
582 }
583 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000584}
585
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000586static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200587parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000588{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200589 if (self->field_len >= _csvstate_global->field_limit) {
590 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
591 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000592 return -1;
593 }
594 if (self->field_len == self->field_size && !parse_grow_buff(self))
595 return -1;
596 self->field[self->field_len++] = c;
597 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000598}
599
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000600static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200601parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000602{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000603 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000604
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000605 switch (self->state) {
606 case START_RECORD:
607 /* start of record */
608 if (c == '\0')
609 /* empty line - return [] */
610 break;
611 else if (c == '\n' || c == '\r') {
612 self->state = EAT_CRNL;
613 break;
614 }
615 /* normal character - handle as START_FIELD */
616 self->state = START_FIELD;
617 /* fallthru */
618 case START_FIELD:
619 /* expecting field */
620 if (c == '\n' || c == '\r' || c == '\0') {
621 /* save empty field - return [fields] */
622 if (parse_save_field(self) < 0)
623 return -1;
624 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
625 }
626 else if (c == dialect->quotechar &&
627 dialect->quoting != QUOTE_NONE) {
628 /* start quoted field */
629 self->state = IN_QUOTED_FIELD;
630 }
631 else if (c == dialect->escapechar) {
632 /* possible escaped character */
633 self->state = ESCAPED_CHAR;
634 }
635 else if (c == ' ' && dialect->skipinitialspace)
636 /* ignore space at start of field */
637 ;
638 else if (c == dialect->delimiter) {
639 /* save empty field */
640 if (parse_save_field(self) < 0)
641 return -1;
642 }
643 else {
644 /* begin new unquoted field */
645 if (dialect->quoting == QUOTE_NONNUMERIC)
646 self->numeric_field = 1;
647 if (parse_add_char(self, c) < 0)
648 return -1;
649 self->state = IN_FIELD;
650 }
651 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000652
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 case ESCAPED_CHAR:
654 if (c == '\0')
655 c = '\n';
656 if (parse_add_char(self, c) < 0)
657 return -1;
658 self->state = IN_FIELD;
659 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000660
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000661 case IN_FIELD:
662 /* in unquoted field */
663 if (c == '\n' || c == '\r' || c == '\0') {
664 /* end of line - return [fields] */
665 if (parse_save_field(self) < 0)
666 return -1;
667 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
668 }
669 else if (c == dialect->escapechar) {
670 /* possible escaped character */
671 self->state = ESCAPED_CHAR;
672 }
673 else if (c == dialect->delimiter) {
674 /* save field - wait for new field */
675 if (parse_save_field(self) < 0)
676 return -1;
677 self->state = START_FIELD;
678 }
679 else {
680 /* normal character - save in field */
681 if (parse_add_char(self, c) < 0)
682 return -1;
683 }
684 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000685
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000686 case IN_QUOTED_FIELD:
687 /* in quoted field */
688 if (c == '\0')
689 ;
690 else if (c == dialect->escapechar) {
691 /* Possible escape character */
692 self->state = ESCAPE_IN_QUOTED_FIELD;
693 }
694 else if (c == dialect->quotechar &&
695 dialect->quoting != QUOTE_NONE) {
696 if (dialect->doublequote) {
697 /* doublequote; " represented by "" */
698 self->state = QUOTE_IN_QUOTED_FIELD;
699 }
700 else {
701 /* end of quote part of field */
702 self->state = IN_FIELD;
703 }
704 }
705 else {
706 /* normal character - save in field */
707 if (parse_add_char(self, c) < 0)
708 return -1;
709 }
710 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000711
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000712 case ESCAPE_IN_QUOTED_FIELD:
713 if (c == '\0')
714 c = '\n';
715 if (parse_add_char(self, c) < 0)
716 return -1;
717 self->state = IN_QUOTED_FIELD;
718 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000719
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000720 case QUOTE_IN_QUOTED_FIELD:
721 /* doublequote - seen a quote in an quoted field */
722 if (dialect->quoting != QUOTE_NONE &&
723 c == dialect->quotechar) {
724 /* save "" as " */
725 if (parse_add_char(self, c) < 0)
726 return -1;
727 self->state = IN_QUOTED_FIELD;
728 }
729 else if (c == dialect->delimiter) {
730 /* save field - wait for new field */
731 if (parse_save_field(self) < 0)
732 return -1;
733 self->state = START_FIELD;
734 }
735 else if (c == '\n' || c == '\r' || c == '\0') {
736 /* end of line - return [fields] */
737 if (parse_save_field(self) < 0)
738 return -1;
739 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
740 }
741 else if (!dialect->strict) {
742 if (parse_add_char(self, c) < 0)
743 return -1;
744 self->state = IN_FIELD;
745 }
746 else {
747 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200748 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000749 dialect->delimiter,
750 dialect->quotechar);
751 return -1;
752 }
753 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000754
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000755 case EAT_CRNL:
756 if (c == '\n' || c == '\r')
757 ;
758 else if (c == '\0')
759 self->state = START_RECORD;
760 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200761 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000762 return -1;
763 }
764 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000765
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 }
767 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000768}
769
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000770static int
771parse_reset(ReaderObj *self)
772{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000773 Py_XDECREF(self->fields);
774 self->fields = PyList_New(0);
775 if (self->fields == NULL)
776 return -1;
777 self->field_len = 0;
778 self->state = START_RECORD;
779 self->numeric_field = 0;
780 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000781}
Skip Montanarob4a04172003-03-20 23:29:12 +0000782
783static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000784Reader_iternext(ReaderObj *self)
785{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000786 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200787 Py_UCS4 c;
788 Py_ssize_t pos, linelen;
789 unsigned int kind;
790 void *data;
791 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000792
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000793 if (parse_reset(self) < 0)
794 return NULL;
795 do {
796 lineobj = PyIter_Next(self->input_iter);
797 if (lineobj == NULL) {
798 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700799 if (!PyErr_Occurred() && (self->field_len != 0 ||
800 self->state == IN_QUOTED_FIELD)) {
801 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700802 PyErr_SetString(_csvstate_global->error_obj,
803 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700804 else if (parse_save_field(self) >= 0)
805 break;
806 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000807 return NULL;
808 }
809 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200810 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 "iterator should return strings, "
812 "not %.200s "
813 "(did you open the file in text mode?)",
814 lineobj->ob_type->tp_name
815 );
816 Py_DECREF(lineobj);
817 return NULL;
818 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100819 if (PyUnicode_READY(lineobj) == -1) {
820 Py_DECREF(lineobj);
821 return NULL;
822 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000823 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200824 kind = PyUnicode_KIND(lineobj);
825 data = PyUnicode_DATA(lineobj);
826 pos = 0;
827 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000828 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200829 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000830 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000831 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200832 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000833 "line contains NULL byte");
834 goto err;
835 }
836 if (parse_process_char(self, c) < 0) {
837 Py_DECREF(lineobj);
838 goto err;
839 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200840 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000841 }
842 Py_DECREF(lineobj);
843 if (parse_process_char(self, 0) < 0)
844 goto err;
845 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000846
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000847 fields = self->fields;
848 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000849err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000850 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000851}
852
853static void
854Reader_dealloc(ReaderObj *self)
855{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000856 PyObject_GC_UnTrack(self);
857 Py_XDECREF(self->dialect);
858 Py_XDECREF(self->input_iter);
859 Py_XDECREF(self->fields);
860 if (self->field != NULL)
861 PyMem_Free(self->field);
862 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000863}
864
865static int
866Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
867{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000868 Py_VISIT(self->dialect);
869 Py_VISIT(self->input_iter);
870 Py_VISIT(self->fields);
871 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000872}
873
874static int
875Reader_clear(ReaderObj *self)
876{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000877 Py_CLEAR(self->dialect);
878 Py_CLEAR(self->input_iter);
879 Py_CLEAR(self->fields);
880 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000881}
882
883PyDoc_STRVAR(Reader_Type_doc,
884"CSV reader\n"
885"\n"
886"Reader objects are responsible for reading and parsing tabular data\n"
887"in CSV format.\n"
888);
889
890static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000891 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000892};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000893#define R_OFF(x) offsetof(ReaderObj, x)
894
895static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000896 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
897 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
898 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000899};
900
Skip Montanarob4a04172003-03-20 23:29:12 +0000901
902static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000903 PyVarObject_HEAD_INIT(NULL, 0)
904 "_csv.reader", /*tp_name*/
905 sizeof(ReaderObj), /*tp_basicsize*/
906 0, /*tp_itemsize*/
907 /* methods */
908 (destructor)Reader_dealloc, /*tp_dealloc*/
909 (printfunc)0, /*tp_print*/
910 (getattrfunc)0, /*tp_getattr*/
911 (setattrfunc)0, /*tp_setattr*/
912 0, /*tp_reserved*/
913 (reprfunc)0, /*tp_repr*/
914 0, /*tp_as_number*/
915 0, /*tp_as_sequence*/
916 0, /*tp_as_mapping*/
917 (hashfunc)0, /*tp_hash*/
918 (ternaryfunc)0, /*tp_call*/
919 (reprfunc)0, /*tp_str*/
920 0, /*tp_getattro*/
921 0, /*tp_setattro*/
922 0, /*tp_as_buffer*/
923 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
924 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
925 Reader_Type_doc, /*tp_doc*/
926 (traverseproc)Reader_traverse, /*tp_traverse*/
927 (inquiry)Reader_clear, /*tp_clear*/
928 0, /*tp_richcompare*/
929 0, /*tp_weaklistoffset*/
930 PyObject_SelfIter, /*tp_iter*/
931 (getiterfunc)Reader_iternext, /*tp_iternext*/
932 Reader_methods, /*tp_methods*/
933 Reader_memberlist, /*tp_members*/
934 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000935
936};
937
938static PyObject *
939csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
940{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000941 PyObject * iterator, * dialect = NULL;
942 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000943
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000944 if (!self)
945 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000947 self->dialect = NULL;
948 self->fields = NULL;
949 self->input_iter = NULL;
950 self->field = NULL;
951 self->field_size = 0;
952 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000953
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000954 if (parse_reset(self) < 0) {
955 Py_DECREF(self);
956 return NULL;
957 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000958
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000959 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
960 Py_DECREF(self);
961 return NULL;
962 }
963 self->input_iter = PyObject_GetIter(iterator);
964 if (self->input_iter == NULL) {
965 PyErr_SetString(PyExc_TypeError,
966 "argument 1 must be an iterator");
967 Py_DECREF(self);
968 return NULL;
969 }
970 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
971 if (self->dialect == NULL) {
972 Py_DECREF(self);
973 return NULL;
974 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000975
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000976 PyObject_GC_Track(self);
977 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000978}
979
980/*
981 * WRITER
982 */
983/* ---------------------------------------------------------------- */
984static void
985join_reset(WriterObj *self)
986{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000987 self->rec_len = 0;
988 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000989}
990
/* Granularity, in characters, by which the writer's record buffer grows. */
#define MEM_INCR (32 * 1024)
992
993/* Calculate new record length or append field to record. Return new
994 * record length.
995 */
Antoine Pitrou40455752010-08-15 18:51:10 +0000996static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200997join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
998 Py_ssize_t field_len, int quote_empty, int *quoted,
999 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001000{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001001 DialectObj *dialect = self->dialect;
1002 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001003 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001004
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001005#define INCLEN \
1006 do {\
1007 if (!copy_phase && rec_len == PY_SSIZE_T_MAX) { \
1008 goto overflow; \
1009 } \
1010 rec_len++; \
1011 } while(0)
1012
1013#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001014 do {\
1015 if (copy_phase) \
1016 self->rec[rec_len] = c;\
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001017 INCLEN;\
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001019
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001020 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 /* If this is not the first field we need a field separator */
1023 if (self->num_fields > 0)
1024 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* Handle preceding quote */
1027 if (copy_phase && *quoted)
1028 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001029
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 /* Copy/count field data */
1031 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001032 for (i = 0; field_data && (i < field_len); i++) {
1033 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001034 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001035
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001036 if (c == dialect->delimiter ||
1037 c == dialect->escapechar ||
1038 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001039 PyUnicode_FindChar(
1040 dialect->lineterminator, c, 0,
1041 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001042 if (dialect->quoting == QUOTE_NONE)
1043 want_escape = 1;
1044 else {
1045 if (c == dialect->quotechar) {
1046 if (dialect->doublequote)
1047 ADDCH(dialect->quotechar);
1048 else
1049 want_escape = 1;
1050 }
1051 if (!want_escape)
1052 *quoted = 1;
1053 }
1054 if (want_escape) {
1055 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001056 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001057 "need to escape, but no escapechar set");
1058 return -1;
1059 }
1060 ADDCH(dialect->escapechar);
1061 }
1062 }
1063 /* Copy field character into record buffer.
1064 */
1065 ADDCH(c);
1066 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001067
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001068 /* If field is empty check if it needs to be quoted.
1069 */
1070 if (i == 0 && quote_empty) {
1071 if (dialect->quoting == QUOTE_NONE) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001072 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001073 "single empty field record must be quoted");
1074 return -1;
1075 }
1076 else
1077 *quoted = 1;
1078 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001079
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001080 if (*quoted) {
1081 if (copy_phase)
1082 ADDCH(dialect->quotechar);
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001083 else {
1084 INCLEN; /* starting quote */
1085 INCLEN; /* ending quote */
1086 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001087 }
1088 return rec_len;
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001089
1090 overflow:
1091 PyErr_NoMemory();
1092 return -1;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001093#undef ADDCH
Benjamin Peterson6e01d902016-08-13 17:17:06 -07001094#undef INCLEN
Skip Montanarob4a04172003-03-20 23:29:12 +00001095}
1096
1097static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001098join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001099{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001100
Antoine Pitrou40455752010-08-15 18:51:10 +00001101 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001102 PyErr_NoMemory();
1103 return 0;
1104 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001105
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001106 if (rec_len > self->rec_size) {
1107 if (self->rec_size == 0) {
1108 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1109 if (self->rec != NULL)
1110 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001111 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001112 }
1113 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001114 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001115
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001116 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001117 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001118 if (self->rec == NULL)
1119 PyMem_Free(old_rec);
1120 }
1121 if (self->rec == NULL) {
1122 PyErr_NoMemory();
1123 return 0;
1124 }
1125 }
1126 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001127}
1128
1129static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001130join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001131{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001132 unsigned int field_kind = -1;
1133 void *field_data = NULL;
1134 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001135 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001136
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001137 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001138 if (PyUnicode_READY(field) == -1)
1139 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001140 field_kind = PyUnicode_KIND(field);
1141 field_data = PyUnicode_DATA(field);
1142 field_len = PyUnicode_GET_LENGTH(field);
1143 }
1144 rec_len = join_append_data(self, field_kind, field_data, field_len,
1145 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 if (rec_len < 0)
1147 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001148
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001149 /* grow record buffer if necessary */
1150 if (!join_check_rec_size(self, rec_len))
1151 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001152
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001153 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1154 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001155 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001156
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001158}
1159
1160static int
1161join_append_lineterminator(WriterObj *self)
1162{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001163 Py_ssize_t terminator_len, i;
1164 unsigned int term_kind;
1165 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001166
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001167 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 if (terminator_len == -1)
1169 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001170
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001171 /* grow record buffer if necessary */
1172 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1173 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001174
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001175 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1176 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1177 for (i = 0; i < terminator_len; i++)
1178 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001179 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001180
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001181 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001182}
1183
1184PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001185"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001186"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001187"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001188"elements will be converted to string.");
1189
1190static PyObject *
1191csv_writerow(WriterObj *self, PyObject *seq)
1192{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001194 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001195 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001196
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001197 if (!PySequence_Check(seq))
Antoine Pitroue7672d32012-05-16 11:33:08 +02001198 return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001199
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001200 len = PySequence_Length(seq);
1201 if (len < 0)
1202 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001203
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001204 /* Join all fields in internal buffer.
1205 */
1206 join_reset(self);
1207 for (i = 0; i < len; i++) {
1208 PyObject *field;
1209 int append_ok;
1210 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001211
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001212 field = PySequence_GetItem(seq, i);
1213 if (field == NULL)
1214 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001215
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001216 switch (dialect->quoting) {
1217 case QUOTE_NONNUMERIC:
1218 quoted = !PyNumber_Check(field);
1219 break;
1220 case QUOTE_ALL:
1221 quoted = 1;
1222 break;
1223 default:
1224 quoted = 0;
1225 break;
1226 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001229 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001230 Py_DECREF(field);
1231 }
1232 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001233 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001234 Py_DECREF(field);
1235 }
1236 else {
1237 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 str = PyObject_Str(field);
1240 Py_DECREF(field);
1241 if (str == NULL)
1242 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001243 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001244 Py_DECREF(str);
1245 }
1246 if (!append_ok)
1247 return NULL;
1248 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001249
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001250 /* Add line terminator.
1251 */
1252 if (!join_append_lineterminator(self))
1253 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001254
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001255 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1256 (void *) self->rec, self->rec_len);
1257 if (line == NULL)
1258 return NULL;
1259 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1260 Py_DECREF(line);
1261 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001262}
1263
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001264PyDoc_STRVAR(csv_writerows_doc,
1265"writerows(sequence of sequences)\n"
1266"\n"
1267"Construct and write a series of sequences to a csv file. Non-string\n"
1268"elements will be converted to string.");
1269
Skip Montanarob4a04172003-03-20 23:29:12 +00001270static PyObject *
1271csv_writerows(WriterObj *self, PyObject *seqseq)
1272{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001273 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001274
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001275 row_iter = PyObject_GetIter(seqseq);
1276 if (row_iter == NULL) {
1277 PyErr_SetString(PyExc_TypeError,
1278 "writerows() argument must be iterable");
1279 return NULL;
1280 }
1281 while ((row_obj = PyIter_Next(row_iter))) {
1282 result = csv_writerow(self, row_obj);
1283 Py_DECREF(row_obj);
1284 if (!result) {
1285 Py_DECREF(row_iter);
1286 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001287 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 else
1289 Py_DECREF(result);
1290 }
1291 Py_DECREF(row_iter);
1292 if (PyErr_Occurred())
1293 return NULL;
1294 Py_INCREF(Py_None);
1295 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001296}
1297
1298static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001299 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1300 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1301 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001302};
1303
1304#define W_OFF(x) offsetof(WriterObj, x)
1305
1306static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001307 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1308 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001309};
1310
1311static void
1312Writer_dealloc(WriterObj *self)
1313{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001314 PyObject_GC_UnTrack(self);
1315 Py_XDECREF(self->dialect);
1316 Py_XDECREF(self->writeline);
1317 if (self->rec != NULL)
1318 PyMem_Free(self->rec);
1319 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001320}
1321
1322static int
1323Writer_traverse(WriterObj *self, visitproc visit, void *arg)
1324{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001325 Py_VISIT(self->dialect);
1326 Py_VISIT(self->writeline);
1327 return 0;
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001328}
1329
1330static int
1331Writer_clear(WriterObj *self)
1332{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001333 Py_CLEAR(self->dialect);
1334 Py_CLEAR(self->writeline);
1335 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001336}
1337
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001338PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001339"CSV writer\n"
1340"\n"
1341"Writer objects are responsible for generating tabular data\n"
1342"in CSV format from sequence input.\n"
1343);
1344
1345static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001346 PyVarObject_HEAD_INIT(NULL, 0)
1347 "_csv.writer", /*tp_name*/
1348 sizeof(WriterObj), /*tp_basicsize*/
1349 0, /*tp_itemsize*/
1350 /* methods */
1351 (destructor)Writer_dealloc, /*tp_dealloc*/
1352 (printfunc)0, /*tp_print*/
1353 (getattrfunc)0, /*tp_getattr*/
1354 (setattrfunc)0, /*tp_setattr*/
1355 0, /*tp_reserved*/
1356 (reprfunc)0, /*tp_repr*/
1357 0, /*tp_as_number*/
1358 0, /*tp_as_sequence*/
1359 0, /*tp_as_mapping*/
1360 (hashfunc)0, /*tp_hash*/
1361 (ternaryfunc)0, /*tp_call*/
1362 (reprfunc)0, /*tp_str*/
1363 0, /*tp_getattro*/
1364 0, /*tp_setattro*/
1365 0, /*tp_as_buffer*/
1366 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1367 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1368 Writer_Type_doc,
1369 (traverseproc)Writer_traverse, /*tp_traverse*/
1370 (inquiry)Writer_clear, /*tp_clear*/
1371 0, /*tp_richcompare*/
1372 0, /*tp_weaklistoffset*/
1373 (getiterfunc)0, /*tp_iter*/
1374 (getiterfunc)0, /*tp_iternext*/
1375 Writer_methods, /*tp_methods*/
1376 Writer_memberlist, /*tp_members*/
1377 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001378};
1379
1380static PyObject *
1381csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1382{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001383 PyObject * output_file, * dialect = NULL;
1384 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001385 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 if (!self)
1388 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001389
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001390 self->dialect = NULL;
1391 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001392
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001393 self->rec = NULL;
1394 self->rec_size = 0;
1395 self->rec_len = 0;
1396 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001397
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001398 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1399 Py_DECREF(self);
1400 return NULL;
1401 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001402 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001403 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1404 PyErr_SetString(PyExc_TypeError,
1405 "argument 1 must have a \"write\" method");
1406 Py_DECREF(self);
1407 return NULL;
1408 }
1409 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1410 if (self->dialect == NULL) {
1411 Py_DECREF(self);
1412 return NULL;
1413 }
1414 PyObject_GC_Track(self);
1415 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001416}
1417
1418/*
1419 * DIALECT REGISTRY
1420 */
1421static PyObject *
1422csv_list_dialects(PyObject *module, PyObject *args)
1423{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001424 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001425}
1426
1427static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001428csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001429{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001430 PyObject *name_obj, *dialect_obj = NULL;
1431 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001432
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001433 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1434 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001435 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001436 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001437 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001438 return NULL;
1439 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001440 if (PyUnicode_READY(name_obj) == -1)
1441 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001442 dialect = _call_dialect(dialect_obj, kwargs);
1443 if (dialect == NULL)
1444 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001445 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001446 Py_DECREF(dialect);
1447 return NULL;
1448 }
1449 Py_DECREF(dialect);
1450 Py_INCREF(Py_None);
1451 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001452}
1453
1454static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001455csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001456{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001457 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1458 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001459 Py_INCREF(Py_None);
1460 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001461}
1462
1463static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001464csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001465{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001466 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001467}
1468
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001469static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001470csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001471{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001472 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001473 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001474
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1476 return NULL;
1477 if (new_limit != NULL) {
1478 if (!PyLong_CheckExact(new_limit)) {
1479 PyErr_Format(PyExc_TypeError,
1480 "limit must be an integer");
1481 return NULL;
1482 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001483 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1484 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1485 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001486 return NULL;
1487 }
1488 }
1489 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001490}
1491
Skip Montanarob4a04172003-03-20 23:29:12 +00001492/*
1493 * MODULE
1494 */
1495
1496PyDoc_STRVAR(csv_module_doc,
1497"CSV parsing and writing.\n"
1498"\n"
1499"This module provides classes that assist in the reading and writing\n"
1500"of Comma Separated Value (CSV) files, and implements the interface\n"
1501"described by PEP 305. Although many CSV files are simple to parse,\n"
1502"the format is not formally defined by a stable specification and\n"
1503"is subtle enough that parsing lines of a CSV file with something\n"
1504"like line.split(\",\") is bound to fail. The module supports three\n"
1505"basic APIs: reading, writing, and registration of dialects.\n"
1506"\n"
1507"\n"
1508"DIALECT REGISTRATION:\n"
1509"\n"
1510"Readers and writers support a dialect argument, which is a convenient\n"
1511"handle on a group of settings. When the dialect argument is a string,\n"
1512"it identifies one of the dialects previously registered with the module.\n"
1513"If it is a class or instance, the attributes of the argument are used as\n"
1514"the settings for the reader or writer:\n"
1515"\n"
1516" class excel:\n"
1517" delimiter = ','\n"
1518" quotechar = '\"'\n"
1519" escapechar = None\n"
1520" doublequote = True\n"
1521" skipinitialspace = False\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001522" lineterminator = '\\r\\n'\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001523" quoting = QUOTE_MINIMAL\n"
1524"\n"
1525"SETTINGS:\n"
1526"\n"
1527" * quotechar - specifies a one-character string to use as the \n"
1528" quoting character. It defaults to '\"'.\n"
1529" * delimiter - specifies a one-character string to use as the \n"
1530" field separator. It defaults to ','.\n"
1531" * skipinitialspace - specifies how to interpret whitespace which\n"
1532" immediately follows a delimiter. It defaults to False, which\n"
1533" means that whitespace immediately following a delimiter is part\n"
1534" of the following field.\n"
1535" * lineterminator - specifies the character sequence which should \n"
1536" terminate rows.\n"
1537" * quoting - controls when quotes should be generated by the writer.\n"
1538" It can take on any of the following module constants:\n"
1539"\n"
1540" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
1541" field contains either the quotechar or the delimiter\n"
1542" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
1543" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
Skip Montanaro148eb6a2003-12-02 18:57:47 +00001544" fields which do not parse as integers or floating point\n"
1545" numbers.\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001546" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
1547" * escapechar - specifies a one-character string used to escape \n"
1548" the delimiter when quoting is set to QUOTE_NONE.\n"
1549" * doublequote - controls the handling of quotes inside fields. When\n"
1550" True, two consecutive quotes are interpreted as one during read,\n"
1551" and when writing, each quote character embedded in the data is\n"
1552" written as two quotes\n");
1553
1554PyDoc_STRVAR(csv_reader_doc,
1555" csv_reader = reader(iterable [, dialect='excel']\n"
1556" [optional keyword args])\n"
1557" for row in csv_reader:\n"
1558" process(row)\n"
1559"\n"
1560"The \"iterable\" argument can be any object that returns a line\n"
1561"of input for each iteration, such as a file object or a list. The\n"
1562"optional \"dialect\" parameter is discussed below. The function\n"
1563"also accepts optional keyword arguments which override settings\n"
1564"provided by the dialect.\n"
1565"\n"
1566"The returned object is an iterator. Each iteration returns a row\n"
Johannes Gijsbers8d3b9dd2004-08-15 12:23:10 +00001567"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001568
1569PyDoc_STRVAR(csv_writer_doc,
1570" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1571" [optional keyword args])\n"
Thomas Wouters49fd7fa2006-04-21 10:40:58 +00001572" for row in sequence:\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001573" csv_writer.writerow(row)\n"
1574"\n"
1575" [or]\n"
1576"\n"
1577" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
1578" [optional keyword args])\n"
1579" csv_writer.writerows(rows)\n"
1580"\n"
1581"The \"fileobj\" argument can be any object that supports the file API.\n");
1582
1583PyDoc_STRVAR(csv_list_dialects_doc,
1584"Return a list of all know dialect names.\n"
1585" names = csv.list_dialects()");
1586
1587PyDoc_STRVAR(csv_get_dialect_doc,
1588"Return the dialect instance associated with name.\n"
1589" dialect = csv.get_dialect(name)");
1590
1591PyDoc_STRVAR(csv_register_dialect_doc,
1592"Create a mapping from a string name to a dialect class.\n"
1593" dialect = csv.register_dialect(name, dialect)");
1594
1595PyDoc_STRVAR(csv_unregister_dialect_doc,
1596"Delete the name/dialect mapping associated with a string name.\n"
1597" csv.unregister_dialect(name)");
1598
Andrew McNamara31d88962005-01-12 03:45:10 +00001599PyDoc_STRVAR(csv_field_size_limit_doc,
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001600"Sets an upper limit on parsed fields.\n"
Andrew McNamara31d88962005-01-12 03:45:10 +00001601" csv.field_size_limit([limit])\n"
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001602"\n"
1603"Returns old limit. If limit is not given, no new limit is set and\n"
1604"the old limit is returned");
1605
Skip Montanarob4a04172003-03-20 23:29:12 +00001606static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001607 { "reader", (PyCFunction)csv_reader,
1608 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1609 { "writer", (PyCFunction)csv_writer,
1610 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1611 { "list_dialects", (PyCFunction)csv_list_dialects,
1612 METH_NOARGS, csv_list_dialects_doc},
1613 { "register_dialect", (PyCFunction)csv_register_dialect,
1614 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1615 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1616 METH_O, csv_unregister_dialect_doc},
1617 { "get_dialect", (PyCFunction)csv_get_dialect,
1618 METH_O, csv_get_dialect_doc},
1619 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1620 METH_VARARGS, csv_field_size_limit_doc},
1621 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001622};
1623
Martin v. Löwis1a214512008-06-11 05:26:20 +00001624static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001625 PyModuleDef_HEAD_INIT,
1626 "_csv",
1627 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001628 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001629 csv_methods,
1630 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001631 _csv_traverse,
1632 _csv_clear,
1633 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001634};
1635
Skip Montanarob4a04172003-03-20 23:29:12 +00001636PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001637PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001638{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001639 PyObject *module;
1640 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001641
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001642 if (PyType_Ready(&Dialect_Type) < 0)
1643 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 if (PyType_Ready(&Reader_Type) < 0)
1646 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001647
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001648 if (PyType_Ready(&Writer_Type) < 0)
1649 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001650
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001651 /* Create the module and add the functions */
1652 module = PyModule_Create(&_csvmodule);
1653 if (module == NULL)
1654 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001655
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001656 /* Add version to the module. */
1657 if (PyModule_AddStringConstant(module, "__version__",
1658 MODULE_VERSION) == -1)
1659 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001660
Antoine Pitroue7672d32012-05-16 11:33:08 +02001661 /* Set the field limit */
1662 _csvstate(module)->field_limit = 128 * 1024;
1663 /* Do I still need to add this var to the Module Dict? */
1664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001665 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001666 _csvstate(module)->dialects = PyDict_New();
1667 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001668 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001669 Py_INCREF(_csvstate(module)->dialects);
1670 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001671 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001672
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001673 /* Add quote styles into dictionary */
1674 for (style = quote_styles; style->name; style++) {
1675 if (PyModule_AddIntConstant(module, style->name,
1676 style->style) == -1)
1677 return NULL;
1678 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001679
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 /* Add the Dialect type */
1681 Py_INCREF(&Dialect_Type);
1682 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1683 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001684
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001685 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001686 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1687 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001688 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001689 Py_INCREF(_csvstate(module)->error_obj);
1690 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001691 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001692}