/* csv module */

/*

This module provides the low-level underpinnings of a CSV reading/writing
module.  Users should not use this module directly, but import the csv.py
module instead.

*/
10
Skip Montanaro7b01a832003-04-12 19:23:46 +000011#define MODULE_VERSION "1.0"
12
Skip Montanarob4a04172003-03-20 23:29:12 +000013#include "Python.h"
14#include "structmember.h"
15
Andrew McNamara37d2bdf2005-01-10 12:22:48 +000016
Antoine Pitroue7672d32012-05-16 11:33:08 +020017typedef struct {
18 PyObject *error_obj; /* CSV exception */
19 PyObject *dialects; /* Dialect registry */
20 long field_limit; /* max parsed field size */
21} _csvstate;
22
23#define _csvstate(o) ((_csvstate *)PyModule_GetState(o))
24
25static int
26_csv_clear(PyObject *m)
27{
28 Py_CLEAR(_csvstate(m)->error_obj);
29 Py_CLEAR(_csvstate(m)->dialects);
30 return 0;
31}
32
33static int
34_csv_traverse(PyObject *m, visitproc visit, void *arg)
35{
36 Py_VISIT(_csvstate(m)->error_obj);
37 Py_VISIT(_csvstate(m)->dialects);
38 return 0;
39}
40
41static void
42_csv_free(void *m)
43{
44 _csv_clear((PyObject *)m);
45}
46
47static struct PyModuleDef _csvmodule;
48
49#define _csvstate_global ((_csvstate *)PyModule_GetState(PyState_FindModule(&_csvmodule)))
Skip Montanarob4a04172003-03-20 23:29:12 +000050
/* States of the reader's character-level state machine
   (see parse_process_char()). */
typedef enum {
    START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
    IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState;
56
/* Quoting styles accepted for a dialect's "quoting" option. */
typedef enum {
    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE
} QuoteStyle;
60
61typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000062 QuoteStyle style;
63 char *name;
Skip Montanarob4a04172003-03-20 23:29:12 +000064} StyleDesc;
65
66static StyleDesc quote_styles[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000067 { QUOTE_MINIMAL, "QUOTE_MINIMAL" },
68 { QUOTE_ALL, "QUOTE_ALL" },
69 { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" },
70 { QUOTE_NONE, "QUOTE_NONE" },
71 { 0 }
Skip Montanarob4a04172003-03-20 23:29:12 +000072};
73
74typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000075 PyObject_HEAD
Guido van Rossum46264582007-08-06 19:32:18 +000076
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000077 int doublequote; /* is " represented by ""? */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020078 Py_UCS4 delimiter; /* field separator */
79 Py_UCS4 quotechar; /* quote character */
80 Py_UCS4 escapechar; /* escape character */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000081 int skipinitialspace; /* ignore spaces following delimiter? */
82 PyObject *lineterminator; /* string to write between records */
83 int quoting; /* style of quoting to write */
Skip Montanarob4a04172003-03-20 23:29:12 +000084
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000085 int strict; /* raise exception on bad CSV */
Skip Montanarob4a04172003-03-20 23:29:12 +000086} DialectObj;
87
Neal Norwitz227b5332006-03-22 09:28:35 +000088static PyTypeObject Dialect_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +000089
90typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000091 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +000092
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000093 PyObject *input_iter; /* iterate over this for input lines */
Skip Montanarob4a04172003-03-20 23:29:12 +000094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000095 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +000096
Antoine Pitrouf95a1b32010-05-09 15:52:27 +000097 PyObject *fields; /* field list for current record */
98 ParserState state; /* current CSV parse state */
Antoine Pitrou77ea6402011-10-07 04:26:55 +020099 Py_UCS4 *field; /* temporary buffer */
Antoine Pitrou40455752010-08-15 18:51:10 +0000100 Py_ssize_t field_size; /* size of allocated buffer */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000101 Py_ssize_t field_len; /* length of current field */
102 int numeric_field; /* treat field as numeric */
103 unsigned long line_num; /* Source-file line number */
Skip Montanarob4a04172003-03-20 23:29:12 +0000104} ReaderObj;
105
Neal Norwitz227b5332006-03-22 09:28:35 +0000106static PyTypeObject Reader_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000107
Christian Heimes90aa7642007-12-19 02:45:37 +0000108#define ReaderObject_Check(v) (Py_TYPE(v) == &Reader_Type)
Skip Montanarob4a04172003-03-20 23:29:12 +0000109
110typedef struct {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000111 PyObject_HEAD
Skip Montanarob4a04172003-03-20 23:29:12 +0000112
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000113 PyObject *writeline; /* write output lines to this file */
Skip Montanarob4a04172003-03-20 23:29:12 +0000114
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000115 DialectObj *dialect; /* parsing dialect */
Skip Montanarob4a04172003-03-20 23:29:12 +0000116
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200117 Py_UCS4 *rec; /* buffer for parser.join */
Antoine Pitrou40455752010-08-15 18:51:10 +0000118 Py_ssize_t rec_size; /* size of allocated record */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000119 Py_ssize_t rec_len; /* length of record */
120 int num_fields; /* number of fields in record */
Guido van Rossum46264582007-08-06 19:32:18 +0000121} WriterObj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000122
Neal Norwitz227b5332006-03-22 09:28:35 +0000123static PyTypeObject Writer_Type;
Skip Montanarob4a04172003-03-20 23:29:12 +0000124
/*
 * DIALECT class
 */
128
129static PyObject *
130get_dialect_from_registry(PyObject * name_obj)
131{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000132 PyObject *dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000133
Antoine Pitroue7672d32012-05-16 11:33:08 +0200134 dialect_obj = PyDict_GetItem(_csvstate_global->dialects, name_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000135 if (dialect_obj == NULL) {
136 if (!PyErr_Occurred())
Antoine Pitroue7672d32012-05-16 11:33:08 +0200137 PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000138 }
139 else
140 Py_INCREF(dialect_obj);
141 return dialect_obj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000142}
143
Skip Montanarob4a04172003-03-20 23:29:12 +0000144static PyObject *
145get_string(PyObject *str)
146{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000147 Py_XINCREF(str);
148 return str;
Skip Montanarob4a04172003-03-20 23:29:12 +0000149}
150
Skip Montanarob4a04172003-03-20 23:29:12 +0000151static PyObject *
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200152get_nullchar_as_None(Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000154 if (c == '\0') {
155 Py_INCREF(Py_None);
156 return Py_None;
157 }
158 else
Martin v. Löwisd63a3b82011-09-28 07:41:54 +0200159 return PyUnicode_FromOrdinal(c);
Skip Montanarob4a04172003-03-20 23:29:12 +0000160}
161
Skip Montanarob4a04172003-03-20 23:29:12 +0000162static PyObject *
163Dialect_get_lineterminator(DialectObj *self)
164{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000165 return get_string(self->lineterminator);
Skip Montanarob4a04172003-03-20 23:29:12 +0000166}
167
Skip Montanarob4a04172003-03-20 23:29:12 +0000168static PyObject *
Guido van Rossuma9769c22007-08-07 23:59:30 +0000169Dialect_get_delimiter(DialectObj *self)
170{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000171 return get_nullchar_as_None(self->delimiter);
Guido van Rossuma9769c22007-08-07 23:59:30 +0000172}
173
174static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000175Dialect_get_escapechar(DialectObj *self)
176{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000177 return get_nullchar_as_None(self->escapechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000178}
179
Andrew McNamara1196cf12005-01-07 04:42:45 +0000180static PyObject *
181Dialect_get_quotechar(DialectObj *self)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000183 return get_nullchar_as_None(self->quotechar);
Skip Montanarob4a04172003-03-20 23:29:12 +0000184}
185
186static PyObject *
187Dialect_get_quoting(DialectObj *self)
188{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000189 return PyLong_FromLong(self->quoting);
Skip Montanarob4a04172003-03-20 23:29:12 +0000190}
191
192static int
Andrew McNamara1196cf12005-01-07 04:42:45 +0000193_set_bool(const char *name, int *target, PyObject *src, int dflt)
Skip Montanarob4a04172003-03-20 23:29:12 +0000194{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000195 if (src == NULL)
196 *target = dflt;
Antoine Pitrou6f430e42012-08-15 23:18:25 +0200197 else {
198 int b = PyObject_IsTrue(src);
199 if (b < 0)
200 return -1;
201 *target = b;
202 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000203 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000204}
205
Andrew McNamara1196cf12005-01-07 04:42:45 +0000206static int
207_set_int(const char *name, int *target, PyObject *src, int dflt)
208{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000209 if (src == NULL)
210 *target = dflt;
211 else {
212 long value;
213 if (!PyLong_CheckExact(src)) {
214 PyErr_Format(PyExc_TypeError,
215 "\"%s\" must be an integer", name);
216 return -1;
217 }
218 value = PyLong_AsLong(src);
219 if (value == -1 && PyErr_Occurred())
220 return -1;
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000221#if SIZEOF_LONG > SIZEOF_INT
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000222 if (value > INT_MAX || value < INT_MIN) {
223 PyErr_Format(PyExc_ValueError,
224 "integer out of range for \"%s\"", name);
225 return -1;
226 }
Martin v. Löwisd1a1d1e2007-12-04 22:10:37 +0000227#endif
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000228 *target = (int)value;
229 }
230 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000231}
232
233static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200234_set_char(const char *name, Py_UCS4 *target, PyObject *src, Py_UCS4 dflt)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000235{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000236 if (src == NULL)
237 *target = dflt;
238 else {
239 *target = '\0';
240 if (src != Py_None) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000241 Py_ssize_t len;
Victor Stinner9e30aa52011-11-21 02:49:52 +0100242 len = PyUnicode_GetLength(src);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200243 if (len > 1) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000244 PyErr_Format(PyExc_TypeError,
245 "\"%s\" must be an 1-character string",
246 name);
247 return -1;
248 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100249 /* PyUnicode_READY() is called in PyUnicode_GetLength() */
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000250 if (len > 0)
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200251 *target = PyUnicode_READ_CHAR(src, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000252 }
253 }
254 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000255}
256
257static int
258_set_str(const char *name, PyObject **target, PyObject *src, const char *dflt)
259{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000260 if (src == NULL)
261 *target = PyUnicode_DecodeASCII(dflt, strlen(dflt), NULL);
262 else {
263 if (src == Py_None)
264 *target = NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +0100265 else if (!PyUnicode_Check(src)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000266 PyErr_Format(PyExc_TypeError,
267 "\"%s\" must be a string", name);
268 return -1;
269 }
270 else {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100271 if (PyUnicode_READY(src) == -1)
272 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000273 Py_XDECREF(*target);
274 Py_INCREF(src);
275 *target = src;
276 }
277 }
278 return 0;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000279}
280
281static int
282dialect_check_quoting(int quoting)
283{
Victor Stinner4fe519b2010-11-09 09:40:16 +0000284 StyleDesc *qs;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000285
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000286 for (qs = quote_styles; qs->name; qs++) {
287 if (qs->style == quoting)
288 return 0;
289 }
290 PyErr_Format(PyExc_TypeError, "bad \"quoting\" value");
291 return -1;
Andrew McNamara1196cf12005-01-07 04:42:45 +0000292}
Skip Montanarob4a04172003-03-20 23:29:12 +0000293
294#define D_OFF(x) offsetof(DialectObj, x)
295
296static struct PyMemberDef Dialect_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000297 { "skipinitialspace", T_INT, D_OFF(skipinitialspace), READONLY },
298 { "doublequote", T_INT, D_OFF(doublequote), READONLY },
299 { "strict", T_INT, D_OFF(strict), READONLY },
300 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000301};
302
303static PyGetSetDef Dialect_getsetlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000304 { "delimiter", (getter)Dialect_get_delimiter},
305 { "escapechar", (getter)Dialect_get_escapechar},
306 { "lineterminator", (getter)Dialect_get_lineterminator},
307 { "quotechar", (getter)Dialect_get_quotechar},
308 { "quoting", (getter)Dialect_get_quoting},
309 {NULL},
Skip Montanarob4a04172003-03-20 23:29:12 +0000310};
311
312static void
313Dialect_dealloc(DialectObj *self)
314{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000315 Py_XDECREF(self->lineterminator);
316 Py_TYPE(self)->tp_free((PyObject *)self);
Skip Montanarob4a04172003-03-20 23:29:12 +0000317}
318
Martin v. Löwis02cbf4a2006-02-27 17:20:04 +0000319static char *dialect_kws[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000320 "dialect",
321 "delimiter",
322 "doublequote",
323 "escapechar",
324 "lineterminator",
325 "quotechar",
326 "quoting",
327 "skipinitialspace",
328 "strict",
329 NULL
Andrew McNamara1196cf12005-01-07 04:42:45 +0000330};
331
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000332static PyObject *
333dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +0000334{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000335 DialectObj *self;
336 PyObject *ret = NULL;
337 PyObject *dialect = NULL;
338 PyObject *delimiter = NULL;
339 PyObject *doublequote = NULL;
340 PyObject *escapechar = NULL;
341 PyObject *lineterminator = NULL;
342 PyObject *quotechar = NULL;
343 PyObject *quoting = NULL;
344 PyObject *skipinitialspace = NULL;
345 PyObject *strict = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000346
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000347 if (!PyArg_ParseTupleAndKeywords(args, kwargs,
348 "|OOOOOOOOO", dialect_kws,
349 &dialect,
350 &delimiter,
351 &doublequote,
352 &escapechar,
353 &lineterminator,
354 &quotechar,
355 &quoting,
356 &skipinitialspace,
357 &strict))
358 return NULL;
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000359
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000360 if (dialect != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +0100361 if (PyUnicode_Check(dialect)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000362 dialect = get_dialect_from_registry(dialect);
363 if (dialect == NULL)
364 return NULL;
365 }
366 else
367 Py_INCREF(dialect);
368 /* Can we reuse this instance? */
369 if (PyObject_TypeCheck(dialect, &Dialect_Type) &&
370 delimiter == 0 &&
371 doublequote == 0 &&
372 escapechar == 0 &&
373 lineterminator == 0 &&
374 quotechar == 0 &&
375 quoting == 0 &&
376 skipinitialspace == 0 &&
377 strict == 0)
378 return dialect;
379 }
Andrew McNamara29bf4e42005-01-11 04:49:53 +0000380
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000381 self = (DialectObj *)type->tp_alloc(type, 0);
382 if (self == NULL) {
383 Py_XDECREF(dialect);
384 return NULL;
385 }
386 self->lineterminator = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000387
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000388 Py_XINCREF(delimiter);
389 Py_XINCREF(doublequote);
390 Py_XINCREF(escapechar);
391 Py_XINCREF(lineterminator);
392 Py_XINCREF(quotechar);
393 Py_XINCREF(quoting);
394 Py_XINCREF(skipinitialspace);
395 Py_XINCREF(strict);
396 if (dialect != NULL) {
Andrew McNamara1196cf12005-01-07 04:42:45 +0000397#define DIALECT_GETATTR(v, n) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000398 if (v == NULL) \
399 v = PyObject_GetAttrString(dialect, n)
400 DIALECT_GETATTR(delimiter, "delimiter");
401 DIALECT_GETATTR(doublequote, "doublequote");
402 DIALECT_GETATTR(escapechar, "escapechar");
403 DIALECT_GETATTR(lineterminator, "lineterminator");
404 DIALECT_GETATTR(quotechar, "quotechar");
405 DIALECT_GETATTR(quoting, "quoting");
406 DIALECT_GETATTR(skipinitialspace, "skipinitialspace");
407 DIALECT_GETATTR(strict, "strict");
408 PyErr_Clear();
409 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000410
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000411 /* check types and convert to C values */
Andrew McNamara1196cf12005-01-07 04:42:45 +0000412#define DIASET(meth, name, target, src, dflt) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000413 if (meth(name, target, src, dflt)) \
414 goto err
415 DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ',');
416 DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1);
417 DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0);
418 DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n");
419 DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"');
420 DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL);
421 DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0);
422 DIASET(_set_bool, "strict", &self->strict, strict, 0);
Skip Montanarob4a04172003-03-20 23:29:12 +0000423
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000424 /* validate options */
425 if (dialect_check_quoting(self->quoting))
426 goto err;
427 if (self->delimiter == 0) {
428 PyErr_SetString(PyExc_TypeError, "delimiter must be set");
429 goto err;
430 }
431 if (quotechar == Py_None && quoting == NULL)
432 self->quoting = QUOTE_NONE;
433 if (self->quoting != QUOTE_NONE && self->quotechar == 0) {
434 PyErr_SetString(PyExc_TypeError,
435 "quotechar must be set if quoting enabled");
436 goto err;
437 }
438 if (self->lineterminator == 0) {
439 PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
440 goto err;
441 }
Andrew McNamara1196cf12005-01-07 04:42:45 +0000442
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000443 ret = (PyObject *)self;
444 Py_INCREF(self);
Andrew McNamara1196cf12005-01-07 04:42:45 +0000445err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000446 Py_XDECREF(self);
447 Py_XDECREF(dialect);
448 Py_XDECREF(delimiter);
449 Py_XDECREF(doublequote);
450 Py_XDECREF(escapechar);
451 Py_XDECREF(lineterminator);
452 Py_XDECREF(quotechar);
453 Py_XDECREF(quoting);
454 Py_XDECREF(skipinitialspace);
455 Py_XDECREF(strict);
456 return ret;
Skip Montanarob4a04172003-03-20 23:29:12 +0000457}
458
459
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000460PyDoc_STRVAR(Dialect_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +0000461"CSV dialect\n"
462"\n"
463"The Dialect type records CSV parsing and generation options.\n");
464
465static PyTypeObject Dialect_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000466 PyVarObject_HEAD_INIT(NULL, 0)
467 "_csv.Dialect", /* tp_name */
468 sizeof(DialectObj), /* tp_basicsize */
469 0, /* tp_itemsize */
470 /* methods */
471 (destructor)Dialect_dealloc, /* tp_dealloc */
472 (printfunc)0, /* tp_print */
473 (getattrfunc)0, /* tp_getattr */
474 (setattrfunc)0, /* tp_setattr */
475 0, /* tp_reserved */
476 (reprfunc)0, /* tp_repr */
477 0, /* tp_as_number */
478 0, /* tp_as_sequence */
479 0, /* tp_as_mapping */
480 (hashfunc)0, /* tp_hash */
481 (ternaryfunc)0, /* tp_call */
482 (reprfunc)0, /* tp_str */
483 0, /* tp_getattro */
484 0, /* tp_setattro */
485 0, /* tp_as_buffer */
486 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
487 Dialect_Type_doc, /* tp_doc */
488 0, /* tp_traverse */
489 0, /* tp_clear */
490 0, /* tp_richcompare */
491 0, /* tp_weaklistoffset */
492 0, /* tp_iter */
493 0, /* tp_iternext */
494 0, /* tp_methods */
495 Dialect_memberlist, /* tp_members */
496 Dialect_getsetlist, /* tp_getset */
497 0, /* tp_base */
498 0, /* tp_dict */
499 0, /* tp_descr_get */
500 0, /* tp_descr_set */
501 0, /* tp_dictoffset */
502 0, /* tp_init */
503 0, /* tp_alloc */
504 dialect_new, /* tp_new */
505 0, /* tp_free */
Skip Montanarob4a04172003-03-20 23:29:12 +0000506};
507
Andrew McNamara91b97462005-01-11 01:07:23 +0000508/*
509 * Return an instance of the dialect type, given a Python instance or kwarg
510 * description of the dialect
511 */
512static PyObject *
513_call_dialect(PyObject *dialect_inst, PyObject *kwargs)
514{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000515 PyObject *ctor_args;
516 PyObject *dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000517
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000518 ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst);
519 if (ctor_args == NULL)
520 return NULL;
521 dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs);
522 Py_DECREF(ctor_args);
523 return dialect;
Andrew McNamara91b97462005-01-11 01:07:23 +0000524}
525
/*
 * READER
 */
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000529static int
Skip Montanarob4a04172003-03-20 23:29:12 +0000530parse_save_field(ReaderObj *self)
531{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000532 PyObject *field;
Skip Montanarob4a04172003-03-20 23:29:12 +0000533
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200534 field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
535 (void *) self->field, self->field_len);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000536 if (field == NULL)
537 return -1;
538 self->field_len = 0;
539 if (self->numeric_field) {
540 PyObject *tmp;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000541
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000542 self->numeric_field = 0;
543 tmp = PyNumber_Float(field);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000544 Py_DECREF(field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200545 if (tmp == NULL)
546 return -1;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000547 field = tmp;
548 }
549 PyList_Append(self->fields, field);
550 Py_DECREF(field);
551 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000552}
553
554static int
555parse_grow_buff(ReaderObj *self)
556{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000557 if (self->field_size == 0) {
558 self->field_size = 4096;
559 if (self->field != NULL)
560 PyMem_Free(self->field);
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200561 self->field = PyMem_New(Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000562 }
563 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200564 Py_UCS4 *field = self->field;
Antoine Pitrou40455752010-08-15 18:51:10 +0000565 if (self->field_size > PY_SSIZE_T_MAX / 2) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000566 PyErr_NoMemory();
567 return 0;
568 }
569 self->field_size *= 2;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200570 self->field = PyMem_Resize(field, Py_UCS4, self->field_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000571 }
572 if (self->field == NULL) {
573 PyErr_NoMemory();
574 return 0;
575 }
576 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +0000577}
578
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000579static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200580parse_add_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000581{
Antoine Pitroue7672d32012-05-16 11:33:08 +0200582 if (self->field_len >= _csvstate_global->field_limit) {
583 PyErr_Format(_csvstate_global->error_obj, "field larger than field limit (%ld)",
584 _csvstate_global->field_limit);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000585 return -1;
586 }
587 if (self->field_len == self->field_size && !parse_grow_buff(self))
588 return -1;
589 self->field[self->field_len++] = c;
590 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000591}
592
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000593static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200594parse_process_char(ReaderObj *self, Py_UCS4 c)
Skip Montanarob4a04172003-03-20 23:29:12 +0000595{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000596 DialectObj *dialect = self->dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +0000597
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000598 switch (self->state) {
599 case START_RECORD:
600 /* start of record */
601 if (c == '\0')
602 /* empty line - return [] */
603 break;
604 else if (c == '\n' || c == '\r') {
605 self->state = EAT_CRNL;
606 break;
607 }
608 /* normal character - handle as START_FIELD */
609 self->state = START_FIELD;
610 /* fallthru */
611 case START_FIELD:
612 /* expecting field */
613 if (c == '\n' || c == '\r' || c == '\0') {
614 /* save empty field - return [fields] */
615 if (parse_save_field(self) < 0)
616 return -1;
617 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
618 }
619 else if (c == dialect->quotechar &&
620 dialect->quoting != QUOTE_NONE) {
621 /* start quoted field */
622 self->state = IN_QUOTED_FIELD;
623 }
624 else if (c == dialect->escapechar) {
625 /* possible escaped character */
626 self->state = ESCAPED_CHAR;
627 }
628 else if (c == ' ' && dialect->skipinitialspace)
629 /* ignore space at start of field */
630 ;
631 else if (c == dialect->delimiter) {
632 /* save empty field */
633 if (parse_save_field(self) < 0)
634 return -1;
635 }
636 else {
637 /* begin new unquoted field */
638 if (dialect->quoting == QUOTE_NONNUMERIC)
639 self->numeric_field = 1;
640 if (parse_add_char(self, c) < 0)
641 return -1;
642 self->state = IN_FIELD;
643 }
644 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000645
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000646 case ESCAPED_CHAR:
R David Murray9a7d3762013-03-20 00:15:20 -0400647 if (c == '\n' || c=='\r') {
R David Murrayc7c42ef2013-03-19 22:41:47 -0400648 if (parse_add_char(self, c) < 0)
649 return -1;
650 self->state = AFTER_ESCAPED_CRNL;
651 break;
652 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000653 if (c == '\0')
654 c = '\n';
655 if (parse_add_char(self, c) < 0)
656 return -1;
657 self->state = IN_FIELD;
658 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000659
R David Murrayc7c42ef2013-03-19 22:41:47 -0400660 case AFTER_ESCAPED_CRNL:
661 if (c == '\0')
662 break;
663 /*fallthru*/
664
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000665 case IN_FIELD:
666 /* in unquoted field */
667 if (c == '\n' || c == '\r' || c == '\0') {
668 /* end of line - return [fields] */
669 if (parse_save_field(self) < 0)
670 return -1;
671 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
672 }
673 else if (c == dialect->escapechar) {
674 /* possible escaped character */
675 self->state = ESCAPED_CHAR;
676 }
677 else if (c == dialect->delimiter) {
678 /* save field - wait for new field */
679 if (parse_save_field(self) < 0)
680 return -1;
681 self->state = START_FIELD;
682 }
683 else {
684 /* normal character - save in field */
685 if (parse_add_char(self, c) < 0)
686 return -1;
687 }
688 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000689
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000690 case IN_QUOTED_FIELD:
691 /* in quoted field */
692 if (c == '\0')
693 ;
694 else if (c == dialect->escapechar) {
695 /* Possible escape character */
696 self->state = ESCAPE_IN_QUOTED_FIELD;
697 }
698 else if (c == dialect->quotechar &&
699 dialect->quoting != QUOTE_NONE) {
700 if (dialect->doublequote) {
701 /* doublequote; " represented by "" */
702 self->state = QUOTE_IN_QUOTED_FIELD;
703 }
704 else {
705 /* end of quote part of field */
706 self->state = IN_FIELD;
707 }
708 }
709 else {
710 /* normal character - save in field */
711 if (parse_add_char(self, c) < 0)
712 return -1;
713 }
714 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000715
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000716 case ESCAPE_IN_QUOTED_FIELD:
717 if (c == '\0')
718 c = '\n';
719 if (parse_add_char(self, c) < 0)
720 return -1;
721 self->state = IN_QUOTED_FIELD;
722 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000723
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000724 case QUOTE_IN_QUOTED_FIELD:
725 /* doublequote - seen a quote in an quoted field */
726 if (dialect->quoting != QUOTE_NONE &&
727 c == dialect->quotechar) {
728 /* save "" as " */
729 if (parse_add_char(self, c) < 0)
730 return -1;
731 self->state = IN_QUOTED_FIELD;
732 }
733 else if (c == dialect->delimiter) {
734 /* save field - wait for new field */
735 if (parse_save_field(self) < 0)
736 return -1;
737 self->state = START_FIELD;
738 }
739 else if (c == '\n' || c == '\r' || c == '\0') {
740 /* end of line - return [fields] */
741 if (parse_save_field(self) < 0)
742 return -1;
743 self->state = (c == '\0' ? START_RECORD : EAT_CRNL);
744 }
745 else if (!dialect->strict) {
746 if (parse_add_char(self, c) < 0)
747 return -1;
748 self->state = IN_FIELD;
749 }
750 else {
751 /* illegal */
Antoine Pitroue7672d32012-05-16 11:33:08 +0200752 PyErr_Format(_csvstate_global->error_obj, "'%c' expected after '%c'",
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000753 dialect->delimiter,
754 dialect->quotechar);
755 return -1;
756 }
757 break;
Skip Montanarob4a04172003-03-20 23:29:12 +0000758
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000759 case EAT_CRNL:
760 if (c == '\n' || c == '\r')
761 ;
762 else if (c == '\0')
763 self->state = START_RECORD;
764 else {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200765 PyErr_Format(_csvstate_global->error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000766 return -1;
767 }
768 break;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000769
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000770 }
771 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000772}
773
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000774static int
775parse_reset(ReaderObj *self)
776{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000777 Py_XDECREF(self->fields);
778 self->fields = PyList_New(0);
779 if (self->fields == NULL)
780 return -1;
781 self->field_len = 0;
782 self->state = START_RECORD;
783 self->numeric_field = 0;
784 return 0;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000785}
Skip Montanarob4a04172003-03-20 23:29:12 +0000786
787static PyObject *
Skip Montanarob4a04172003-03-20 23:29:12 +0000788Reader_iternext(ReaderObj *self)
789{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000790 PyObject *fields = NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200791 Py_UCS4 c;
792 Py_ssize_t pos, linelen;
793 unsigned int kind;
794 void *data;
795 PyObject *lineobj;
Skip Montanarob4a04172003-03-20 23:29:12 +0000796
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000797 if (parse_reset(self) < 0)
798 return NULL;
799 do {
800 lineobj = PyIter_Next(self->input_iter);
801 if (lineobj == NULL) {
802 /* End of input OR exception */
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700803 if (!PyErr_Occurred() && (self->field_len != 0 ||
804 self->state == IN_QUOTED_FIELD)) {
805 if (self->dialect->strict)
Senthil Kumaran49d13022012-09-25 02:37:20 -0700806 PyErr_SetString(_csvstate_global->error_obj,
807 "unexpected end of data");
Senthil Kumaran67b7b982012-09-25 02:30:27 -0700808 else if (parse_save_field(self) >= 0)
809 break;
810 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000811 return NULL;
812 }
813 if (!PyUnicode_Check(lineobj)) {
Antoine Pitroue7672d32012-05-16 11:33:08 +0200814 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000815 "iterator should return strings, "
816 "not %.200s "
817 "(did you open the file in text mode?)",
818 lineobj->ob_type->tp_name
819 );
820 Py_DECREF(lineobj);
821 return NULL;
822 }
Stefan Krahe6996ed2012-11-02 14:44:20 +0100823 if (PyUnicode_READY(lineobj) == -1) {
824 Py_DECREF(lineobj);
825 return NULL;
826 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000827 ++self->line_num;
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200828 kind = PyUnicode_KIND(lineobj);
829 data = PyUnicode_DATA(lineobj);
830 pos = 0;
831 linelen = PyUnicode_GET_LENGTH(lineobj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000832 while (linelen--) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200833 c = PyUnicode_READ(kind, data, pos);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000834 if (c == '\0') {
Skip Montanarob4a04172003-03-20 23:29:12 +0000835 Py_DECREF(lineobj);
Antoine Pitroue7672d32012-05-16 11:33:08 +0200836 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000837 "line contains NULL byte");
838 goto err;
839 }
840 if (parse_process_char(self, c) < 0) {
841 Py_DECREF(lineobj);
842 goto err;
843 }
Antoine Pitrou77ea6402011-10-07 04:26:55 +0200844 pos++;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000845 }
846 Py_DECREF(lineobj);
847 if (parse_process_char(self, 0) < 0)
848 goto err;
849 } while (self->state != START_RECORD);
Skip Montanarob4a04172003-03-20 23:29:12 +0000850
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000851 fields = self->fields;
852 self->fields = NULL;
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000853err:
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000854 return fields;
Skip Montanarob4a04172003-03-20 23:29:12 +0000855}
856
857static void
858Reader_dealloc(ReaderObj *self)
859{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000860 PyObject_GC_UnTrack(self);
861 Py_XDECREF(self->dialect);
862 Py_XDECREF(self->input_iter);
863 Py_XDECREF(self->fields);
864 if (self->field != NULL)
865 PyMem_Free(self->field);
866 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +0000867}
868
/* GC traversal hook for reader objects (used as tp_traverse in Reader_Type).
 * Visits every object reference held by the reader: the dialect, the input
 * iterator and the list of fields parsed so far.  Py_VISIT relies on the
 * parameter names "visit" and "arg". */
static int
Reader_traverse(ReaderObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->input_iter);
    Py_VISIT(self->fields);
    return 0;
}
877
/* GC clear hook for reader objects (used as tp_clear in Reader_Type).
 * Drops the same references that Reader_traverse() reports and leaves the
 * members set to NULL.  Always returns 0. */
static int
Reader_clear(ReaderObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->input_iter);
    Py_CLEAR(self->fields);
    return 0;
}
886
887PyDoc_STRVAR(Reader_Type_doc,
888"CSV reader\n"
889"\n"
890"Reader objects are responsible for reading and parsing tabular data\n"
891"in CSV format.\n"
892);
893
894static struct PyMethodDef Reader_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000895 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +0000896};
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000897#define R_OFF(x) offsetof(ReaderObj, x)
898
899static struct PyMemberDef Reader_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000900 { "dialect", T_OBJECT, R_OFF(dialect), READONLY },
901 { "line_num", T_ULONG, R_OFF(line_num), READONLY },
902 { NULL }
Andrew McNamaraf69d94f2005-01-13 11:30:54 +0000903};
904
Skip Montanarob4a04172003-03-20 23:29:12 +0000905
906static PyTypeObject Reader_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000907 PyVarObject_HEAD_INIT(NULL, 0)
908 "_csv.reader", /*tp_name*/
909 sizeof(ReaderObj), /*tp_basicsize*/
910 0, /*tp_itemsize*/
911 /* methods */
912 (destructor)Reader_dealloc, /*tp_dealloc*/
913 (printfunc)0, /*tp_print*/
914 (getattrfunc)0, /*tp_getattr*/
915 (setattrfunc)0, /*tp_setattr*/
916 0, /*tp_reserved*/
917 (reprfunc)0, /*tp_repr*/
918 0, /*tp_as_number*/
919 0, /*tp_as_sequence*/
920 0, /*tp_as_mapping*/
921 (hashfunc)0, /*tp_hash*/
922 (ternaryfunc)0, /*tp_call*/
923 (reprfunc)0, /*tp_str*/
924 0, /*tp_getattro*/
925 0, /*tp_setattro*/
926 0, /*tp_as_buffer*/
927 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
928 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
929 Reader_Type_doc, /*tp_doc*/
930 (traverseproc)Reader_traverse, /*tp_traverse*/
931 (inquiry)Reader_clear, /*tp_clear*/
932 0, /*tp_richcompare*/
933 0, /*tp_weaklistoffset*/
934 PyObject_SelfIter, /*tp_iter*/
935 (getiterfunc)Reader_iternext, /*tp_iternext*/
936 Reader_methods, /*tp_methods*/
937 Reader_memberlist, /*tp_members*/
938 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +0000939
940};
941
942static PyObject *
943csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
944{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000945 PyObject * iterator, * dialect = NULL;
946 ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type);
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000948 if (!self)
949 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +0000950
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000951 self->dialect = NULL;
952 self->fields = NULL;
953 self->input_iter = NULL;
954 self->field = NULL;
955 self->field_size = 0;
956 self->line_num = 0;
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000957
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000958 if (parse_reset(self) < 0) {
959 Py_DECREF(self);
960 return NULL;
961 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000962
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000963 if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
964 Py_DECREF(self);
965 return NULL;
966 }
967 self->input_iter = PyObject_GetIter(iterator);
968 if (self->input_iter == NULL) {
969 PyErr_SetString(PyExc_TypeError,
970 "argument 1 must be an iterator");
971 Py_DECREF(self);
972 return NULL;
973 }
974 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
975 if (self->dialect == NULL) {
976 Py_DECREF(self);
977 return NULL;
978 }
Skip Montanarob4a04172003-03-20 23:29:12 +0000979
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000980 PyObject_GC_Track(self);
981 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +0000982}
983
984/*
985 * WRITER
986 */
987/* ---------------------------------------------------------------- */
988static void
989join_reset(WriterObj *self)
990{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +0000991 self->rec_len = 0;
992 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +0000993}
994
/* Allocation granularity of the writer's record buffer, counted in Py_UCS4
 * elements: join_check_rec_size() sizes the buffer to a multiple of this. */
#define MEM_INCR 32768
996
997/* Calculate new record length or append field to record. Return new
998 * record length.
999 */
Antoine Pitrou40455752010-08-15 18:51:10 +00001000static Py_ssize_t
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001001join_append_data(WriterObj *self, unsigned int field_kind, void *field_data,
1002 Py_ssize_t field_len, int quote_empty, int *quoted,
1003 int copy_phase)
Skip Montanarob4a04172003-03-20 23:29:12 +00001004{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001005 DialectObj *dialect = self->dialect;
1006 int i;
Antoine Pitrou40455752010-08-15 18:51:10 +00001007 Py_ssize_t rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001008
1009#define ADDCH(c) \
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001010 do {\
1011 if (copy_phase) \
1012 self->rec[rec_len] = c;\
1013 rec_len++;\
1014 } while(0)
Andrew McNamarac89f2842005-01-12 07:44:42 +00001015
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001016 rec_len = self->rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001017
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001018 /* If this is not the first field we need a field separator */
1019 if (self->num_fields > 0)
1020 ADDCH(dialect->delimiter);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001021
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001022 /* Handle preceding quote */
1023 if (copy_phase && *quoted)
1024 ADDCH(dialect->quotechar);
Andrew McNamarac89f2842005-01-12 07:44:42 +00001025
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001026 /* Copy/count field data */
1027 /* If field is null just pass over */
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001028 for (i = 0; field_data && (i < field_len); i++) {
1029 Py_UCS4 c = PyUnicode_READ(field_kind, field_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001030 int want_escape = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001031
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001032 if (c == dialect->delimiter ||
1033 c == dialect->escapechar ||
1034 c == dialect->quotechar ||
Martin v. Löwis5f4f4c52011-11-01 18:42:23 +01001035 PyUnicode_FindChar(
1036 dialect->lineterminator, c, 0,
1037 PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001038 if (dialect->quoting == QUOTE_NONE)
1039 want_escape = 1;
1040 else {
1041 if (c == dialect->quotechar) {
1042 if (dialect->doublequote)
1043 ADDCH(dialect->quotechar);
1044 else
1045 want_escape = 1;
1046 }
1047 if (!want_escape)
1048 *quoted = 1;
1049 }
1050 if (want_escape) {
1051 if (!dialect->escapechar) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001052 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001053 "need to escape, but no escapechar set");
1054 return -1;
1055 }
1056 ADDCH(dialect->escapechar);
1057 }
1058 }
1059 /* Copy field character into record buffer.
1060 */
1061 ADDCH(c);
1062 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001063
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001064 /* If field is empty check if it needs to be quoted.
1065 */
1066 if (i == 0 && quote_empty) {
1067 if (dialect->quoting == QUOTE_NONE) {
Antoine Pitroue7672d32012-05-16 11:33:08 +02001068 PyErr_Format(_csvstate_global->error_obj,
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001069 "single empty field record must be quoted");
1070 return -1;
1071 }
1072 else
1073 *quoted = 1;
1074 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001075
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001076 if (*quoted) {
1077 if (copy_phase)
1078 ADDCH(dialect->quotechar);
1079 else
1080 rec_len += 2;
1081 }
1082 return rec_len;
Andrew McNamarac89f2842005-01-12 07:44:42 +00001083#undef ADDCH
Skip Montanarob4a04172003-03-20 23:29:12 +00001084}
1085
1086static int
Antoine Pitrou40455752010-08-15 18:51:10 +00001087join_check_rec_size(WriterObj *self, Py_ssize_t rec_len)
Skip Montanarob4a04172003-03-20 23:29:12 +00001088{
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001089
Antoine Pitrou40455752010-08-15 18:51:10 +00001090 if (rec_len < 0 || rec_len > PY_SSIZE_T_MAX - MEM_INCR) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001091 PyErr_NoMemory();
1092 return 0;
1093 }
Amaury Forgeot d'Arc9c74b142008-06-18 00:47:36 +00001094
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001095 if (rec_len > self->rec_size) {
1096 if (self->rec_size == 0) {
1097 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
1098 if (self->rec != NULL)
1099 PyMem_Free(self->rec);
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001100 self->rec = PyMem_New(Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001101 }
1102 else {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001103 Py_UCS4* old_rec = self->rec;
Skip Montanarob4a04172003-03-20 23:29:12 +00001104
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001105 self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001106 self->rec = PyMem_Resize(old_rec, Py_UCS4, self->rec_size);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001107 if (self->rec == NULL)
1108 PyMem_Free(old_rec);
1109 }
1110 if (self->rec == NULL) {
1111 PyErr_NoMemory();
1112 return 0;
1113 }
1114 }
1115 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001116}
1117
1118static int
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001119join_append(WriterObj *self, PyObject *field, int *quoted, int quote_empty)
Skip Montanarob4a04172003-03-20 23:29:12 +00001120{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001121 unsigned int field_kind = -1;
1122 void *field_data = NULL;
1123 Py_ssize_t field_len = 0;
Antoine Pitrou40455752010-08-15 18:51:10 +00001124 Py_ssize_t rec_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001125
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001126 if (field != NULL) {
Stefan Krahe6996ed2012-11-02 14:44:20 +01001127 if (PyUnicode_READY(field) == -1)
1128 return 0;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001129 field_kind = PyUnicode_KIND(field);
1130 field_data = PyUnicode_DATA(field);
1131 field_len = PyUnicode_GET_LENGTH(field);
1132 }
1133 rec_len = join_append_data(self, field_kind, field_data, field_len,
1134 quote_empty, quoted, 0);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001135 if (rec_len < 0)
1136 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001137
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001138 /* grow record buffer if necessary */
1139 if (!join_check_rec_size(self, rec_len))
1140 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001141
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001142 self->rec_len = join_append_data(self, field_kind, field_data, field_len,
1143 quote_empty, quoted, 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001144 self->num_fields++;
Skip Montanarob4a04172003-03-20 23:29:12 +00001145
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001146 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001147}
1148
1149static int
1150join_append_lineterminator(WriterObj *self)
1151{
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001152 Py_ssize_t terminator_len, i;
1153 unsigned int term_kind;
1154 void *term_data;
Skip Montanarob4a04172003-03-20 23:29:12 +00001155
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001156 terminator_len = PyUnicode_GET_LENGTH(self->dialect->lineterminator);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001157 if (terminator_len == -1)
1158 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001159
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001160 /* grow record buffer if necessary */
1161 if (!join_check_rec_size(self, self->rec_len + terminator_len))
1162 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001163
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001164 term_kind = PyUnicode_KIND(self->dialect->lineterminator);
1165 term_data = PyUnicode_DATA(self->dialect->lineterminator);
1166 for (i = 0; i < terminator_len; i++)
1167 self->rec[self->rec_len + i] = PyUnicode_READ(term_kind, term_data, i);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001168 self->rec_len += terminator_len;
Skip Montanarob4a04172003-03-20 23:29:12 +00001169
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001170 return 1;
Skip Montanarob4a04172003-03-20 23:29:12 +00001171}
1172
1173PyDoc_STRVAR(csv_writerow_doc,
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001174"writerow(sequence)\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001175"\n"
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001176"Construct and write a CSV record from a sequence of fields. Non-string\n"
Skip Montanarob4a04172003-03-20 23:29:12 +00001177"elements will be converted to string.");
1178
1179static PyObject *
1180csv_writerow(WriterObj *self, PyObject *seq)
1181{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001182 DialectObj *dialect = self->dialect;
Antoine Pitrou40455752010-08-15 18:51:10 +00001183 Py_ssize_t len, i;
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001184 PyObject *line, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001185
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001186 if (!PySequence_Check(seq))
Antoine Pitroue7672d32012-05-16 11:33:08 +02001187 return PyErr_Format(_csvstate_global->error_obj, "sequence expected");
Skip Montanarob4a04172003-03-20 23:29:12 +00001188
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001189 len = PySequence_Length(seq);
1190 if (len < 0)
1191 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001192
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001193 /* Join all fields in internal buffer.
1194 */
1195 join_reset(self);
1196 for (i = 0; i < len; i++) {
1197 PyObject *field;
1198 int append_ok;
1199 int quoted;
Skip Montanarob4a04172003-03-20 23:29:12 +00001200
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001201 field = PySequence_GetItem(seq, i);
1202 if (field == NULL)
1203 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001204
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001205 switch (dialect->quoting) {
1206 case QUOTE_NONNUMERIC:
1207 quoted = !PyNumber_Check(field);
1208 break;
1209 case QUOTE_ALL:
1210 quoted = 1;
1211 break;
1212 default:
1213 quoted = 0;
1214 break;
1215 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001216
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001217 if (PyUnicode_Check(field)) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001218 append_ok = join_append(self, field, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001219 Py_DECREF(field);
1220 }
1221 else if (field == Py_None) {
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001222 append_ok = join_append(self, NULL, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001223 Py_DECREF(field);
1224 }
1225 else {
1226 PyObject *str;
Skip Montanarob4a04172003-03-20 23:29:12 +00001227
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001228 str = PyObject_Str(field);
1229 Py_DECREF(field);
1230 if (str == NULL)
1231 return NULL;
Antoine Pitrou77ea6402011-10-07 04:26:55 +02001232 append_ok = join_append(self, str, &quoted, len == 1);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001233 Py_DECREF(str);
1234 }
1235 if (!append_ok)
1236 return NULL;
1237 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001238
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001239 /* Add line terminator.
1240 */
1241 if (!join_append_lineterminator(self))
1242 return 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001243
Antoine Pitrou72ca65d2011-10-07 04:35:30 +02001244 line = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
1245 (void *) self->rec, self->rec_len);
1246 if (line == NULL)
1247 return NULL;
1248 result = PyObject_CallFunctionObjArgs(self->writeline, line, NULL);
1249 Py_DECREF(line);
1250 return result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001251}
1252
Skip Montanaro860fc0b2003-04-12 18:57:52 +00001253PyDoc_STRVAR(csv_writerows_doc,
1254"writerows(sequence of sequences)\n"
1255"\n"
1256"Construct and write a series of sequences to a csv file. Non-string\n"
1257"elements will be converted to string.");
1258
Skip Montanarob4a04172003-03-20 23:29:12 +00001259static PyObject *
1260csv_writerows(WriterObj *self, PyObject *seqseq)
1261{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001262 PyObject *row_iter, *row_obj, *result;
Skip Montanarob4a04172003-03-20 23:29:12 +00001263
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001264 row_iter = PyObject_GetIter(seqseq);
1265 if (row_iter == NULL) {
1266 PyErr_SetString(PyExc_TypeError,
1267 "writerows() argument must be iterable");
1268 return NULL;
1269 }
1270 while ((row_obj = PyIter_Next(row_iter))) {
1271 result = csv_writerow(self, row_obj);
1272 Py_DECREF(row_obj);
1273 if (!result) {
1274 Py_DECREF(row_iter);
1275 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001276 }
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001277 else
1278 Py_DECREF(result);
1279 }
1280 Py_DECREF(row_iter);
1281 if (PyErr_Occurred())
1282 return NULL;
1283 Py_INCREF(Py_None);
1284 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001285}
1286
1287static struct PyMethodDef Writer_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001288 { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc},
1289 { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc},
1290 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001291};
1292
1293#define W_OFF(x) offsetof(WriterObj, x)
1294
1295static struct PyMemberDef Writer_memberlist[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001296 { "dialect", T_OBJECT, W_OFF(dialect), READONLY },
1297 { NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001298};
1299
1300static void
1301Writer_dealloc(WriterObj *self)
1302{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001303 PyObject_GC_UnTrack(self);
1304 Py_XDECREF(self->dialect);
1305 Py_XDECREF(self->writeline);
1306 if (self->rec != NULL)
1307 PyMem_Free(self->rec);
1308 PyObject_GC_Del(self);
Jeremy Hylton42a8aed2003-04-14 02:20:55 +00001309}
1310
/* GC traversal hook for writer objects (used as tp_traverse in Writer_Type).
 * Visits the two object references a writer holds: the dialect and the
 * bound write callable.  Py_VISIT relies on the parameter names "visit"
 * and "arg". */
static int
Writer_traverse(WriterObj *self, visitproc visit, void *arg)
{
    Py_VISIT(self->dialect);
    Py_VISIT(self->writeline);
    return 0;
}
1318
/* GC clear hook for writer objects (used as tp_clear in Writer_Type).
 * Drops the references that Writer_traverse() reports and leaves the
 * members set to NULL.  Always returns 0. */
static int
Writer_clear(WriterObj *self)
{
    Py_CLEAR(self->dialect);
    Py_CLEAR(self->writeline);
    return 0;
}
1326
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001327PyDoc_STRVAR(Writer_Type_doc,
Skip Montanarob4a04172003-03-20 23:29:12 +00001328"CSV writer\n"
1329"\n"
1330"Writer objects are responsible for generating tabular data\n"
1331"in CSV format from sequence input.\n"
1332);
1333
1334static PyTypeObject Writer_Type = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001335 PyVarObject_HEAD_INIT(NULL, 0)
1336 "_csv.writer", /*tp_name*/
1337 sizeof(WriterObj), /*tp_basicsize*/
1338 0, /*tp_itemsize*/
1339 /* methods */
1340 (destructor)Writer_dealloc, /*tp_dealloc*/
1341 (printfunc)0, /*tp_print*/
1342 (getattrfunc)0, /*tp_getattr*/
1343 (setattrfunc)0, /*tp_setattr*/
1344 0, /*tp_reserved*/
1345 (reprfunc)0, /*tp_repr*/
1346 0, /*tp_as_number*/
1347 0, /*tp_as_sequence*/
1348 0, /*tp_as_mapping*/
1349 (hashfunc)0, /*tp_hash*/
1350 (ternaryfunc)0, /*tp_call*/
1351 (reprfunc)0, /*tp_str*/
1352 0, /*tp_getattro*/
1353 0, /*tp_setattro*/
1354 0, /*tp_as_buffer*/
1355 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
1356 Py_TPFLAGS_HAVE_GC, /*tp_flags*/
1357 Writer_Type_doc,
1358 (traverseproc)Writer_traverse, /*tp_traverse*/
1359 (inquiry)Writer_clear, /*tp_clear*/
1360 0, /*tp_richcompare*/
1361 0, /*tp_weaklistoffset*/
1362 (getiterfunc)0, /*tp_iter*/
1363 (getiterfunc)0, /*tp_iternext*/
1364 Writer_methods, /*tp_methods*/
1365 Writer_memberlist, /*tp_members*/
1366 0, /*tp_getset*/
Skip Montanarob4a04172003-03-20 23:29:12 +00001367};
1368
1369static PyObject *
1370csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args)
1371{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001372 PyObject * output_file, * dialect = NULL;
1373 WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type);
Martin v. Löwisbd928fe2011-10-14 10:20:37 +02001374 _Py_IDENTIFIER(write);
Skip Montanarob4a04172003-03-20 23:29:12 +00001375
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001376 if (!self)
1377 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001378
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001379 self->dialect = NULL;
1380 self->writeline = NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001381
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001382 self->rec = NULL;
1383 self->rec_size = 0;
1384 self->rec_len = 0;
1385 self->num_fields = 0;
Skip Montanarob4a04172003-03-20 23:29:12 +00001386
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001387 if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) {
1388 Py_DECREF(self);
1389 return NULL;
1390 }
Martin v. Löwis1ee1b6f2011-10-10 18:11:30 +02001391 self->writeline = _PyObject_GetAttrId(output_file, &PyId_write);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001392 if (self->writeline == NULL || !PyCallable_Check(self->writeline)) {
1393 PyErr_SetString(PyExc_TypeError,
1394 "argument 1 must have a \"write\" method");
1395 Py_DECREF(self);
1396 return NULL;
1397 }
1398 self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args);
1399 if (self->dialect == NULL) {
1400 Py_DECREF(self);
1401 return NULL;
1402 }
1403 PyObject_GC_Track(self);
1404 return (PyObject *)self;
Skip Montanarob4a04172003-03-20 23:29:12 +00001405}
1406
1407/*
1408 * DIALECT REGISTRY
1409 */
1410static PyObject *
1411csv_list_dialects(PyObject *module, PyObject *args)
1412{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001413 return PyDict_Keys(_csvstate_global->dialects);
Skip Montanarob4a04172003-03-20 23:29:12 +00001414}
1415
1416static PyObject *
Andrew McNamara86625972005-01-11 01:28:33 +00001417csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs)
Skip Montanarob4a04172003-03-20 23:29:12 +00001418{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001419 PyObject *name_obj, *dialect_obj = NULL;
1420 PyObject *dialect;
Skip Montanarob4a04172003-03-20 23:29:12 +00001421
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001422 if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj))
1423 return NULL;
Stefan Krahe6996ed2012-11-02 14:44:20 +01001424 if (!PyUnicode_Check(name_obj)) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001425 PyErr_SetString(PyExc_TypeError,
Stefan Krahe6996ed2012-11-02 14:44:20 +01001426 "dialect name must be a string");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001427 return NULL;
1428 }
Stefan Krahe6996ed2012-11-02 14:44:20 +01001429 if (PyUnicode_READY(name_obj) == -1)
1430 return NULL;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001431 dialect = _call_dialect(dialect_obj, kwargs);
1432 if (dialect == NULL)
1433 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001434 if (PyDict_SetItem(_csvstate_global->dialects, name_obj, dialect) < 0) {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001435 Py_DECREF(dialect);
1436 return NULL;
1437 }
1438 Py_DECREF(dialect);
1439 Py_INCREF(Py_None);
1440 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001441}
1442
1443static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001444csv_unregister_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001445{
Antoine Pitroue7672d32012-05-16 11:33:08 +02001446 if (PyDict_DelItem(_csvstate_global->dialects, name_obj) < 0)
1447 return PyErr_Format(_csvstate_global->error_obj, "unknown dialect");
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001448 Py_INCREF(Py_None);
1449 return Py_None;
Skip Montanarob4a04172003-03-20 23:29:12 +00001450}
1451
1452static PyObject *
Skip Montanaro577c7a72003-04-12 19:17:14 +00001453csv_get_dialect(PyObject *module, PyObject *name_obj)
Skip Montanarob4a04172003-03-20 23:29:12 +00001454{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001455 return get_dialect_from_registry(name_obj);
Skip Montanarob4a04172003-03-20 23:29:12 +00001456}
1457
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001458static PyObject *
Andrew McNamara31d88962005-01-12 03:45:10 +00001459csv_field_size_limit(PyObject *module, PyObject *args)
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001460{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001461 PyObject *new_limit = NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001462 long old_limit = _csvstate_global->field_limit;
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001463
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001464 if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit))
1465 return NULL;
1466 if (new_limit != NULL) {
1467 if (!PyLong_CheckExact(new_limit)) {
1468 PyErr_Format(PyExc_TypeError,
1469 "limit must be an integer");
1470 return NULL;
1471 }
Antoine Pitroue7672d32012-05-16 11:33:08 +02001472 _csvstate_global->field_limit = PyLong_AsLong(new_limit);
1473 if (_csvstate_global->field_limit == -1 && PyErr_Occurred()) {
1474 _csvstate_global->field_limit = old_limit;
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001475 return NULL;
1476 }
1477 }
1478 return PyLong_FromLong(old_limit);
Andrew McNamarae4d05c42005-01-11 07:32:02 +00001479}
1480
Skip Montanarob4a04172003-03-20 23:29:12 +00001481/*
1482 * MODULE
1483 */
1484
/* Docstring of the _csv module itself; referenced from the _csvmodule
 * definition.  Describes dialect registration and the dialect settings. */
PyDoc_STRVAR(csv_module_doc,
"CSV parsing and writing.\n"
"\n"
"This module provides classes that assist in the reading and writing\n"
"of Comma Separated Value (CSV) files, and implements the interface\n"
"described by PEP 305. Although many CSV files are simple to parse,\n"
"the format is not formally defined by a stable specification and\n"
"is subtle enough that parsing lines of a CSV file with something\n"
"like line.split(\",\") is bound to fail. The module supports three\n"
"basic APIs: reading, writing, and registration of dialects.\n"
"\n"
"\n"
"DIALECT REGISTRATION:\n"
"\n"
"Readers and writers support a dialect argument, which is a convenient\n"
"handle on a group of settings. When the dialect argument is a string,\n"
"it identifies one of the dialects previously registered with the module.\n"
"If it is a class or instance, the attributes of the argument are used as\n"
"the settings for the reader or writer:\n"
"\n"
" class excel:\n"
" delimiter = ','\n"
" quotechar = '\"'\n"
" escapechar = None\n"
" doublequote = True\n"
" skipinitialspace = False\n"
" lineterminator = '\\r\\n'\n"
" quoting = QUOTE_MINIMAL\n"
"\n"
"SETTINGS:\n"
"\n"
" * quotechar - specifies a one-character string to use as the \n"
" quoting character. It defaults to '\"'.\n"
" * delimiter - specifies a one-character string to use as the \n"
" field separator. It defaults to ','.\n"
" * skipinitialspace - specifies how to interpret whitespace which\n"
" immediately follows a delimiter. It defaults to False, which\n"
" means that whitespace immediately following a delimiter is part\n"
" of the following field.\n"
" * lineterminator - specifies the character sequence which should \n"
" terminate rows.\n"
" * quoting - controls when quotes should be generated by the writer.\n"
" It can take on any of the following module constants:\n"
"\n"
" csv.QUOTE_MINIMAL means only when required, for example, when a\n"
" field contains either the quotechar or the delimiter\n"
" csv.QUOTE_ALL means that quotes are always placed around fields.\n"
" csv.QUOTE_NONNUMERIC means that quotes are always placed around\n"
" fields which do not parse as integers or floating point\n"
" numbers.\n"
" csv.QUOTE_NONE means that quotes are never placed around fields.\n"
" * escapechar - specifies a one-character string used to escape \n"
" the delimiter when quoting is set to QUOTE_NONE.\n"
" * doublequote - controls the handling of quotes inside fields. When\n"
" True, two consecutive quotes are interpreted as one during read,\n"
" and when writing, each quote character embedded in the data is\n"
" written as two quotes\n");
1542
/* Docstring for the module-level reader() function (see csv_methods). */
PyDoc_STRVAR(csv_reader_doc,
" csv_reader = reader(iterable [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in csv_reader:\n"
" process(row)\n"
"\n"
"The \"iterable\" argument can be any object that returns a line\n"
"of input for each iteration, such as a file object or a list. The\n"
"optional \"dialect\" parameter is discussed below. The function\n"
"also accepts optional keyword arguments which override settings\n"
"provided by the dialect.\n"
"\n"
"The returned object is an iterator. Each iteration returns a row\n"
"of the CSV file (which can span multiple input lines):\n");
Skip Montanarob4a04172003-03-20 23:29:12 +00001557
/* Docstring for the module-level writer() function (see csv_methods). */
PyDoc_STRVAR(csv_writer_doc,
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" for row in sequence:\n"
" csv_writer.writerow(row)\n"
"\n"
" [or]\n"
"\n"
" csv_writer = csv.writer(fileobj [, dialect='excel']\n"
" [optional keyword args])\n"
" csv_writer.writerows(rows)\n"
"\n"
"The \"fileobj\" argument can be any object that supports the file API.\n");
1571
1572PyDoc_STRVAR(csv_list_dialects_doc,
1573"Return a list of all know dialect names.\n"
1574" names = csv.list_dialects()");
1575
/* Docstring for get_dialect() (see csv_methods). */
PyDoc_STRVAR(csv_get_dialect_doc,
"Return the dialect instance associated with name.\n"
" dialect = csv.get_dialect(name)");

/* Docstring for register_dialect() (see csv_methods). */
PyDoc_STRVAR(csv_register_dialect_doc,
"Create a mapping from a string name to a dialect class.\n"
" dialect = csv.register_dialect(name, dialect)");

/* Docstring for unregister_dialect() (see csv_methods). */
PyDoc_STRVAR(csv_unregister_dialect_doc,
"Delete the name/dialect mapping associated with a string name.\n"
" csv.unregister_dialect(name)");

/* Docstring for field_size_limit() (see csv_methods). */
PyDoc_STRVAR(csv_field_size_limit_doc,
"Sets an upper limit on parsed fields.\n"
" csv.field_size_limit([limit])\n"
"\n"
"Returns old limit. If limit is not given, no new limit is set and\n"
"the old limit is returned");
1594
Skip Montanarob4a04172003-03-20 23:29:12 +00001595static struct PyMethodDef csv_methods[] = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001596 { "reader", (PyCFunction)csv_reader,
1597 METH_VARARGS | METH_KEYWORDS, csv_reader_doc},
1598 { "writer", (PyCFunction)csv_writer,
1599 METH_VARARGS | METH_KEYWORDS, csv_writer_doc},
1600 { "list_dialects", (PyCFunction)csv_list_dialects,
1601 METH_NOARGS, csv_list_dialects_doc},
1602 { "register_dialect", (PyCFunction)csv_register_dialect,
1603 METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc},
1604 { "unregister_dialect", (PyCFunction)csv_unregister_dialect,
1605 METH_O, csv_unregister_dialect_doc},
1606 { "get_dialect", (PyCFunction)csv_get_dialect,
1607 METH_O, csv_get_dialect_doc},
1608 { "field_size_limit", (PyCFunction)csv_field_size_limit,
1609 METH_VARARGS, csv_field_size_limit_doc},
1610 { NULL, NULL }
Skip Montanarob4a04172003-03-20 23:29:12 +00001611};
1612
Martin v. Löwis1a214512008-06-11 05:26:20 +00001613static struct PyModuleDef _csvmodule = {
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001614 PyModuleDef_HEAD_INIT,
1615 "_csv",
1616 csv_module_doc,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001617 sizeof(_csvstate),
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001618 csv_methods,
1619 NULL,
Antoine Pitroue7672d32012-05-16 11:33:08 +02001620 _csv_traverse,
1621 _csv_clear,
1622 _csv_free
Martin v. Löwis1a214512008-06-11 05:26:20 +00001623};
1624
Skip Montanarob4a04172003-03-20 23:29:12 +00001625PyMODINIT_FUNC
Martin v. Löwis1a214512008-06-11 05:26:20 +00001626PyInit__csv(void)
Skip Montanarob4a04172003-03-20 23:29:12 +00001627{
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001628 PyObject *module;
1629 StyleDesc *style;
Skip Montanarob4a04172003-03-20 23:29:12 +00001630
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001631 if (PyType_Ready(&Dialect_Type) < 0)
1632 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001633
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001634 if (PyType_Ready(&Reader_Type) < 0)
1635 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001636
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001637 if (PyType_Ready(&Writer_Type) < 0)
1638 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001639
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001640 /* Create the module and add the functions */
1641 module = PyModule_Create(&_csvmodule);
1642 if (module == NULL)
1643 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001644
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001645 /* Add version to the module. */
1646 if (PyModule_AddStringConstant(module, "__version__",
1647 MODULE_VERSION) == -1)
1648 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001649
Antoine Pitroue7672d32012-05-16 11:33:08 +02001650 /* Set the field limit */
1651 _csvstate(module)->field_limit = 128 * 1024;
1652 /* Do I still need to add this var to the Module Dict? */
1653
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001654 /* Add _dialects dictionary */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001655 _csvstate(module)->dialects = PyDict_New();
1656 if (_csvstate(module)->dialects == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001657 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001658 Py_INCREF(_csvstate(module)->dialects);
1659 if (PyModule_AddObject(module, "_dialects", _csvstate(module)->dialects))
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001660 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001661
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001662 /* Add quote styles into dictionary */
1663 for (style = quote_styles; style->name; style++) {
1664 if (PyModule_AddIntConstant(module, style->name,
1665 style->style) == -1)
1666 return NULL;
1667 }
Skip Montanarob4a04172003-03-20 23:29:12 +00001668
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001669 /* Add the Dialect type */
1670 Py_INCREF(&Dialect_Type);
1671 if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type))
1672 return NULL;
Skip Montanarob4a04172003-03-20 23:29:12 +00001673
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001674 /* Add the CSV exception object to the module. */
Antoine Pitroue7672d32012-05-16 11:33:08 +02001675 _csvstate(module)->error_obj = PyErr_NewException("_csv.Error", NULL, NULL);
1676 if (_csvstate(module)->error_obj == NULL)
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001677 return NULL;
Antoine Pitroue7672d32012-05-16 11:33:08 +02001678 Py_INCREF(_csvstate(module)->error_obj);
1679 PyModule_AddObject(module, "Error", _csvstate(module)->error_obj);
Antoine Pitrouf95a1b32010-05-09 15:52:27 +00001680 return module;
Skip Montanarob4a04172003-03-20 23:29:12 +00001681}