| Skip Montanaro | a16b21f | 2003-03-23 14:32:54 +0000 | [diff] [blame] | 1 | /* csv module */ | 
|  | 2 |  | 
|  | 3 | /* | 
|  | 4 |  | 
|  | 5 | This module provides the low-level underpinnings of a CSV reading/writing | 
|  | 6 | module.  Users should not use this module directly, but import the csv.py | 
|  | 7 | module instead. | 
|  | 8 |  | 
|  | 9 | **** For people modifying this code, please note that as of this writing | 
| Skip Montanaro | dfa35fa | 2003-04-11 21:40:01 +0000 | [diff] [blame] | 10 | **** (2003-03-23), it is intended that this code should work with Python | 
| Skip Montanaro | a16b21f | 2003-03-23 14:32:54 +0000 | [diff] [blame] | 11 | **** 2.2. | 
|  | 12 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 13 | */ | 
|  | 14 |  | 
| Skip Montanaro | 7b01a83 | 2003-04-12 19:23:46 +0000 | [diff] [blame] | 15 | #define MODULE_VERSION "1.0" | 
|  | 16 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 17 | #include "Python.h" | 
|  | 18 | #include "structmember.h" | 
|  | 19 |  | 
| Skip Montanaro | a16b21f | 2003-03-23 14:32:54 +0000 | [diff] [blame] | 20 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 21 | /* begin 2.2 compatibility macros */ | 
|  | 22 | #ifndef PyDoc_STRVAR | 
|  | 23 | /* Define macros for inline documentation. */ | 
|  | 24 | #define PyDoc_VAR(name) static char name[] | 
|  | 25 | #define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str) | 
|  | 26 | #ifdef WITH_DOC_STRINGS | 
|  | 27 | #define PyDoc_STR(str) str | 
|  | 28 | #else | 
|  | 29 | #define PyDoc_STR(str) "" | 
|  | 30 | #endif | 
|  | 31 | #endif /* ifndef PyDoc_STRVAR */ | 
|  | 32 |  | 
|  | 33 | #ifndef PyMODINIT_FUNC | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 34 | #       if defined(__cplusplus) | 
|  | 35 | #               define PyMODINIT_FUNC extern "C" void | 
|  | 36 | #       else /* __cplusplus */ | 
|  | 37 | #               define PyMODINIT_FUNC void | 
|  | 38 | #       endif /* __cplusplus */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 39 | #endif | 
| Thomas Wouters | 2742c5e | 2006-04-15 17:33:14 +0000 | [diff] [blame] | 40 |  | 
#ifndef Py_CLEAR
/* Py_CLEAR(op): drop the reference stored in `op` and leave `op` NULL.
 *
 * Does nothing when `op` is already NULL.  `op` is set to NULL before the
 * reference is released.  The temporary has a deliberately unusual name so
 * that Py_CLEAR(tmp), on a caller variable that happens to be called `tmp`,
 * is not shadowed by the macro's own local.
 * `op` is evaluated more than once, so it must be a plain lvalue.
 */
#define Py_CLEAR(op)                                            \
    do {                                                        \
        if (op) {                                               \
            PyObject *_py_tmp = (PyObject *)(op);               \
            (op) = NULL;                                        \
            Py_DECREF(_py_tmp);                                 \
        }                                                       \
    } while (0)
#endif
#ifndef Py_VISIT
/* Py_VISIT(op): call `visit(op, arg)` when `op` is non-NULL.
 *
 * Relies on `visit` and `arg` being in scope at the point of use.  A
 * non-zero result from `visit` is returned from the enclosing function
 * immediately.
 */
#define Py_VISIT(op)                                            \
    do {                                                        \
        if (op) {                                               \
            int visit_rc = visit((PyObject *)(op), arg);        \
            if (visit_rc)                                       \
                return visit_rc;                                \
        }                                                       \
    } while (0)
#endif
| Thomas Wouters | 2742c5e | 2006-04-15 17:33:14 +0000 | [diff] [blame] | 61 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 62 | /* end 2.2 compatibility macros */ | 
|  | 63 |  | 
| Andrew McNamara | 37d2bdf | 2005-01-10 12:22:48 +0000 | [diff] [blame] | 64 | #define IS_BASESTRING(o) \ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 65 | PyObject_TypeCheck(o, &PyBaseString_Type) | 
| Andrew McNamara | 37d2bdf | 2005-01-10 12:22:48 +0000 | [diff] [blame] | 66 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 67 | static PyObject *error_obj;     /* CSV exception */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 68 | static PyObject *dialects;      /* Dialect registry */ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 69 | static long field_limit = 128 * 1024;   /* max parsed field size */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 70 |  | 
/* States of the reader's parser (held in ReaderObj.state).
 * The enumerators keep their implicit values 0..7 in this order. */
typedef enum {
    START_RECORD,
    START_FIELD,
    ESCAPED_CHAR,
    IN_FIELD,
    IN_QUOTED_FIELD,
    ESCAPE_IN_QUOTED_FIELD,
    QUOTE_IN_QUOTED_FIELD,
    EAT_CRNL
} ParserState;
|  | 76 |  | 
/* Quoting styles a dialect may select (stored in DialectObj.quoting).
 * The enumerators keep their implicit values 0..3 in this order. */
typedef enum {
    QUOTE_MINIMAL,
    QUOTE_ALL,
    QUOTE_NONNUMERIC,
    QUOTE_NONE
} QuoteStyle;
|  | 80 |  | 
|  | 81 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 82 | QuoteStyle style; | 
|  | 83 | char *name; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 84 | } StyleDesc; | 
|  | 85 |  | 
|  | 86 | static StyleDesc quote_styles[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 87 | { QUOTE_MINIMAL,    "QUOTE_MINIMAL" }, | 
|  | 88 | { QUOTE_ALL,        "QUOTE_ALL" }, | 
|  | 89 | { QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, | 
|  | 90 | { QUOTE_NONE,       "QUOTE_NONE" }, | 
|  | 91 | { 0 } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 92 | }; | 
|  | 93 |  | 
|  | 94 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 95 | PyObject_HEAD | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 96 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 97 | int doublequote;            /* is " represented by ""? */ | 
|  | 98 | char delimiter;             /* field separator */ | 
|  | 99 | char quotechar;             /* quote character */ | 
|  | 100 | char escapechar;            /* escape character */ | 
|  | 101 | int skipinitialspace;       /* ignore spaces following delimiter? */ | 
|  | 102 | PyObject *lineterminator; /* string to write between records */ | 
|  | 103 | int quoting;                /* style of quoting to write */ | 
|  | 104 |  | 
|  | 105 | int strict;                 /* raise exception on bad CSV */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 106 | } DialectObj; | 
|  | 107 |  | 
|  | 108 | staticforward PyTypeObject Dialect_Type; | 
|  | 109 |  | 
|  | 110 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 111 | PyObject_HEAD | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 112 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 113 | PyObject *input_iter;   /* iterate over this for input lines */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 114 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 115 | DialectObj *dialect;    /* parsing dialect */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 116 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 117 | PyObject *fields;           /* field list for current record */ | 
|  | 118 | ParserState state;          /* current CSV parse state */ | 
|  | 119 | char *field;                /* build current field in here */ | 
|  | 120 | int field_size;             /* size of allocated buffer */ | 
|  | 121 | int field_len;              /* length of current field */ | 
|  | 122 | int numeric_field;          /* treat field as numeric */ | 
|  | 123 | unsigned long line_num;     /* Source-file line number */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 124 | } ReaderObj; | 
|  | 125 |  | 
|  | 126 | staticforward PyTypeObject Reader_Type; | 
|  | 127 |  | 
| Christian Heimes | e93237d | 2007-12-19 02:37:44 +0000 | [diff] [blame] | 128 | #define ReaderObject_Check(v)   (Py_TYPE(v) == &Reader_Type) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 129 |  | 
|  | 130 | typedef struct { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 131 | PyObject_HEAD | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 132 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 133 | PyObject *writeline;    /* write output lines to this file */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 134 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 135 | DialectObj *dialect;    /* parsing dialect */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 136 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 137 | char *rec;                  /* buffer for parser.join */ | 
|  | 138 | int rec_size;               /* size of allocated record */ | 
|  | 139 | int rec_len;                /* length of record */ | 
|  | 140 | int num_fields;             /* number of fields in record */ | 
|  | 141 | } WriterObj; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 142 |  | 
|  | 143 | staticforward PyTypeObject Writer_Type; | 
|  | 144 |  | 
|  | 145 | /* | 
|  | 146 | * DIALECT class | 
|  | 147 | */ | 
|  | 148 |  | 
|  | 149 | static PyObject * | 
|  | 150 | get_dialect_from_registry(PyObject * name_obj) | 
|  | 151 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 152 | PyObject *dialect_obj; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 153 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 154 | dialect_obj = PyDict_GetItem(dialects, name_obj); | 
|  | 155 | if (dialect_obj == NULL) { | 
|  | 156 | if (!PyErr_Occurred()) | 
|  | 157 | PyErr_Format(error_obj, "unknown dialect"); | 
|  | 158 | } | 
|  | 159 | else | 
|  | 160 | Py_INCREF(dialect_obj); | 
|  | 161 | return dialect_obj; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 162 | } | 
|  | 163 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 164 | static PyObject * | 
|  | 165 | get_string(PyObject *str) | 
|  | 166 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 167 | Py_XINCREF(str); | 
|  | 168 | return str; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 169 | } | 
|  | 170 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 171 | static PyObject * | 
|  | 172 | get_nullchar_as_None(char c) | 
|  | 173 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 174 | if (c == '\0') { | 
|  | 175 | Py_INCREF(Py_None); | 
|  | 176 | return Py_None; | 
|  | 177 | } | 
|  | 178 | else | 
|  | 179 | return PyString_FromStringAndSize((char*)&c, 1); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 180 | } | 
|  | 181 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 182 | static PyObject * | 
|  | 183 | Dialect_get_lineterminator(DialectObj *self) | 
|  | 184 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 185 | return get_string(self->lineterminator); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 186 | } | 
|  | 187 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 188 | static PyObject * | 
|  | 189 | Dialect_get_escapechar(DialectObj *self) | 
|  | 190 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 191 | return get_nullchar_as_None(self->escapechar); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 192 | } | 
|  | 193 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 194 | static PyObject * | 
|  | 195 | Dialect_get_quotechar(DialectObj *self) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 196 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 197 | return get_nullchar_as_None(self->quotechar); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 198 | } | 
|  | 199 |  | 
|  | 200 | static PyObject * | 
|  | 201 | Dialect_get_quoting(DialectObj *self) | 
|  | 202 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 203 | return PyInt_FromLong(self->quoting); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 204 | } | 
|  | 205 |  | 
|  | 206 | static int | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 207 | _set_bool(const char *name, int *target, PyObject *src, int dflt) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 208 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 209 | if (src == NULL) | 
|  | 210 | *target = dflt; | 
|  | 211 | else | 
|  | 212 | *target = PyObject_IsTrue(src); | 
|  | 213 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 214 | } | 
|  | 215 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 216 | static int | 
|  | 217 | _set_int(const char *name, int *target, PyObject *src, int dflt) | 
|  | 218 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 219 | if (src == NULL) | 
|  | 220 | *target = dflt; | 
|  | 221 | else { | 
|  | 222 | if (!PyInt_Check(src)) { | 
|  | 223 | PyErr_Format(PyExc_TypeError, | 
|  | 224 | "\"%s\" must be an integer", name); | 
|  | 225 | return -1; | 
|  | 226 | } | 
|  | 227 | *target = PyInt_AsLong(src); | 
|  | 228 | } | 
|  | 229 | return 0; | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 230 | } | 
|  | 231 |  | 
|  | 232 | static int | 
|  | 233 | _set_char(const char *name, char *target, PyObject *src, char dflt) | 
|  | 234 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 235 | if (src == NULL) | 
|  | 236 | *target = dflt; | 
|  | 237 | else { | 
|  | 238 | if (src == Py_None || PyString_Size(src) == 0) | 
|  | 239 | *target = '\0'; | 
|  | 240 | else if (!PyString_Check(src) || PyString_Size(src) != 1) { | 
|  | 241 | PyErr_Format(PyExc_TypeError, | 
|  | 242 | "\"%s\" must be an 1-character string", | 
|  | 243 | name); | 
|  | 244 | return -1; | 
|  | 245 | } | 
|  | 246 | else { | 
|  | 247 | char *s = PyString_AsString(src); | 
|  | 248 | if (s == NULL) | 
|  | 249 | return -1; | 
|  | 250 | *target = s[0]; | 
|  | 251 | } | 
|  | 252 | } | 
|  | 253 | return 0; | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 254 | } | 
|  | 255 |  | 
|  | 256 | static int | 
|  | 257 | _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) | 
|  | 258 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 259 | if (src == NULL) | 
|  | 260 | *target = PyString_FromString(dflt); | 
|  | 261 | else { | 
|  | 262 | if (src == Py_None) | 
|  | 263 | *target = NULL; | 
|  | 264 | else if (!IS_BASESTRING(src)) { | 
|  | 265 | PyErr_Format(PyExc_TypeError, | 
|  | 266 | "\"%s\" must be an string", name); | 
|  | 267 | return -1; | 
|  | 268 | } | 
|  | 269 | else { | 
|  | 270 | Py_XDECREF(*target); | 
|  | 271 | Py_INCREF(src); | 
|  | 272 | *target = src; | 
|  | 273 | } | 
|  | 274 | } | 
|  | 275 | return 0; | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 276 | } | 
|  | 277 |  | 
|  | 278 | static int | 
|  | 279 | dialect_check_quoting(int quoting) | 
|  | 280 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 281 | StyleDesc *qs = quote_styles; | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 282 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 283 | for (qs = quote_styles; qs->name; qs++) { | 
|  | 284 | if (qs->style == quoting) | 
|  | 285 | return 0; | 
|  | 286 | } | 
|  | 287 | PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); | 
|  | 288 | return -1; | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 289 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 290 |  | 
|  | 291 | #define D_OFF(x) offsetof(DialectObj, x) | 
|  | 292 |  | 
|  | 293 | static struct PyMemberDef Dialect_memberlist[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 294 | { "delimiter",          T_CHAR, D_OFF(delimiter), READONLY }, | 
|  | 295 | { "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY }, | 
|  | 296 | { "doublequote",        T_INT, D_OFF(doublequote), READONLY }, | 
|  | 297 | { "strict",             T_INT, D_OFF(strict), READONLY }, | 
|  | 298 | { NULL } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 299 | }; | 
|  | 300 |  | 
|  | 301 | static PyGetSetDef Dialect_getsetlist[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 302 | { "escapechar",             (getter)Dialect_get_escapechar}, | 
|  | 303 | { "lineterminator",         (getter)Dialect_get_lineterminator}, | 
|  | 304 | { "quotechar",              (getter)Dialect_get_quotechar}, | 
|  | 305 | { "quoting",                (getter)Dialect_get_quoting}, | 
|  | 306 | {NULL}, | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 307 | }; | 
|  | 308 |  | 
|  | 309 | static void | 
|  | 310 | Dialect_dealloc(DialectObj *self) | 
|  | 311 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 312 | Py_XDECREF(self->lineterminator); | 
|  | 313 | Py_TYPE(self)->tp_free((PyObject *)self); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 314 | } | 
|  | 315 |  | 
/* Keyword names accepted by dialect_new(), in the order of its
 * "|OOOOOOOOO" argument format; NULL-terminated. */
static char *dialect_kws[] = {
    "dialect",
    "delimiter", "doublequote", "escapechar", "lineterminator",
    "quotechar", "quoting", "skipinitialspace", "strict",
    NULL
};
|  | 328 |  | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 329 | static PyObject * | 
|  | 330 | dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 331 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 332 | DialectObj *self; | 
|  | 333 | PyObject *ret = NULL; | 
|  | 334 | PyObject *dialect = NULL; | 
|  | 335 | PyObject *delimiter = NULL; | 
|  | 336 | PyObject *doublequote = NULL; | 
|  | 337 | PyObject *escapechar = NULL; | 
|  | 338 | PyObject *lineterminator = NULL; | 
|  | 339 | PyObject *quotechar = NULL; | 
|  | 340 | PyObject *quoting = NULL; | 
|  | 341 | PyObject *skipinitialspace = NULL; | 
|  | 342 | PyObject *strict = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 343 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 344 | if (!PyArg_ParseTupleAndKeywords(args, kwargs, | 
|  | 345 | "|OOOOOOOOO", dialect_kws, | 
|  | 346 | &dialect, | 
|  | 347 | &delimiter, | 
|  | 348 | &doublequote, | 
|  | 349 | &escapechar, | 
|  | 350 | &lineterminator, | 
|  | 351 | "echar, | 
|  | 352 | "ing, | 
|  | 353 | &skipinitialspace, | 
|  | 354 | &strict)) | 
|  | 355 | return NULL; | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 356 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 357 | if (dialect != NULL) { | 
|  | 358 | if (IS_BASESTRING(dialect)) { | 
|  | 359 | dialect = get_dialect_from_registry(dialect); | 
|  | 360 | if (dialect == NULL) | 
|  | 361 | return NULL; | 
|  | 362 | } | 
|  | 363 | else | 
|  | 364 | Py_INCREF(dialect); | 
|  | 365 | /* Can we reuse this instance? */ | 
|  | 366 | if (PyObject_TypeCheck(dialect, &Dialect_Type) && | 
|  | 367 | delimiter == 0 && | 
|  | 368 | doublequote == 0 && | 
|  | 369 | escapechar == 0 && | 
|  | 370 | lineterminator == 0 && | 
|  | 371 | quotechar == 0 && | 
|  | 372 | quoting == 0 && | 
|  | 373 | skipinitialspace == 0 && | 
|  | 374 | strict == 0) | 
|  | 375 | return dialect; | 
|  | 376 | } | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 377 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 378 | self = (DialectObj *)type->tp_alloc(type, 0); | 
|  | 379 | if (self == NULL) { | 
|  | 380 | Py_XDECREF(dialect); | 
|  | 381 | return NULL; | 
|  | 382 | } | 
|  | 383 | self->lineterminator = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 384 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 385 | Py_XINCREF(delimiter); | 
|  | 386 | Py_XINCREF(doublequote); | 
|  | 387 | Py_XINCREF(escapechar); | 
|  | 388 | Py_XINCREF(lineterminator); | 
|  | 389 | Py_XINCREF(quotechar); | 
|  | 390 | Py_XINCREF(quoting); | 
|  | 391 | Py_XINCREF(skipinitialspace); | 
|  | 392 | Py_XINCREF(strict); | 
|  | 393 | if (dialect != NULL) { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 394 | #define DIALECT_GETATTR(v, n) \ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 395 | if (v == NULL) \ | 
|  | 396 | v = PyObject_GetAttrString(dialect, n) | 
|  | 397 | DIALECT_GETATTR(delimiter, "delimiter"); | 
|  | 398 | DIALECT_GETATTR(doublequote, "doublequote"); | 
|  | 399 | DIALECT_GETATTR(escapechar, "escapechar"); | 
|  | 400 | DIALECT_GETATTR(lineterminator, "lineterminator"); | 
|  | 401 | DIALECT_GETATTR(quotechar, "quotechar"); | 
|  | 402 | DIALECT_GETATTR(quoting, "quoting"); | 
|  | 403 | DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); | 
|  | 404 | DIALECT_GETATTR(strict, "strict"); | 
|  | 405 | PyErr_Clear(); | 
|  | 406 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 407 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 408 | /* check types and convert to C values */ | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 409 | #define DIASET(meth, name, target, src, dflt) \ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 410 | if (meth(name, target, src, dflt)) \ | 
|  | 411 | goto err | 
|  | 412 | DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); | 
|  | 413 | DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); | 
|  | 414 | DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); | 
|  | 415 | DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); | 
|  | 416 | DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); | 
|  | 417 | DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); | 
|  | 418 | DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); | 
|  | 419 | DIASET(_set_bool, "strict", &self->strict, strict, 0); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 420 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 421 | /* validate options */ | 
|  | 422 | if (dialect_check_quoting(self->quoting)) | 
|  | 423 | goto err; | 
|  | 424 | if (self->delimiter == 0) { | 
|  | 425 | PyErr_SetString(PyExc_TypeError, "delimiter must be set"); | 
|  | 426 | goto err; | 
|  | 427 | } | 
|  | 428 | if (quotechar == Py_None && quoting == NULL) | 
|  | 429 | self->quoting = QUOTE_NONE; | 
|  | 430 | if (self->quoting != QUOTE_NONE && self->quotechar == 0) { | 
|  | 431 | PyErr_SetString(PyExc_TypeError, | 
|  | 432 | "quotechar must be set if quoting enabled"); | 
|  | 433 | goto err; | 
|  | 434 | } | 
|  | 435 | if (self->lineterminator == 0) { | 
|  | 436 | PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); | 
|  | 437 | goto err; | 
|  | 438 | } | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 439 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 440 | ret = (PyObject *)self; | 
|  | 441 | Py_INCREF(self); | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 442 | err: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 443 | Py_XDECREF(self); | 
|  | 444 | Py_XDECREF(dialect); | 
|  | 445 | Py_XDECREF(delimiter); | 
|  | 446 | Py_XDECREF(doublequote); | 
|  | 447 | Py_XDECREF(escapechar); | 
|  | 448 | Py_XDECREF(lineterminator); | 
|  | 449 | Py_XDECREF(quotechar); | 
|  | 450 | Py_XDECREF(quoting); | 
|  | 451 | Py_XDECREF(skipinitialspace); | 
|  | 452 | Py_XDECREF(strict); | 
|  | 453 | return ret; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 454 | } | 
|  | 455 |  | 
|  | 456 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 457 | PyDoc_STRVAR(Dialect_Type_doc, | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 458 | "CSV dialect\n" | 
|  | 459 | "\n" | 
|  | 460 | "The Dialect type records CSV parsing and generation options.\n"); | 
|  | 461 |  | 
|  | 462 | static PyTypeObject Dialect_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 463 | PyVarObject_HEAD_INIT(NULL, 0) | 
|  | 464 | "_csv.Dialect",                         /* tp_name */ | 
|  | 465 | sizeof(DialectObj),                     /* tp_basicsize */ | 
|  | 466 | 0,                                      /* tp_itemsize */ | 
|  | 467 | /*  methods  */ | 
|  | 468 | (destructor)Dialect_dealloc,            /* tp_dealloc */ | 
|  | 469 | (printfunc)0,                           /* tp_print */ | 
|  | 470 | (getattrfunc)0,                         /* tp_getattr */ | 
|  | 471 | (setattrfunc)0,                         /* tp_setattr */ | 
|  | 472 | (cmpfunc)0,                             /* tp_compare */ | 
|  | 473 | (reprfunc)0,                            /* tp_repr */ | 
|  | 474 | 0,                                      /* tp_as_number */ | 
|  | 475 | 0,                                      /* tp_as_sequence */ | 
|  | 476 | 0,                                      /* tp_as_mapping */ | 
|  | 477 | (hashfunc)0,                            /* tp_hash */ | 
|  | 478 | (ternaryfunc)0,                         /* tp_call */ | 
|  | 479 | (reprfunc)0,                                /* tp_str */ | 
|  | 480 | 0,                                      /* tp_getattro */ | 
|  | 481 | 0,                                      /* tp_setattro */ | 
|  | 482 | 0,                                      /* tp_as_buffer */ | 
|  | 483 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | 
|  | 484 | Dialect_Type_doc,                       /* tp_doc */ | 
|  | 485 | 0,                                      /* tp_traverse */ | 
|  | 486 | 0,                                      /* tp_clear */ | 
|  | 487 | 0,                                      /* tp_richcompare */ | 
|  | 488 | 0,                                      /* tp_weaklistoffset */ | 
|  | 489 | 0,                                      /* tp_iter */ | 
|  | 490 | 0,                                      /* tp_iternext */ | 
|  | 491 | 0,                                          /* tp_methods */ | 
|  | 492 | Dialect_memberlist,                     /* tp_members */ | 
|  | 493 | Dialect_getsetlist,                     /* tp_getset */ | 
|  | 494 | 0,                                          /* tp_base */ | 
|  | 495 | 0,                                          /* tp_dict */ | 
|  | 496 | 0,                                          /* tp_descr_get */ | 
|  | 497 | 0,                                          /* tp_descr_set */ | 
|  | 498 | 0,                                          /* tp_dictoffset */ | 
|  | 499 | 0,                                          /* tp_init */ | 
|  | 500 | 0,                                          /* tp_alloc */ | 
|  | 501 | dialect_new,                                /* tp_new */ | 
|  | 502 | 0,                                          /* tp_free */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 503 | }; | 
|  | 504 |  | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 505 | /* | 
|  | 506 | * Return an instance of the dialect type, given a Python instance or kwarg | 
|  | 507 | * description of the dialect | 
|  | 508 | */ | 
|  | 509 | static PyObject * | 
|  | 510 | _call_dialect(PyObject *dialect_inst, PyObject *kwargs) | 
|  | 511 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 512 | PyObject *ctor_args; | 
|  | 513 | PyObject *dialect; | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 514 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 515 | ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst); | 
|  | 516 | if (ctor_args == NULL) | 
|  | 517 | return NULL; | 
|  | 518 | dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs); | 
|  | 519 | Py_DECREF(ctor_args); | 
|  | 520 | return dialect; | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 521 | } | 
|  | 522 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 523 | /* | 
|  | 524 | * READER | 
|  | 525 | */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 526 | static int | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 527 | parse_save_field(ReaderObj *self) | 
|  | 528 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 529 | PyObject *field; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 530 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 531 | field = PyString_FromStringAndSize(self->field, self->field_len); | 
|  | 532 | if (field == NULL) | 
|  | 533 | return -1; | 
|  | 534 | self->field_len = 0; | 
|  | 535 | if (self->numeric_field) { | 
|  | 536 | PyObject *tmp; | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 537 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 538 | self->numeric_field = 0; | 
|  | 539 | tmp = PyNumber_Float(field); | 
|  | 540 | if (tmp == NULL) { | 
|  | 541 | Py_DECREF(field); | 
|  | 542 | return -1; | 
|  | 543 | } | 
|  | 544 | Py_DECREF(field); | 
|  | 545 | field = tmp; | 
|  | 546 | } | 
|  | 547 | PyList_Append(self->fields, field); | 
|  | 548 | Py_DECREF(field); | 
|  | 549 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 550 | } | 
|  | 551 |  | 
|  | 552 | static int | 
|  | 553 | parse_grow_buff(ReaderObj *self) | 
|  | 554 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 555 | if (self->field_size == 0) { | 
|  | 556 | self->field_size = 4096; | 
|  | 557 | if (self->field != NULL) | 
|  | 558 | PyMem_Free(self->field); | 
|  | 559 | self->field = PyMem_Malloc(self->field_size); | 
|  | 560 | } | 
|  | 561 | else { | 
|  | 562 | if (self->field_size > INT_MAX / 2) { | 
|  | 563 | PyErr_NoMemory(); | 
|  | 564 | return 0; | 
|  | 565 | } | 
|  | 566 | self->field_size *= 2; | 
|  | 567 | self->field = PyMem_Realloc(self->field, self->field_size); | 
|  | 568 | } | 
|  | 569 | if (self->field == NULL) { | 
|  | 570 | PyErr_NoMemory(); | 
|  | 571 | return 0; | 
|  | 572 | } | 
|  | 573 | return 1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 574 | } | 
|  | 575 |  | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 576 | static int | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 577 | parse_add_char(ReaderObj *self, char c) | 
|  | 578 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 579 | if (self->field_len >= field_limit) { | 
|  | 580 | PyErr_Format(error_obj, "field larger than field limit (%ld)", | 
|  | 581 | field_limit); | 
|  | 582 | return -1; | 
|  | 583 | } | 
|  | 584 | if (self->field_len == self->field_size && !parse_grow_buff(self)) | 
|  | 585 | return -1; | 
|  | 586 | self->field[self->field_len++] = c; | 
|  | 587 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 588 | } | 
|  | 589 |  | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 590 | static int | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 591 | parse_process_char(ReaderObj *self, char c) | 
|  | 592 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 593 | DialectObj *dialect = self->dialect; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 594 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 595 | switch (self->state) { | 
|  | 596 | case START_RECORD: | 
|  | 597 | /* start of record */ | 
|  | 598 | if (c == '\0') | 
|  | 599 | /* empty line - return [] */ | 
|  | 600 | break; | 
|  | 601 | else if (c == '\n' || c == '\r') { | 
|  | 602 | self->state = EAT_CRNL; | 
|  | 603 | break; | 
|  | 604 | } | 
|  | 605 | /* normal character - handle as START_FIELD */ | 
|  | 606 | self->state = START_FIELD; | 
|  | 607 | /* fallthru */ | 
|  | 608 | case START_FIELD: | 
|  | 609 | /* expecting field */ | 
|  | 610 | if (c == '\n' || c == '\r' || c == '\0') { | 
|  | 611 | /* save empty field - return [fields] */ | 
|  | 612 | if (parse_save_field(self) < 0) | 
|  | 613 | return -1; | 
|  | 614 | self->state = (c == '\0' ? START_RECORD : EAT_CRNL); | 
|  | 615 | } | 
|  | 616 | else if (c == dialect->quotechar && | 
|  | 617 | dialect->quoting != QUOTE_NONE) { | 
|  | 618 | /* start quoted field */ | 
|  | 619 | self->state = IN_QUOTED_FIELD; | 
|  | 620 | } | 
|  | 621 | else if (c == dialect->escapechar) { | 
|  | 622 | /* possible escaped character */ | 
|  | 623 | self->state = ESCAPED_CHAR; | 
|  | 624 | } | 
|  | 625 | else if (c == ' ' && dialect->skipinitialspace) | 
|  | 626 | /* ignore space at start of field */ | 
|  | 627 | ; | 
|  | 628 | else if (c == dialect->delimiter) { | 
|  | 629 | /* save empty field */ | 
|  | 630 | if (parse_save_field(self) < 0) | 
|  | 631 | return -1; | 
|  | 632 | } | 
|  | 633 | else { | 
|  | 634 | /* begin new unquoted field */ | 
|  | 635 | if (dialect->quoting == QUOTE_NONNUMERIC) | 
|  | 636 | self->numeric_field = 1; | 
|  | 637 | if (parse_add_char(self, c) < 0) | 
|  | 638 | return -1; | 
|  | 639 | self->state = IN_FIELD; | 
|  | 640 | } | 
|  | 641 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 642 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 643 | case ESCAPED_CHAR: | 
|  | 644 | if (c == '\0') | 
|  | 645 | c = '\n'; | 
|  | 646 | if (parse_add_char(self, c) < 0) | 
|  | 647 | return -1; | 
|  | 648 | self->state = IN_FIELD; | 
|  | 649 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 650 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 651 | case IN_FIELD: | 
|  | 652 | /* in unquoted field */ | 
|  | 653 | if (c == '\n' || c == '\r' || c == '\0') { | 
|  | 654 | /* end of line - return [fields] */ | 
|  | 655 | if (parse_save_field(self) < 0) | 
|  | 656 | return -1; | 
|  | 657 | self->state = (c == '\0' ? START_RECORD : EAT_CRNL); | 
|  | 658 | } | 
|  | 659 | else if (c == dialect->escapechar) { | 
|  | 660 | /* possible escaped character */ | 
|  | 661 | self->state = ESCAPED_CHAR; | 
|  | 662 | } | 
|  | 663 | else if (c == dialect->delimiter) { | 
|  | 664 | /* save field - wait for new field */ | 
|  | 665 | if (parse_save_field(self) < 0) | 
|  | 666 | return -1; | 
|  | 667 | self->state = START_FIELD; | 
|  | 668 | } | 
|  | 669 | else { | 
|  | 670 | /* normal character - save in field */ | 
|  | 671 | if (parse_add_char(self, c) < 0) | 
|  | 672 | return -1; | 
|  | 673 | } | 
|  | 674 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 675 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 676 | case IN_QUOTED_FIELD: | 
|  | 677 | /* in quoted field */ | 
|  | 678 | if (c == '\0') | 
|  | 679 | ; | 
|  | 680 | else if (c == dialect->escapechar) { | 
|  | 681 | /* Possible escape character */ | 
|  | 682 | self->state = ESCAPE_IN_QUOTED_FIELD; | 
|  | 683 | } | 
|  | 684 | else if (c == dialect->quotechar && | 
|  | 685 | dialect->quoting != QUOTE_NONE) { | 
|  | 686 | if (dialect->doublequote) { | 
|  | 687 | /* doublequote; " represented by "" */ | 
|  | 688 | self->state = QUOTE_IN_QUOTED_FIELD; | 
|  | 689 | } | 
|  | 690 | else { | 
|  | 691 | /* end of quote part of field */ | 
|  | 692 | self->state = IN_FIELD; | 
|  | 693 | } | 
|  | 694 | } | 
|  | 695 | else { | 
|  | 696 | /* normal character - save in field */ | 
|  | 697 | if (parse_add_char(self, c) < 0) | 
|  | 698 | return -1; | 
|  | 699 | } | 
|  | 700 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 701 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 702 | case ESCAPE_IN_QUOTED_FIELD: | 
|  | 703 | if (c == '\0') | 
|  | 704 | c = '\n'; | 
|  | 705 | if (parse_add_char(self, c) < 0) | 
|  | 706 | return -1; | 
|  | 707 | self->state = IN_QUOTED_FIELD; | 
|  | 708 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 709 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 710 | case QUOTE_IN_QUOTED_FIELD: | 
|  | 711 | /* doublequote - seen a quote in an quoted field */ | 
|  | 712 | if (dialect->quoting != QUOTE_NONE && | 
|  | 713 | c == dialect->quotechar) { | 
|  | 714 | /* save "" as " */ | 
|  | 715 | if (parse_add_char(self, c) < 0) | 
|  | 716 | return -1; | 
|  | 717 | self->state = IN_QUOTED_FIELD; | 
|  | 718 | } | 
|  | 719 | else if (c == dialect->delimiter) { | 
|  | 720 | /* save field - wait for new field */ | 
|  | 721 | if (parse_save_field(self) < 0) | 
|  | 722 | return -1; | 
|  | 723 | self->state = START_FIELD; | 
|  | 724 | } | 
|  | 725 | else if (c == '\n' || c == '\r' || c == '\0') { | 
|  | 726 | /* end of line - return [fields] */ | 
|  | 727 | if (parse_save_field(self) < 0) | 
|  | 728 | return -1; | 
|  | 729 | self->state = (c == '\0' ? START_RECORD : EAT_CRNL); | 
|  | 730 | } | 
|  | 731 | else if (!dialect->strict) { | 
|  | 732 | if (parse_add_char(self, c) < 0) | 
|  | 733 | return -1; | 
|  | 734 | self->state = IN_FIELD; | 
|  | 735 | } | 
|  | 736 | else { | 
|  | 737 | /* illegal */ | 
|  | 738 | PyErr_Format(error_obj, "'%c' expected after '%c'", | 
|  | 739 | dialect->delimiter, | 
|  | 740 | dialect->quotechar); | 
|  | 741 | return -1; | 
|  | 742 | } | 
|  | 743 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 744 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 745 | case EAT_CRNL: | 
|  | 746 | if (c == '\n' || c == '\r') | 
|  | 747 | ; | 
|  | 748 | else if (c == '\0') | 
|  | 749 | self->state = START_RECORD; | 
|  | 750 | else { | 
|  | 751 | PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); | 
|  | 752 | return -1; | 
|  | 753 | } | 
|  | 754 | break; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 755 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 756 | } | 
|  | 757 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 758 | } | 
|  | 759 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 760 | static int | 
|  | 761 | parse_reset(ReaderObj *self) | 
|  | 762 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 763 | Py_XDECREF(self->fields); | 
|  | 764 | self->fields = PyList_New(0); | 
|  | 765 | if (self->fields == NULL) | 
|  | 766 | return -1; | 
|  | 767 | self->field_len = 0; | 
|  | 768 | self->state = START_RECORD; | 
|  | 769 | self->numeric_field = 0; | 
|  | 770 | return 0; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 771 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 772 |  | 
|  | 773 | static PyObject * | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 774 | Reader_iternext(ReaderObj *self) | 
|  | 775 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 776 | PyObject *lineobj; | 
|  | 777 | PyObject *fields = NULL; | 
|  | 778 | char *line, c; | 
|  | 779 | int linelen; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 780 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 781 | if (parse_reset(self) < 0) | 
|  | 782 | return NULL; | 
|  | 783 | do { | 
|  | 784 | lineobj = PyIter_Next(self->input_iter); | 
|  | 785 | if (lineobj == NULL) { | 
|  | 786 | /* End of input OR exception */ | 
|  | 787 | if (!PyErr_Occurred() && self->field_len != 0) | 
|  | 788 | PyErr_Format(error_obj, | 
|  | 789 | "newline inside string"); | 
|  | 790 | return NULL; | 
|  | 791 | } | 
|  | 792 | ++self->line_num; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 793 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 794 | line = PyString_AsString(lineobj); | 
|  | 795 | linelen = PyString_Size(lineobj); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 796 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 797 | if (line == NULL || linelen < 0) { | 
|  | 798 | Py_DECREF(lineobj); | 
|  | 799 | return NULL; | 
|  | 800 | } | 
|  | 801 | while (linelen--) { | 
|  | 802 | c = *line++; | 
|  | 803 | if (c == '\0') { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 804 | Py_DECREF(lineobj); | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 805 | PyErr_Format(error_obj, | 
|  | 806 | "line contains NULL byte"); | 
|  | 807 | goto err; | 
|  | 808 | } | 
|  | 809 | if (parse_process_char(self, c) < 0) { | 
|  | 810 | Py_DECREF(lineobj); | 
|  | 811 | goto err; | 
|  | 812 | } | 
|  | 813 | } | 
|  | 814 | Py_DECREF(lineobj); | 
|  | 815 | if (parse_process_char(self, 0) < 0) | 
|  | 816 | goto err; | 
|  | 817 | } while (self->state != START_RECORD); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 818 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 819 | fields = self->fields; | 
|  | 820 | self->fields = NULL; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 821 | err: | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 822 | return fields; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 823 | } | 
|  | 824 |  | 
|  | 825 | static void | 
|  | 826 | Reader_dealloc(ReaderObj *self) | 
|  | 827 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 828 | PyObject_GC_UnTrack(self); | 
|  | 829 | Py_XDECREF(self->dialect); | 
|  | 830 | Py_XDECREF(self->input_iter); | 
|  | 831 | Py_XDECREF(self->fields); | 
|  | 832 | if (self->field != NULL) | 
|  | 833 | PyMem_Free(self->field); | 
|  | 834 | PyObject_GC_Del(self); | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 835 | } | 
|  | 836 |  | 
|  | 837 | static int | 
|  | 838 | Reader_traverse(ReaderObj *self, visitproc visit, void *arg) | 
|  | 839 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 840 | Py_VISIT(self->dialect); | 
|  | 841 | Py_VISIT(self->input_iter); | 
|  | 842 | Py_VISIT(self->fields); | 
|  | 843 | return 0; | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 844 | } | 
|  | 845 |  | 
|  | 846 | static int | 
|  | 847 | Reader_clear(ReaderObj *self) | 
|  | 848 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 849 | Py_CLEAR(self->dialect); | 
|  | 850 | Py_CLEAR(self->input_iter); | 
|  | 851 | Py_CLEAR(self->fields); | 
|  | 852 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 853 | } | 
|  | 854 |  | 
|  | 855 | PyDoc_STRVAR(Reader_Type_doc, | 
|  | 856 | "CSV reader\n" | 
|  | 857 | "\n" | 
|  | 858 | "Reader objects are responsible for reading and parsing tabular data\n" | 
|  | 859 | "in CSV format.\n" | 
|  | 860 | ); | 
|  | 861 |  | 
|  | 862 | static struct PyMethodDef Reader_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 863 | { NULL, NULL } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 864 | }; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 865 | #define R_OFF(x) offsetof(ReaderObj, x) | 
|  | 866 |  | 
|  | 867 | static struct PyMemberDef Reader_memberlist[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 868 | { "dialect", T_OBJECT, R_OFF(dialect), RO }, | 
|  | 869 | { "line_num", T_ULONG, R_OFF(line_num), RO }, | 
|  | 870 | { NULL } | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 871 | }; | 
|  | 872 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 873 |  | 
|  | 874 | static PyTypeObject Reader_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 875 | PyVarObject_HEAD_INIT(NULL, 0) | 
|  | 876 | "_csv.reader",                          /*tp_name*/ | 
|  | 877 | sizeof(ReaderObj),                      /*tp_basicsize*/ | 
|  | 878 | 0,                                      /*tp_itemsize*/ | 
|  | 879 | /* methods */ | 
|  | 880 | (destructor)Reader_dealloc,             /*tp_dealloc*/ | 
|  | 881 | (printfunc)0,                           /*tp_print*/ | 
|  | 882 | (getattrfunc)0,                         /*tp_getattr*/ | 
|  | 883 | (setattrfunc)0,                         /*tp_setattr*/ | 
|  | 884 | (cmpfunc)0,                             /*tp_compare*/ | 
|  | 885 | (reprfunc)0,                            /*tp_repr*/ | 
|  | 886 | 0,                                      /*tp_as_number*/ | 
|  | 887 | 0,                                      /*tp_as_sequence*/ | 
|  | 888 | 0,                                      /*tp_as_mapping*/ | 
|  | 889 | (hashfunc)0,                            /*tp_hash*/ | 
|  | 890 | (ternaryfunc)0,                         /*tp_call*/ | 
|  | 891 | (reprfunc)0,                                /*tp_str*/ | 
|  | 892 | 0,                                      /*tp_getattro*/ | 
|  | 893 | 0,                                      /*tp_setattro*/ | 
|  | 894 | 0,                                      /*tp_as_buffer*/ | 
|  | 895 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | | 
|  | 896 | Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/ | 
|  | 897 | Reader_Type_doc,                        /*tp_doc*/ | 
|  | 898 | (traverseproc)Reader_traverse,          /*tp_traverse*/ | 
|  | 899 | (inquiry)Reader_clear,                  /*tp_clear*/ | 
|  | 900 | 0,                                      /*tp_richcompare*/ | 
|  | 901 | 0,                                      /*tp_weaklistoffset*/ | 
|  | 902 | PyObject_SelfIter,                          /*tp_iter*/ | 
|  | 903 | (getiterfunc)Reader_iternext,           /*tp_iternext*/ | 
|  | 904 | Reader_methods,                         /*tp_methods*/ | 
|  | 905 | Reader_memberlist,                      /*tp_members*/ | 
|  | 906 | 0,                                      /*tp_getset*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 907 |  | 
|  | 908 | }; | 
|  | 909 |  | 
|  | 910 | static PyObject * | 
|  | 911 | csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) | 
|  | 912 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 913 | PyObject * iterator, * dialect = NULL; | 
|  | 914 | ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 915 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 916 | if (!self) | 
|  | 917 | return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 918 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 919 | self->dialect = NULL; | 
|  | 920 | self->fields = NULL; | 
|  | 921 | self->input_iter = NULL; | 
|  | 922 | self->field = NULL; | 
|  | 923 | self->field_size = 0; | 
|  | 924 | self->line_num = 0; | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 925 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 926 | if (parse_reset(self) < 0) { | 
|  | 927 | Py_DECREF(self); | 
|  | 928 | return NULL; | 
|  | 929 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 930 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 931 | if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { | 
|  | 932 | Py_DECREF(self); | 
|  | 933 | return NULL; | 
|  | 934 | } | 
|  | 935 | self->input_iter = PyObject_GetIter(iterator); | 
|  | 936 | if (self->input_iter == NULL) { | 
|  | 937 | PyErr_SetString(PyExc_TypeError, | 
|  | 938 | "argument 1 must be an iterator"); | 
|  | 939 | Py_DECREF(self); | 
|  | 940 | return NULL; | 
|  | 941 | } | 
|  | 942 | self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); | 
|  | 943 | if (self->dialect == NULL) { | 
|  | 944 | Py_DECREF(self); | 
|  | 945 | return NULL; | 
|  | 946 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 947 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 948 | PyObject_GC_Track(self); | 
|  | 949 | return (PyObject *)self; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 950 | } | 
|  | 951 |  | 
|  | 952 | /* | 
|  | 953 | * WRITER | 
|  | 954 | */ | 
|  | 955 | /* ---------------------------------------------------------------- */ | 
|  | 956 | static void | 
|  | 957 | join_reset(WriterObj *self) | 
|  | 958 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 959 | self->rec_len = 0; | 
|  | 960 | self->num_fields = 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 961 | } | 
|  | 962 |  | 
|  | 963 | #define MEM_INCR 32768 | 
|  | 964 |  | 
|  | 965 | /* Calculate new record length or append field to record.  Return new | 
|  | 966 | * record length. | 
|  | 967 | */ | 
|  | 968 | static int | 
|  | 969 | join_append_data(WriterObj *self, char *field, int quote_empty, | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 970 | int *quoted, int copy_phase) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 971 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 972 | DialectObj *dialect = self->dialect; | 
|  | 973 | int i, rec_len; | 
|  | 974 | char *lineterm; | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 975 |  | 
|  | 976 | #define ADDCH(c) \ | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 977 | do {\ | 
|  | 978 | if (copy_phase) \ | 
|  | 979 | self->rec[rec_len] = c;\ | 
|  | 980 | rec_len++;\ | 
|  | 981 | } while(0) | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 982 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 983 | lineterm = PyString_AsString(dialect->lineterminator); | 
|  | 984 | if (lineterm == NULL) | 
|  | 985 | return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 986 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 987 | rec_len = self->rec_len; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 988 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 989 | /* If this is not the first field we need a field separator */ | 
|  | 990 | if (self->num_fields > 0) | 
|  | 991 | ADDCH(dialect->delimiter); | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 992 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 993 | /* Handle preceding quote */ | 
|  | 994 | if (copy_phase && *quoted) | 
|  | 995 | ADDCH(dialect->quotechar); | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 996 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 997 | /* Copy/count field data */ | 
|  | 998 | for (i = 0;; i++) { | 
|  | 999 | char c = field[i]; | 
|  | 1000 | int want_escape = 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1001 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1002 | if (c == '\0') | 
|  | 1003 | break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1004 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1005 | if (c == dialect->delimiter || | 
|  | 1006 | c == dialect->escapechar || | 
|  | 1007 | c == dialect->quotechar || | 
|  | 1008 | strchr(lineterm, c)) { | 
|  | 1009 | if (dialect->quoting == QUOTE_NONE) | 
|  | 1010 | want_escape = 1; | 
|  | 1011 | else { | 
|  | 1012 | if (c == dialect->quotechar) { | 
|  | 1013 | if (dialect->doublequote) | 
|  | 1014 | ADDCH(dialect->quotechar); | 
|  | 1015 | else | 
|  | 1016 | want_escape = 1; | 
|  | 1017 | } | 
|  | 1018 | if (!want_escape) | 
|  | 1019 | *quoted = 1; | 
|  | 1020 | } | 
|  | 1021 | if (want_escape) { | 
|  | 1022 | if (!dialect->escapechar) { | 
|  | 1023 | PyErr_Format(error_obj, | 
|  | 1024 | "need to escape, but no escapechar set"); | 
|  | 1025 | return -1; | 
|  | 1026 | } | 
|  | 1027 | ADDCH(dialect->escapechar); | 
|  | 1028 | } | 
|  | 1029 | } | 
|  | 1030 | /* Copy field character into record buffer. | 
|  | 1031 | */ | 
|  | 1032 | ADDCH(c); | 
|  | 1033 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1034 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1035 | /* If field is empty check if it needs to be quoted. | 
|  | 1036 | */ | 
|  | 1037 | if (i == 0 && quote_empty) { | 
|  | 1038 | if (dialect->quoting == QUOTE_NONE) { | 
|  | 1039 | PyErr_Format(error_obj, | 
|  | 1040 | "single empty field record must be quoted"); | 
|  | 1041 | return -1; | 
|  | 1042 | } | 
|  | 1043 | else | 
|  | 1044 | *quoted = 1; | 
|  | 1045 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1046 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1047 | if (*quoted) { | 
|  | 1048 | if (copy_phase) | 
|  | 1049 | ADDCH(dialect->quotechar); | 
|  | 1050 | else | 
|  | 1051 | rec_len += 2; | 
|  | 1052 | } | 
|  | 1053 | return rec_len; | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 1054 | #undef ADDCH | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1055 | } | 
|  | 1056 |  | 
|  | 1057 | static int | 
|  | 1058 | join_check_rec_size(WriterObj *self, int rec_len) | 
|  | 1059 | { | 
| Gregory P. Smith | 9d53457 | 2008-06-11 07:41:16 +0000 | [diff] [blame] | 1060 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1061 | if (rec_len < 0 || rec_len > INT_MAX - MEM_INCR) { | 
|  | 1062 | PyErr_NoMemory(); | 
|  | 1063 | return 0; | 
|  | 1064 | } | 
| Gregory P. Smith | 9d53457 | 2008-06-11 07:41:16 +0000 | [diff] [blame] | 1065 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1066 | if (rec_len > self->rec_size) { | 
|  | 1067 | if (self->rec_size == 0) { | 
|  | 1068 | self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; | 
|  | 1069 | if (self->rec != NULL) | 
|  | 1070 | PyMem_Free(self->rec); | 
|  | 1071 | self->rec = PyMem_Malloc(self->rec_size); | 
|  | 1072 | } | 
|  | 1073 | else { | 
|  | 1074 | char *old_rec = self->rec; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1075 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1076 | self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; | 
|  | 1077 | self->rec = PyMem_Realloc(self->rec, self->rec_size); | 
|  | 1078 | if (self->rec == NULL) | 
|  | 1079 | PyMem_Free(old_rec); | 
|  | 1080 | } | 
|  | 1081 | if (self->rec == NULL) { | 
|  | 1082 | PyErr_NoMemory(); | 
|  | 1083 | return 0; | 
|  | 1084 | } | 
|  | 1085 | } | 
|  | 1086 | return 1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1087 | } | 
|  | 1088 |  | 
|  | 1089 | static int | 
|  | 1090 | join_append(WriterObj *self, char *field, int *quoted, int quote_empty) | 
|  | 1091 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1092 | int rec_len; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1093 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1094 | rec_len = join_append_data(self, field, quote_empty, quoted, 0); | 
|  | 1095 | if (rec_len < 0) | 
|  | 1096 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1097 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1098 | /* grow record buffer if necessary */ | 
|  | 1099 | if (!join_check_rec_size(self, rec_len)) | 
|  | 1100 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1101 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1102 | self->rec_len = join_append_data(self, field, quote_empty, quoted, 1); | 
|  | 1103 | self->num_fields++; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1104 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1105 | return 1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1106 | } | 
|  | 1107 |  | 
|  | 1108 | static int | 
|  | 1109 | join_append_lineterminator(WriterObj *self) | 
|  | 1110 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1111 | int terminator_len; | 
|  | 1112 | char *terminator; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1113 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1114 | terminator_len = PyString_Size(self->dialect->lineterminator); | 
|  | 1115 | if (terminator_len == -1) | 
|  | 1116 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1117 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1118 | /* grow record buffer if necessary */ | 
|  | 1119 | if (!join_check_rec_size(self, self->rec_len + terminator_len)) | 
|  | 1120 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1121 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1122 | terminator = PyString_AsString(self->dialect->lineterminator); | 
|  | 1123 | if (terminator == NULL) | 
|  | 1124 | return 0; | 
|  | 1125 | memmove(self->rec + self->rec_len, terminator, terminator_len); | 
|  | 1126 | self->rec_len += terminator_len; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1127 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1128 | return 1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1129 | } | 
|  | 1130 |  | 
|  | 1131 | PyDoc_STRVAR(csv_writerow_doc, | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1132 | "writerow(sequence)\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1133 | "\n" | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1134 | "Construct and write a CSV record from a sequence of fields.  Non-string\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1135 | "elements will be converted to string."); | 
|  | 1136 |  | 
|  | 1137 | static PyObject * | 
|  | 1138 | csv_writerow(WriterObj *self, PyObject *seq) | 
|  | 1139 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1140 | DialectObj *dialect = self->dialect; | 
|  | 1141 | int len, i; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1142 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1143 | if (!PySequence_Check(seq)) | 
|  | 1144 | return PyErr_Format(error_obj, "sequence expected"); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1145 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1146 | len = PySequence_Length(seq); | 
|  | 1147 | if (len < 0) | 
|  | 1148 | return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1149 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1150 | /* Join all fields in internal buffer. | 
|  | 1151 | */ | 
|  | 1152 | join_reset(self); | 
|  | 1153 | for (i = 0; i < len; i++) { | 
|  | 1154 | PyObject *field; | 
|  | 1155 | int append_ok; | 
|  | 1156 | int quoted; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1157 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1158 | field = PySequence_GetItem(seq, i); | 
|  | 1159 | if (field == NULL) | 
|  | 1160 | return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1161 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1162 | switch (dialect->quoting) { | 
|  | 1163 | case QUOTE_NONNUMERIC: | 
|  | 1164 | quoted = !PyNumber_Check(field); | 
|  | 1165 | break; | 
|  | 1166 | case QUOTE_ALL: | 
|  | 1167 | quoted = 1; | 
|  | 1168 | break; | 
|  | 1169 | default: | 
|  | 1170 | quoted = 0; | 
|  | 1171 | break; | 
|  | 1172 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1173 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1174 | if (PyString_Check(field)) { | 
|  | 1175 | append_ok = join_append(self, | 
|  | 1176 | PyString_AS_STRING(field), | 
|  | 1177 | "ed, len == 1); | 
|  | 1178 | Py_DECREF(field); | 
|  | 1179 | } | 
|  | 1180 | else if (field == Py_None) { | 
|  | 1181 | append_ok = join_append(self, "", "ed, len == 1); | 
|  | 1182 | Py_DECREF(field); | 
|  | 1183 | } | 
|  | 1184 | else { | 
|  | 1185 | PyObject *str; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1186 |  | 
| Raymond Hettinger | f537702 | 2011-12-11 22:31:09 -0800 | [diff] [blame] | 1187 | if (PyFloat_Check(field)) { | 
|  | 1188 | str = PyObject_Repr(field); | 
|  | 1189 | } else { | 
|  | 1190 | str = PyObject_Str(field); | 
|  | 1191 | } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1192 | Py_DECREF(field); | 
|  | 1193 | if (str == NULL) | 
|  | 1194 | return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1195 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1196 | append_ok = join_append(self, PyString_AS_STRING(str), | 
|  | 1197 | "ed, len == 1); | 
|  | 1198 | Py_DECREF(str); | 
|  | 1199 | } | 
|  | 1200 | if (!append_ok) | 
|  | 1201 | return NULL; | 
|  | 1202 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1203 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1204 | /* Add line terminator. | 
|  | 1205 | */ | 
|  | 1206 | if (!join_append_lineterminator(self)) | 
|  | 1207 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1208 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1209 | return PyObject_CallFunction(self->writeline, | 
|  | 1210 | "(s#)", self->rec, self->rec_len); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1211 | } | 
|  | 1212 |  | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1213 | PyDoc_STRVAR(csv_writerows_doc, | 
|  | 1214 | "writerows(sequence of sequences)\n" | 
|  | 1215 | "\n" | 
|  | 1216 | "Construct and write a series of sequences to a csv file.  Non-string\n" | 
|  | 1217 | "elements will be converted to string."); | 
|  | 1218 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1219 | static PyObject * | 
|  | 1220 | csv_writerows(WriterObj *self, PyObject *seqseq) | 
|  | 1221 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1222 | PyObject *row_iter, *row_obj, *result; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1223 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1224 | row_iter = PyObject_GetIter(seqseq); | 
|  | 1225 | if (row_iter == NULL) { | 
|  | 1226 | PyErr_SetString(PyExc_TypeError, | 
|  | 1227 | "writerows() argument must be iterable"); | 
|  | 1228 | return NULL; | 
|  | 1229 | } | 
|  | 1230 | while ((row_obj = PyIter_Next(row_iter))) { | 
|  | 1231 | result = csv_writerow(self, row_obj); | 
|  | 1232 | Py_DECREF(row_obj); | 
|  | 1233 | if (!result) { | 
|  | 1234 | Py_DECREF(row_iter); | 
|  | 1235 | return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1236 | } | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1237 | else | 
|  | 1238 | Py_DECREF(result); | 
|  | 1239 | } | 
|  | 1240 | Py_DECREF(row_iter); | 
|  | 1241 | if (PyErr_Occurred()) | 
|  | 1242 | return NULL; | 
|  | 1243 | Py_INCREF(Py_None); | 
|  | 1244 | return Py_None; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1245 | } | 
|  | 1246 |  | 
|  | 1247 | static struct PyMethodDef Writer_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1248 | { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, | 
|  | 1249 | { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, | 
|  | 1250 | { NULL, NULL } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1251 | }; | 
|  | 1252 |  | 
|  | 1253 | #define W_OFF(x) offsetof(WriterObj, x) | 
|  | 1254 |  | 
|  | 1255 | static struct PyMemberDef Writer_memberlist[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1256 | { "dialect", T_OBJECT, W_OFF(dialect), RO }, | 
|  | 1257 | { NULL } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1258 | }; | 
|  | 1259 |  | 
|  | 1260 | static void | 
|  | 1261 | Writer_dealloc(WriterObj *self) | 
|  | 1262 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1263 | PyObject_GC_UnTrack(self); | 
|  | 1264 | Py_XDECREF(self->dialect); | 
|  | 1265 | Py_XDECREF(self->writeline); | 
|  | 1266 | if (self->rec != NULL) | 
|  | 1267 | PyMem_Free(self->rec); | 
|  | 1268 | PyObject_GC_Del(self); | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 1269 | } | 
|  | 1270 |  | 
|  | 1271 | static int | 
|  | 1272 | Writer_traverse(WriterObj *self, visitproc visit, void *arg) | 
|  | 1273 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1274 | Py_VISIT(self->dialect); | 
|  | 1275 | Py_VISIT(self->writeline); | 
|  | 1276 | return 0; | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 1277 | } | 
|  | 1278 |  | 
|  | 1279 | static int | 
|  | 1280 | Writer_clear(WriterObj *self) | 
|  | 1281 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1282 | Py_CLEAR(self->dialect); | 
|  | 1283 | Py_CLEAR(self->writeline); | 
|  | 1284 | return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1285 | } | 
|  | 1286 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1287 | PyDoc_STRVAR(Writer_Type_doc, | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1288 | "CSV writer\n" | 
|  | 1289 | "\n" | 
|  | 1290 | "Writer objects are responsible for generating tabular data\n" | 
|  | 1291 | "in CSV format from sequence input.\n" | 
|  | 1292 | ); | 
|  | 1293 |  | 
|  | 1294 | static PyTypeObject Writer_Type = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1295 | PyVarObject_HEAD_INIT(NULL, 0) | 
|  | 1296 | "_csv.writer",                          /*tp_name*/ | 
|  | 1297 | sizeof(WriterObj),                      /*tp_basicsize*/ | 
|  | 1298 | 0,                                      /*tp_itemsize*/ | 
|  | 1299 | /* methods */ | 
|  | 1300 | (destructor)Writer_dealloc,             /*tp_dealloc*/ | 
|  | 1301 | (printfunc)0,                           /*tp_print*/ | 
|  | 1302 | (getattrfunc)0,                         /*tp_getattr*/ | 
|  | 1303 | (setattrfunc)0,                         /*tp_setattr*/ | 
|  | 1304 | (cmpfunc)0,                             /*tp_compare*/ | 
|  | 1305 | (reprfunc)0,                            /*tp_repr*/ | 
|  | 1306 | 0,                                      /*tp_as_number*/ | 
|  | 1307 | 0,                                      /*tp_as_sequence*/ | 
|  | 1308 | 0,                                      /*tp_as_mapping*/ | 
|  | 1309 | (hashfunc)0,                            /*tp_hash*/ | 
|  | 1310 | (ternaryfunc)0,                         /*tp_call*/ | 
|  | 1311 | (reprfunc)0,                            /*tp_str*/ | 
|  | 1312 | 0,                                      /*tp_getattro*/ | 
|  | 1313 | 0,                                      /*tp_setattro*/ | 
|  | 1314 | 0,                                      /*tp_as_buffer*/ | 
|  | 1315 | Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | | 
|  | 1316 | Py_TPFLAGS_HAVE_GC,                     /*tp_flags*/ | 
|  | 1317 | Writer_Type_doc, | 
|  | 1318 | (traverseproc)Writer_traverse,          /*tp_traverse*/ | 
|  | 1319 | (inquiry)Writer_clear,                  /*tp_clear*/ | 
|  | 1320 | 0,                                      /*tp_richcompare*/ | 
|  | 1321 | 0,                                      /*tp_weaklistoffset*/ | 
|  | 1322 | (getiterfunc)0,                         /*tp_iter*/ | 
|  | 1323 | (getiterfunc)0,                         /*tp_iternext*/ | 
|  | 1324 | Writer_methods,                         /*tp_methods*/ | 
|  | 1325 | Writer_memberlist,                      /*tp_members*/ | 
|  | 1326 | 0,                                      /*tp_getset*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1327 | }; | 
|  | 1328 |  | 
|  | 1329 | static PyObject * | 
|  | 1330 | csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) | 
|  | 1331 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1332 | PyObject * output_file, * dialect = NULL; | 
|  | 1333 | WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1334 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1335 | if (!self) | 
|  | 1336 | return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1337 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1338 | self->dialect = NULL; | 
|  | 1339 | self->writeline = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1340 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1341 | self->rec = NULL; | 
|  | 1342 | self->rec_size = 0; | 
|  | 1343 | self->rec_len = 0; | 
|  | 1344 | self->num_fields = 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1345 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1346 | if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { | 
|  | 1347 | Py_DECREF(self); | 
|  | 1348 | return NULL; | 
|  | 1349 | } | 
|  | 1350 | self->writeline = PyObject_GetAttrString(output_file, "write"); | 
|  | 1351 | if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { | 
|  | 1352 | PyErr_SetString(PyExc_TypeError, | 
|  | 1353 | "argument 1 must have a \"write\" method"); | 
|  | 1354 | Py_DECREF(self); | 
|  | 1355 | return NULL; | 
|  | 1356 | } | 
|  | 1357 | self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); | 
|  | 1358 | if (self->dialect == NULL) { | 
|  | 1359 | Py_DECREF(self); | 
|  | 1360 | return NULL; | 
|  | 1361 | } | 
|  | 1362 | PyObject_GC_Track(self); | 
|  | 1363 | return (PyObject *)self; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1364 | } | 
|  | 1365 |  | 
|  | 1366 | /* | 
|  | 1367 | * DIALECT REGISTRY | 
|  | 1368 | */ | 
|  | 1369 | static PyObject * | 
|  | 1370 | csv_list_dialects(PyObject *module, PyObject *args) | 
|  | 1371 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1372 | return PyDict_Keys(dialects); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1373 | } | 
|  | 1374 |  | 
|  | 1375 | static PyObject * | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1376 | csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1377 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1378 | PyObject *name_obj, *dialect_obj = NULL; | 
|  | 1379 | PyObject *dialect; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1380 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1381 | if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) | 
|  | 1382 | return NULL; | 
|  | 1383 | if (!IS_BASESTRING(name_obj)) { | 
|  | 1384 | PyErr_SetString(PyExc_TypeError, | 
|  | 1385 | "dialect name must be a string or unicode"); | 
|  | 1386 | return NULL; | 
|  | 1387 | } | 
|  | 1388 | dialect = _call_dialect(dialect_obj, kwargs); | 
|  | 1389 | if (dialect == NULL) | 
|  | 1390 | return NULL; | 
|  | 1391 | if (PyDict_SetItem(dialects, name_obj, dialect) < 0) { | 
|  | 1392 | Py_DECREF(dialect); | 
|  | 1393 | return NULL; | 
|  | 1394 | } | 
|  | 1395 | Py_DECREF(dialect); | 
|  | 1396 | Py_INCREF(Py_None); | 
|  | 1397 | return Py_None; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1398 | } | 
|  | 1399 |  | 
|  | 1400 | static PyObject * | 
| Skip Montanaro | 577c7a7 | 2003-04-12 19:17:14 +0000 | [diff] [blame] | 1401 | csv_unregister_dialect(PyObject *module, PyObject *name_obj) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1402 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1403 | if (PyDict_DelItem(dialects, name_obj) < 0) | 
|  | 1404 | return PyErr_Format(error_obj, "unknown dialect"); | 
|  | 1405 | Py_INCREF(Py_None); | 
|  | 1406 | return Py_None; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1407 | } | 
|  | 1408 |  | 
|  | 1409 | static PyObject * | 
| Skip Montanaro | 577c7a7 | 2003-04-12 19:17:14 +0000 | [diff] [blame] | 1410 | csv_get_dialect(PyObject *module, PyObject *name_obj) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1411 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1412 | return get_dialect_from_registry(name_obj); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1413 | } | 
|  | 1414 |  | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1415 | static PyObject * | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1416 | csv_field_size_limit(PyObject *module, PyObject *args) | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1417 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1418 | PyObject *new_limit = NULL; | 
|  | 1419 | long old_limit = field_limit; | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1420 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1421 | if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) | 
|  | 1422 | return NULL; | 
|  | 1423 | if (new_limit != NULL) { | 
|  | 1424 | if (!PyInt_Check(new_limit)) { | 
|  | 1425 | PyErr_Format(PyExc_TypeError, | 
|  | 1426 | "limit must be an integer"); | 
|  | 1427 | return NULL; | 
|  | 1428 | } | 
|  | 1429 | field_limit = PyInt_AsLong(new_limit); | 
|  | 1430 | } | 
|  | 1431 | return PyInt_FromLong(old_limit); | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1432 | } | 
|  | 1433 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1434 | /* | 
|  | 1435 | * MODULE | 
|  | 1436 | */ | 
|  | 1437 |  | 
|  | 1438 | PyDoc_STRVAR(csv_module_doc, | 
|  | 1439 | "CSV parsing and writing.\n" | 
|  | 1440 | "\n" | 
|  | 1441 | "This module provides classes that assist in the reading and writing\n" | 
|  | 1442 | "of Comma Separated Value (CSV) files, and implements the interface\n" | 
|  | 1443 | "described by PEP 305.  Although many CSV files are simple to parse,\n" | 
|  | 1444 | "the format is not formally defined by a stable specification and\n" | 
|  | 1445 | "is subtle enough that parsing lines of a CSV file with something\n" | 
|  | 1446 | "like line.split(\",\") is bound to fail.  The module supports three\n" | 
|  | 1447 | "basic APIs: reading, writing, and registration of dialects.\n" | 
|  | 1448 | "\n" | 
|  | 1449 | "\n" | 
|  | 1450 | "DIALECT REGISTRATION:\n" | 
|  | 1451 | "\n" | 
|  | 1452 | "Readers and writers support a dialect argument, which is a convenient\n" | 
|  | 1453 | "handle on a group of settings.  When the dialect argument is a string,\n" | 
|  | 1454 | "it identifies one of the dialects previously registered with the module.\n" | 
|  | 1455 | "If it is a class or instance, the attributes of the argument are used as\n" | 
|  | 1456 | "the settings for the reader or writer:\n" | 
|  | 1457 | "\n" | 
|  | 1458 | "    class excel:\n" | 
|  | 1459 | "        delimiter = ','\n" | 
|  | 1460 | "        quotechar = '\"'\n" | 
|  | 1461 | "        escapechar = None\n" | 
|  | 1462 | "        doublequote = True\n" | 
|  | 1463 | "        skipinitialspace = False\n" | 
| Johannes Gijsbers | 8d3b9dd | 2004-08-15 12:23:10 +0000 | [diff] [blame] | 1464 | "        lineterminator = '\\r\\n'\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1465 | "        quoting = QUOTE_MINIMAL\n" | 
|  | 1466 | "\n" | 
|  | 1467 | "SETTINGS:\n" | 
|  | 1468 | "\n" | 
|  | 1469 | "    * quotechar - specifies a one-character string to use as the \n" | 
|  | 1470 | "        quoting character.  It defaults to '\"'.\n" | 
|  | 1471 | "    * delimiter - specifies a one-character string to use as the \n" | 
|  | 1472 | "        field separator.  It defaults to ','.\n" | 
|  | 1473 | "    * skipinitialspace - specifies how to interpret whitespace which\n" | 
|  | 1474 | "        immediately follows a delimiter.  It defaults to False, which\n" | 
|  | 1475 | "        means that whitespace immediately following a delimiter is part\n" | 
|  | 1476 | "        of the following field.\n" | 
|  | 1477 | "    * lineterminator -  specifies the character sequence which should \n" | 
|  | 1478 | "        terminate rows.\n" | 
|  | 1479 | "    * quoting - controls when quotes should be generated by the writer.\n" | 
|  | 1480 | "        It can take on any of the following module constants:\n" | 
|  | 1481 | "\n" | 
|  | 1482 | "        csv.QUOTE_MINIMAL means only when required, for example, when a\n" | 
|  | 1483 | "            field contains either the quotechar or the delimiter\n" | 
|  | 1484 | "        csv.QUOTE_ALL means that quotes are always placed around fields.\n" | 
|  | 1485 | "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" | 
| Skip Montanaro | 148eb6a | 2003-12-02 18:57:47 +0000 | [diff] [blame] | 1486 | "            fields which do not parse as integers or floating point\n" | 
|  | 1487 | "            numbers.\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1488 | "        csv.QUOTE_NONE means that quotes are never placed around fields.\n" | 
|  | 1489 | "    * escapechar - specifies a one-character string used to escape \n" | 
|  | 1490 | "        the delimiter when quoting is set to QUOTE_NONE.\n" | 
|  | 1491 | "    * doublequote - controls the handling of quotes inside fields.  When\n" | 
|  | 1492 | "        True, two consecutive quotes are interpreted as one during read,\n" | 
|  | 1493 | "        and when writing, each quote character embedded in the data is\n" | 
|  | 1494 | "        written as two quotes\n"); | 
|  | 1495 |  | 
|  | 1496 | PyDoc_STRVAR(csv_reader_doc, | 
|  | 1497 | "    csv_reader = reader(iterable [, dialect='excel']\n" | 
|  | 1498 | "                        [optional keyword args])\n" | 
|  | 1499 | "    for row in csv_reader:\n" | 
|  | 1500 | "        process(row)\n" | 
|  | 1501 | "\n" | 
|  | 1502 | "The \"iterable\" argument can be any object that returns a line\n" | 
|  | 1503 | "of input for each iteration, such as a file object or a list.  The\n" | 
|  | 1504 | "optional \"dialect\" parameter is discussed below.  The function\n" | 
|  | 1505 | "also accepts optional keyword arguments which override settings\n" | 
|  | 1506 | "provided by the dialect.\n" | 
|  | 1507 | "\n" | 
|  | 1508 | "The returned object is an iterator.  Each iteration returns a row\n" | 
| Johannes Gijsbers | 8d3b9dd | 2004-08-15 12:23:10 +0000 | [diff] [blame] | 1509 | "of the CSV file (which can span multiple input lines):\n"); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1510 |  | 
|  | 1511 | PyDoc_STRVAR(csv_writer_doc, | 
|  | 1512 | "    csv_writer = csv.writer(fileobj [, dialect='excel']\n" | 
|  | 1513 | "                            [optional keyword args])\n" | 
| Fredrik Lundh | 4aaaa49 | 2006-04-04 16:51:13 +0000 | [diff] [blame] | 1514 | "    for row in sequence:\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1515 | "        csv_writer.writerow(row)\n" | 
|  | 1516 | "\n" | 
|  | 1517 | "    [or]\n" | 
|  | 1518 | "\n" | 
|  | 1519 | "    csv_writer = csv.writer(fileobj [, dialect='excel']\n" | 
|  | 1520 | "                            [optional keyword args])\n" | 
|  | 1521 | "    csv_writer.writerows(rows)\n" | 
|  | 1522 | "\n" | 
|  | 1523 | "The \"fileobj\" argument can be any object that supports the file API.\n"); | 
|  | 1524 |  | 
|  | 1525 | PyDoc_STRVAR(csv_list_dialects_doc, | 
|  | 1526 | "Return a list of all know dialect names.\n" | 
|  | 1527 | "    names = csv.list_dialects()"); | 
|  | 1528 |  | 
|  | 1529 | PyDoc_STRVAR(csv_get_dialect_doc, | 
|  | 1530 | "Return the dialect instance associated with name.\n" | 
|  | 1531 | "    dialect = csv.get_dialect(name)"); | 
|  | 1532 |  | 
|  | 1533 | PyDoc_STRVAR(csv_register_dialect_doc, | 
|  | 1534 | "Create a mapping from a string name to a dialect class.\n" | 
|  | 1535 | "    dialect = csv.register_dialect(name, dialect)"); | 
|  | 1536 |  | 
|  | 1537 | PyDoc_STRVAR(csv_unregister_dialect_doc, | 
|  | 1538 | "Delete the name/dialect mapping associated with a string name.\n" | 
|  | 1539 | "    csv.unregister_dialect(name)"); | 
|  | 1540 |  | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1541 | PyDoc_STRVAR(csv_field_size_limit_doc, | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1542 | "Sets an upper limit on parsed fields.\n" | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1543 | "    csv.field_size_limit([limit])\n" | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1544 | "\n" | 
|  | 1545 | "Returns old limit. If limit is not given, no new limit is set and\n" | 
|  | 1546 | "the old limit is returned"); | 
|  | 1547 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1548 | static struct PyMethodDef csv_methods[] = { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1549 | { "reader", (PyCFunction)csv_reader, | 
|  | 1550 | METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, | 
|  | 1551 | { "writer", (PyCFunction)csv_writer, | 
|  | 1552 | METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, | 
|  | 1553 | { "list_dialects", (PyCFunction)csv_list_dialects, | 
|  | 1554 | METH_NOARGS, csv_list_dialects_doc}, | 
|  | 1555 | { "register_dialect", (PyCFunction)csv_register_dialect, | 
|  | 1556 | METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, | 
|  | 1557 | { "unregister_dialect", (PyCFunction)csv_unregister_dialect, | 
|  | 1558 | METH_O, csv_unregister_dialect_doc}, | 
|  | 1559 | { "get_dialect", (PyCFunction)csv_get_dialect, | 
|  | 1560 | METH_O, csv_get_dialect_doc}, | 
|  | 1561 | { "field_size_limit", (PyCFunction)csv_field_size_limit, | 
|  | 1562 | METH_VARARGS, csv_field_size_limit_doc}, | 
|  | 1563 | { NULL, NULL } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1564 | }; | 
|  | 1565 |  | 
|  | 1566 | PyMODINIT_FUNC | 
|  | 1567 | init_csv(void) | 
|  | 1568 | { | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1569 | PyObject *module; | 
|  | 1570 | StyleDesc *style; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1571 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1572 | if (PyType_Ready(&Dialect_Type) < 0) | 
|  | 1573 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1574 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1575 | if (PyType_Ready(&Reader_Type) < 0) | 
|  | 1576 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1577 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1578 | if (PyType_Ready(&Writer_Type) < 0) | 
|  | 1579 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1580 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1581 | /* Create the module and add the functions */ | 
|  | 1582 | module = Py_InitModule3("_csv", csv_methods, csv_module_doc); | 
|  | 1583 | if (module == NULL) | 
|  | 1584 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1585 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1586 | /* Add version to the module. */ | 
|  | 1587 | if (PyModule_AddStringConstant(module, "__version__", | 
|  | 1588 | MODULE_VERSION) == -1) | 
|  | 1589 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1590 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1591 | /* Add _dialects dictionary */ | 
|  | 1592 | dialects = PyDict_New(); | 
|  | 1593 | if (dialects == NULL) | 
|  | 1594 | return; | 
|  | 1595 | if (PyModule_AddObject(module, "_dialects", dialects)) | 
|  | 1596 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1597 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1598 | /* Add quote styles into dictionary */ | 
|  | 1599 | for (style = quote_styles; style->name; style++) { | 
|  | 1600 | if (PyModule_AddIntConstant(module, style->name, | 
|  | 1601 | style->style) == -1) | 
|  | 1602 | return; | 
|  | 1603 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1604 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1605 | /* Add the Dialect type */ | 
|  | 1606 | Py_INCREF(&Dialect_Type); | 
|  | 1607 | if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) | 
|  | 1608 | return; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1609 |  | 
| Antoine Pitrou | c83ea13 | 2010-05-09 14:46:46 +0000 | [diff] [blame] | 1610 | /* Add the CSV exception object to the module. */ | 
|  | 1611 | error_obj = PyErr_NewException("_csv.Error", NULL, NULL); | 
|  | 1612 | if (error_obj == NULL) | 
|  | 1613 | return; | 
|  | 1614 | PyModule_AddObject(module, "Error", error_obj); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1615 | } |