| Skip Montanaro | a16b21f | 2003-03-23 14:32:54 +0000 | [diff] [blame] | 1 | /* csv module */ | 
 | 2 |  | 
 | 3 | /* | 
 | 4 |  | 
 | 5 | This module provides the low-level underpinnings of a CSV reading/writing | 
 | 6 | module.  Users should not use this module directly, but import the csv.py | 
 | 7 | module instead. | 
 | 8 |  | 
 | 9 | **** For people modifying this code, please note that as of this writing | 
| Skip Montanaro | dfa35fa | 2003-04-11 21:40:01 +0000 | [diff] [blame] | 10 | **** (2003-03-23), it is intended that this code should work with Python | 
| Skip Montanaro | a16b21f | 2003-03-23 14:32:54 +0000 | [diff] [blame] | 11 | **** 2.2. | 
 | 12 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 13 | */ | 
 | 14 |  | 
| Skip Montanaro | 7b01a83 | 2003-04-12 19:23:46 +0000 | [diff] [blame] | 15 | #define MODULE_VERSION "1.0" | 
 | 16 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 17 | #include "Python.h" | 
 | 18 | #include "structmember.h" | 
 | 19 |  | 
| Skip Montanaro | a16b21f | 2003-03-23 14:32:54 +0000 | [diff] [blame] | 20 |  | 
/* begin 2.2 compatibility macros */
/*
 * Supply the inline-documentation and module-init macros only when the
 * Python headers being compiled against do not already define them.
 */
#ifndef PyDoc_STRVAR
/* PyDoc_STR keeps the text only when docstrings are compiled in. */
#ifdef WITH_DOC_STRINGS
#define PyDoc_STR(str) str
#else
#define PyDoc_STR(str) ""
#endif
/* Declare (PyDoc_VAR) or declare-and-initialise (PyDoc_STRVAR) a doc array. */
#define PyDoc_VAR(name) static char name[]
#define PyDoc_STRVAR(name,str) PyDoc_VAR(name) = PyDoc_STR(str)
#endif /* ifndef PyDoc_STRVAR */

#ifndef PyMODINIT_FUNC
#ifdef __cplusplus
#define PyMODINIT_FUNC extern "C" void
#else /* !__cplusplus */
#define PyMODINIT_FUNC void
#endif /* __cplusplus */
#endif /* ifndef PyMODINIT_FUNC */
/* end 2.2 compatibility macros */
 | 41 |  | 
| Andrew McNamara | 37d2bdf | 2005-01-10 12:22:48 +0000 | [diff] [blame] | 42 | #define IS_BASESTRING(o) \ | 
 | 43 | 	PyObject_TypeCheck(o, &PyBaseString_Type) | 
 | 44 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 45 | static PyObject *error_obj;	/* CSV exception */ | 
 | 46 | static PyObject *dialects;      /* Dialect registry */ | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 47 | static long field_limit = 128 * 1024;	/* max parsed field size */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 48 |  | 
 | 49 | typedef enum { | 
 | 50 | 	START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 51 | 	IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, | 
 | 52 | 	EAT_CRNL | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 53 | } ParserState; | 
 | 54 |  | 
 | 55 | typedef enum { | 
 | 56 | 	QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE | 
 | 57 | } QuoteStyle; | 
 | 58 |  | 
 | 59 | typedef struct { | 
 | 60 | 	QuoteStyle style; | 
 | 61 | 	char *name; | 
 | 62 | } StyleDesc; | 
 | 63 |  | 
 | 64 | static StyleDesc quote_styles[] = { | 
 | 65 | 	{ QUOTE_MINIMAL,    "QUOTE_MINIMAL" }, | 
 | 66 | 	{ QUOTE_ALL,        "QUOTE_ALL" }, | 
 | 67 | 	{ QUOTE_NONNUMERIC, "QUOTE_NONNUMERIC" }, | 
 | 68 | 	{ QUOTE_NONE,       "QUOTE_NONE" }, | 
 | 69 | 	{ 0 } | 
 | 70 | }; | 
 | 71 |  | 
 | 72 | typedef struct { | 
 | 73 |         PyObject_HEAD | 
 | 74 |          | 
 | 75 | 	int doublequote;	/* is " represented by ""? */ | 
 | 76 | 	char delimiter;		/* field separator */ | 
 | 77 | 	char quotechar;		/* quote character */ | 
 | 78 | 	char escapechar;	/* escape character */ | 
 | 79 | 	int skipinitialspace;	/* ignore spaces following delimiter? */ | 
 | 80 | 	PyObject *lineterminator; /* string to write between records */ | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 81 | 	int quoting;		/* style of quoting to write */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 82 |  | 
 | 83 | 	int strict;		/* raise exception on bad CSV */ | 
 | 84 | } DialectObj; | 
 | 85 |  | 
 | 86 | staticforward PyTypeObject Dialect_Type; | 
 | 87 |  | 
 | 88 | typedef struct { | 
 | 89 |         PyObject_HEAD | 
 | 90 |  | 
 | 91 |         PyObject *input_iter;   /* iterate over this for input lines */ | 
 | 92 |  | 
 | 93 |         DialectObj *dialect;    /* parsing dialect */ | 
 | 94 |  | 
 | 95 | 	PyObject *fields;	/* field list for current record */ | 
 | 96 | 	ParserState state;	/* current CSV parse state */ | 
 | 97 | 	char *field;		/* build current field in here */ | 
 | 98 | 	int field_size;		/* size of allocated buffer */ | 
 | 99 | 	int field_len;		/* length of current field */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 100 | 	int numeric_field;	/* treat field as numeric */ | 
| Andrew McNamara | 7f2053e | 2005-01-12 11:17:16 +0000 | [diff] [blame] | 101 | 	unsigned long line_num;	/* Source-file line number */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 102 | } ReaderObj; | 
 | 103 |  | 
 | 104 | staticforward PyTypeObject Reader_Type; | 
 | 105 |  | 
 | 106 | #define ReaderObject_Check(v)   ((v)->ob_type == &Reader_Type) | 
 | 107 |  | 
 | 108 | typedef struct { | 
 | 109 |         PyObject_HEAD | 
 | 110 |  | 
 | 111 |         PyObject *writeline;    /* write output lines to this file */ | 
 | 112 |  | 
 | 113 |         DialectObj *dialect;    /* parsing dialect */ | 
 | 114 |  | 
 | 115 | 	char *rec;		/* buffer for parser.join */ | 
 | 116 | 	int rec_size;		/* size of allocated record */ | 
 | 117 | 	int rec_len;		/* length of record */ | 
 | 118 | 	int num_fields;		/* number of fields in record */ | 
 | 119 | } WriterObj;         | 
 | 120 |  | 
 | 121 | staticforward PyTypeObject Writer_Type; | 
 | 122 |  | 
 | 123 | /* | 
 | 124 |  * DIALECT class | 
 | 125 |  */ | 
 | 126 |  | 
 | 127 | static PyObject * | 
 | 128 | get_dialect_from_registry(PyObject * name_obj) | 
 | 129 | { | 
 | 130 |         PyObject *dialect_obj; | 
 | 131 |  | 
 | 132 |         dialect_obj = PyDict_GetItem(dialects, name_obj); | 
| Andrew McNamara | dbce261 | 2005-01-10 23:17:35 +0000 | [diff] [blame] | 133 | 	if (dialect_obj == NULL) { | 
 | 134 | 		if (!PyErr_Occurred()) | 
 | 135 | 			PyErr_Format(error_obj, "unknown dialect"); | 
 | 136 | 	} | 
 | 137 | 	else | 
 | 138 | 		Py_INCREF(dialect_obj); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 139 |         return dialect_obj; | 
 | 140 | } | 
 | 141 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 142 | static PyObject * | 
 | 143 | get_string(PyObject *str) | 
 | 144 | { | 
 | 145 |         Py_XINCREF(str); | 
 | 146 |         return str; | 
 | 147 | } | 
 | 148 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 149 | static PyObject * | 
 | 150 | get_nullchar_as_None(char c) | 
 | 151 | { | 
 | 152 |         if (c == '\0') { | 
 | 153 |                 Py_INCREF(Py_None); | 
 | 154 |                 return Py_None; | 
 | 155 |         } | 
 | 156 |         else | 
 | 157 |                 return PyString_FromStringAndSize((char*)&c, 1); | 
 | 158 | } | 
 | 159 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 160 | static PyObject * | 
 | 161 | Dialect_get_lineterminator(DialectObj *self) | 
 | 162 | { | 
 | 163 |         return get_string(self->lineterminator); | 
 | 164 | } | 
 | 165 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 166 | static PyObject * | 
 | 167 | Dialect_get_escapechar(DialectObj *self) | 
 | 168 | { | 
 | 169 |         return get_nullchar_as_None(self->escapechar); | 
 | 170 | } | 
 | 171 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 172 | static PyObject * | 
 | 173 | Dialect_get_quotechar(DialectObj *self) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 174 | { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 175 |         return get_nullchar_as_None(self->quotechar); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 176 | } | 
 | 177 |  | 
 | 178 | static PyObject * | 
 | 179 | Dialect_get_quoting(DialectObj *self) | 
 | 180 | { | 
 | 181 |         return PyInt_FromLong(self->quoting); | 
 | 182 | } | 
 | 183 |  | 
 | 184 | static int | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 185 | _set_bool(const char *name, int *target, PyObject *src, int dflt) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 186 | { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 187 | 	if (src == NULL) | 
 | 188 | 		*target = dflt; | 
 | 189 | 	else | 
 | 190 | 		*target = PyObject_IsTrue(src); | 
 | 191 | 	return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 192 | } | 
 | 193 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 194 | static int | 
 | 195 | _set_int(const char *name, int *target, PyObject *src, int dflt) | 
 | 196 | { | 
 | 197 | 	if (src == NULL) | 
 | 198 | 		*target = dflt; | 
 | 199 | 	else { | 
 | 200 | 		if (!PyInt_Check(src)) { | 
 | 201 | 			PyErr_Format(PyExc_TypeError,  | 
 | 202 | 				     "\"%s\" must be an integer", name); | 
 | 203 | 			return -1; | 
 | 204 | 		} | 
 | 205 | 		*target = PyInt_AsLong(src); | 
 | 206 | 	} | 
 | 207 | 	return 0; | 
 | 208 | } | 
 | 209 |  | 
 | 210 | static int | 
 | 211 | _set_char(const char *name, char *target, PyObject *src, char dflt) | 
 | 212 | { | 
 | 213 | 	if (src == NULL) | 
 | 214 | 		*target = dflt; | 
 | 215 | 	else { | 
| Andrew McNamara | a829263 | 2005-01-10 12:25:11 +0000 | [diff] [blame] | 216 | 		if (src == Py_None || PyString_Size(src) == 0) | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 217 | 			*target = '\0'; | 
 | 218 | 		else if (!PyString_Check(src) || PyString_Size(src) != 1) { | 
 | 219 | 			PyErr_Format(PyExc_TypeError,  | 
 | 220 | 				     "\"%s\" must be an 1-character string",  | 
 | 221 | 				     name); | 
 | 222 | 			return -1; | 
 | 223 | 		} | 
 | 224 | 		else { | 
 | 225 | 			char *s = PyString_AsString(src); | 
 | 226 | 			if (s == NULL) | 
 | 227 | 				return -1; | 
 | 228 | 			*target = s[0]; | 
 | 229 | 		} | 
 | 230 | 	} | 
 | 231 |         return 0; | 
 | 232 | } | 
 | 233 |  | 
 | 234 | static int | 
 | 235 | _set_str(const char *name, PyObject **target, PyObject *src, const char *dflt) | 
 | 236 | { | 
 | 237 | 	if (src == NULL) | 
 | 238 | 		*target = PyString_FromString(dflt); | 
 | 239 | 	else { | 
 | 240 | 		if (src == Py_None) | 
 | 241 | 			*target = NULL; | 
| Andrew McNamara | 37d2bdf | 2005-01-10 12:22:48 +0000 | [diff] [blame] | 242 | 		else if (!IS_BASESTRING(src)) { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 243 | 			PyErr_Format(PyExc_TypeError,  | 
 | 244 | 				     "\"%s\" must be an string", name); | 
 | 245 | 			return -1; | 
| Andrew McNamara | dd3e6cb | 2005-01-07 06:46:50 +0000 | [diff] [blame] | 246 | 		} | 
 | 247 | 		else { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 248 | 			Py_XDECREF(*target); | 
 | 249 | 			Py_INCREF(src); | 
 | 250 | 			*target = src; | 
 | 251 | 		} | 
 | 252 | 	} | 
 | 253 |         return 0; | 
 | 254 | } | 
 | 255 |  | 
 | 256 | static int | 
 | 257 | dialect_check_quoting(int quoting) | 
 | 258 | { | 
 | 259 |         StyleDesc *qs = quote_styles; | 
 | 260 |  | 
 | 261 | 	for (qs = quote_styles; qs->name; qs++) { | 
 | 262 | 		if (qs->style == quoting) | 
 | 263 |                         return 0; | 
 | 264 |         } | 
 | 265 | 	PyErr_Format(PyExc_TypeError, "bad \"quoting\" value"); | 
 | 266 |         return -1; | 
 | 267 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 268 |  | 
 | 269 | #define D_OFF(x) offsetof(DialectObj, x) | 
 | 270 |  | 
 | 271 | static struct PyMemberDef Dialect_memberlist[] = { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 272 | 	{ "delimiter",          T_CHAR, D_OFF(delimiter), READONLY }, | 
 | 273 | 	{ "skipinitialspace",   T_INT, D_OFF(skipinitialspace), READONLY }, | 
 | 274 | 	{ "doublequote",        T_INT, D_OFF(doublequote), READONLY }, | 
 | 275 | 	{ "strict",             T_INT, D_OFF(strict), READONLY }, | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 276 | 	{ NULL } | 
 | 277 | }; | 
 | 278 |  | 
 | 279 | static PyGetSetDef Dialect_getsetlist[] = { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 280 | 	{ "escapechar",		(getter)Dialect_get_escapechar}, | 
 | 281 | 	{ "lineterminator",	(getter)Dialect_get_lineterminator}, | 
 | 282 | 	{ "quotechar",		(getter)Dialect_get_quotechar}, | 
 | 283 | 	{ "quoting",		(getter)Dialect_get_quoting}, | 
 | 284 | 	{NULL}, | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 285 | }; | 
 | 286 |  | 
 | 287 | static void | 
 | 288 | Dialect_dealloc(DialectObj *self) | 
 | 289 | { | 
 | 290 |         Py_XDECREF(self->lineterminator); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 291 |         self->ob_type->tp_free((PyObject *)self); | 
 | 292 | } | 
 | 293 |  | 
| Jeremy Hylton | af68c87 | 2005-12-10 18:50:16 +0000 | [diff] [blame] | 294 | static const char *dialect_kws[] = { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 295 | 	"dialect", | 
 | 296 | 	"delimiter", | 
 | 297 | 	"doublequote", | 
 | 298 | 	"escapechar", | 
 | 299 | 	"lineterminator", | 
 | 300 | 	"quotechar", | 
 | 301 | 	"quoting", | 
 | 302 | 	"skipinitialspace", | 
 | 303 | 	"strict", | 
 | 304 | 	NULL | 
 | 305 | }; | 
 | 306 |  | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 307 | static PyObject * | 
 | 308 | dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 309 | { | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 310 | 	DialectObj *self; | 
 | 311 | 	PyObject *ret = NULL; | 
 | 312 | 	PyObject *dialect = NULL; | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 313 | 	PyObject *delimiter = NULL; | 
 | 314 | 	PyObject *doublequote = NULL; | 
 | 315 | 	PyObject *escapechar = NULL; | 
 | 316 | 	PyObject *lineterminator = NULL; | 
 | 317 | 	PyObject *quotechar = NULL; | 
 | 318 | 	PyObject *quoting = NULL; | 
 | 319 | 	PyObject *skipinitialspace = NULL; | 
 | 320 | 	PyObject *strict = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 321 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 322 | 	if (!PyArg_ParseTupleAndKeywords(args, kwargs, | 
 | 323 | 					 "|OOOOOOOOO", dialect_kws, | 
 | 324 | 					 &dialect, | 
 | 325 | 					 &delimiter, | 
 | 326 | 					 &doublequote, | 
 | 327 | 					 &escapechar, | 
 | 328 | 					 &lineterminator, | 
 | 329 | 					 "echar, | 
 | 330 | 					 "ing, | 
 | 331 | 					 &skipinitialspace, | 
 | 332 | 					 &strict)) | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 333 | 		return NULL; | 
 | 334 |  | 
 | 335 | 	if (dialect != NULL) { | 
 | 336 | 		if (IS_BASESTRING(dialect)) { | 
 | 337 | 			dialect = get_dialect_from_registry(dialect); | 
 | 338 | 			if (dialect == NULL) | 
 | 339 | 				return NULL; | 
 | 340 | 		} | 
 | 341 | 		else | 
 | 342 | 			Py_INCREF(dialect); | 
 | 343 | 		/* Can we reuse this instance? */ | 
 | 344 | 		if (PyObject_TypeCheck(dialect, &Dialect_Type) && | 
 | 345 | 		    delimiter == 0 && | 
 | 346 | 		    doublequote == 0 && | 
 | 347 | 		    escapechar == 0 && | 
 | 348 | 		    lineterminator == 0 && | 
 | 349 | 		    quotechar == 0 && | 
 | 350 | 		    quoting == 0 && | 
 | 351 | 		    skipinitialspace == 0 && | 
 | 352 | 		    strict == 0) | 
 | 353 | 			return dialect; | 
 | 354 | 	} | 
 | 355 |  | 
 | 356 | 	self = (DialectObj *)type->tp_alloc(type, 0); | 
 | 357 | 	if (self == NULL) { | 
 | 358 | 		Py_XDECREF(dialect); | 
 | 359 | 		return NULL; | 
 | 360 | 	} | 
 | 361 | 	self->lineterminator = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 362 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 363 | 	Py_XINCREF(delimiter); | 
 | 364 | 	Py_XINCREF(doublequote); | 
 | 365 | 	Py_XINCREF(escapechar); | 
 | 366 | 	Py_XINCREF(lineterminator); | 
 | 367 | 	Py_XINCREF(quotechar); | 
 | 368 | 	Py_XINCREF(quoting); | 
 | 369 | 	Py_XINCREF(skipinitialspace); | 
 | 370 | 	Py_XINCREF(strict); | 
 | 371 | 	if (dialect != NULL) { | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 372 | #define DIALECT_GETATTR(v, n) \ | 
 | 373 | 		if (v == NULL) \ | 
 | 374 | 			v = PyObject_GetAttrString(dialect, n) | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 375 | 		DIALECT_GETATTR(delimiter, "delimiter"); | 
 | 376 | 		DIALECT_GETATTR(doublequote, "doublequote"); | 
 | 377 | 		DIALECT_GETATTR(escapechar, "escapechar"); | 
 | 378 | 		DIALECT_GETATTR(lineterminator, "lineterminator"); | 
 | 379 | 		DIALECT_GETATTR(quotechar, "quotechar"); | 
 | 380 | 		DIALECT_GETATTR(quoting, "quoting"); | 
 | 381 | 		DIALECT_GETATTR(skipinitialspace, "skipinitialspace"); | 
 | 382 | 		DIALECT_GETATTR(strict, "strict"); | 
 | 383 | 		PyErr_Clear(); | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 384 | 	} | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 385 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 386 | 	/* check types and convert to C values */ | 
 | 387 | #define DIASET(meth, name, target, src, dflt) \ | 
 | 388 | 	if (meth(name, target, src, dflt)) \ | 
 | 389 | 		goto err | 
 | 390 | 	DIASET(_set_char, "delimiter", &self->delimiter, delimiter, ','); | 
 | 391 | 	DIASET(_set_bool, "doublequote", &self->doublequote, doublequote, 1); | 
 | 392 | 	DIASET(_set_char, "escapechar", &self->escapechar, escapechar, 0); | 
 | 393 | 	DIASET(_set_str, "lineterminator", &self->lineterminator, lineterminator, "\r\n"); | 
 | 394 | 	DIASET(_set_char, "quotechar", &self->quotechar, quotechar, '"'); | 
 | 395 | 	DIASET(_set_int, "quoting", &self->quoting, quoting, QUOTE_MINIMAL); | 
 | 396 | 	DIASET(_set_bool, "skipinitialspace", &self->skipinitialspace, skipinitialspace, 0); | 
 | 397 | 	DIASET(_set_bool, "strict", &self->strict, strict, 0); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 398 |  | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 399 | 	/* validate options */ | 
 | 400 | 	if (dialect_check_quoting(self->quoting)) | 
 | 401 | 		goto err; | 
 | 402 | 	if (self->delimiter == 0) { | 
 | 403 |                 PyErr_SetString(PyExc_TypeError, "delimiter must be set"); | 
 | 404 | 		goto err; | 
 | 405 | 	} | 
| Andrew McNamara | 5d45a8d | 2005-01-12 08:16:17 +0000 | [diff] [blame] | 406 | 	if (quotechar == Py_None && quoting == NULL) | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 407 | 		self->quoting = QUOTE_NONE; | 
 | 408 | 	if (self->quoting != QUOTE_NONE && self->quotechar == 0) { | 
 | 409 |                 PyErr_SetString(PyExc_TypeError,  | 
 | 410 | 				"quotechar must be set if quoting enabled"); | 
 | 411 | 		goto err; | 
 | 412 | 	} | 
 | 413 | 	if (self->lineterminator == 0) { | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 414 | 		PyErr_SetString(PyExc_TypeError, "lineterminator must be set"); | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 415 | 		goto err; | 
 | 416 | 	} | 
 | 417 |  | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 418 | 	ret = (PyObject *)self; | 
| Skip Montanaro | d60fbd4 | 2005-06-15 01:33:30 +0000 | [diff] [blame] | 419 | 	Py_INCREF(self); | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 420 | err: | 
| Skip Montanaro | d60fbd4 | 2005-06-15 01:33:30 +0000 | [diff] [blame] | 421 | 	Py_XDECREF(self); | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 422 | 	Py_XDECREF(dialect); | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 423 | 	Py_XDECREF(delimiter); | 
 | 424 | 	Py_XDECREF(doublequote); | 
 | 425 | 	Py_XDECREF(escapechar); | 
 | 426 | 	Py_XDECREF(lineterminator); | 
 | 427 | 	Py_XDECREF(quotechar); | 
 | 428 | 	Py_XDECREF(quoting); | 
 | 429 | 	Py_XDECREF(skipinitialspace); | 
 | 430 | 	Py_XDECREF(strict); | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 431 | 	return ret; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 432 | } | 
 | 433 |  | 
 | 434 |  | 
 | 435 | PyDoc_STRVAR(Dialect_Type_doc,  | 
 | 436 | "CSV dialect\n" | 
 | 437 | "\n" | 
 | 438 | "The Dialect type records CSV parsing and generation options.\n"); | 
 | 439 |  | 
 | 440 | static PyTypeObject Dialect_Type = { | 
 | 441 | 	PyObject_HEAD_INIT(NULL) | 
 | 442 | 	0,                                      /* ob_size */ | 
 | 443 | 	"_csv.Dialect",                         /* tp_name */ | 
 | 444 | 	sizeof(DialectObj),                     /* tp_basicsize */ | 
 | 445 | 	0,                                      /* tp_itemsize */ | 
 | 446 | 	/*  methods  */ | 
 | 447 | 	(destructor)Dialect_dealloc,            /* tp_dealloc */ | 
 | 448 | 	(printfunc)0,                           /* tp_print */ | 
 | 449 | 	(getattrfunc)0,                         /* tp_getattr */ | 
 | 450 | 	(setattrfunc)0,                         /* tp_setattr */ | 
 | 451 | 	(cmpfunc)0,                             /* tp_compare */ | 
 | 452 | 	(reprfunc)0,                            /* tp_repr */ | 
 | 453 | 	0,                                      /* tp_as_number */ | 
 | 454 | 	0,                                      /* tp_as_sequence */ | 
 | 455 | 	0,                                      /* tp_as_mapping */ | 
 | 456 | 	(hashfunc)0,                            /* tp_hash */ | 
 | 457 | 	(ternaryfunc)0,                         /* tp_call */ | 
 | 458 | 	(reprfunc)0,                    	/* tp_str */ | 
 | 459 | 	0,                                      /* tp_getattro */ | 
 | 460 |         0,                                      /* tp_setattro */ | 
 | 461 |         0,                                      /* tp_as_buffer */ | 
 | 462 |         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ | 
 | 463 | 	Dialect_Type_doc,                       /* tp_doc */ | 
 | 464 |         0,                                      /* tp_traverse */ | 
 | 465 |         0,                                      /* tp_clear */ | 
 | 466 |         0,                                      /* tp_richcompare */ | 
 | 467 |         0,                                      /* tp_weaklistoffset */ | 
 | 468 |         0,                                      /* tp_iter */ | 
 | 469 |         0,                                      /* tp_iternext */ | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 470 | 	0,					/* tp_methods */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 471 |         Dialect_memberlist,                     /* tp_members */ | 
 | 472 |         Dialect_getsetlist,                     /* tp_getset */ | 
 | 473 | 	0,					/* tp_base */ | 
 | 474 | 	0,					/* tp_dict */ | 
 | 475 | 	0,					/* tp_descr_get */ | 
 | 476 | 	0,					/* tp_descr_set */ | 
 | 477 | 	0,					/* tp_dictoffset */ | 
| Andrew McNamara | 29bf4e4 | 2005-01-11 04:49:53 +0000 | [diff] [blame] | 478 | 	0,					/* tp_init */ | 
 | 479 | 	0,					/* tp_alloc */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 480 | 	dialect_new,			        /* tp_new */ | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 481 | 	0,                           		/* tp_free */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 482 | }; | 
 | 483 |  | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 484 | /* | 
 | 485 |  * Return an instance of the dialect type, given a Python instance or kwarg | 
 | 486 |  * description of the dialect | 
 | 487 |  */ | 
 | 488 | static PyObject * | 
 | 489 | _call_dialect(PyObject *dialect_inst, PyObject *kwargs) | 
 | 490 | { | 
 | 491 | 	PyObject *ctor_args; | 
 | 492 | 	PyObject *dialect; | 
 | 493 |  | 
 | 494 | 	ctor_args = Py_BuildValue(dialect_inst ? "(O)" : "()", dialect_inst); | 
 | 495 | 	if (ctor_args == NULL) | 
 | 496 | 		return NULL; | 
 | 497 | 	dialect = PyObject_Call((PyObject *)&Dialect_Type, ctor_args, kwargs); | 
 | 498 | 	Py_DECREF(ctor_args); | 
 | 499 | 	return dialect; | 
 | 500 | } | 
 | 501 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 502 | /* | 
 | 503 |  * READER | 
 | 504 |  */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 505 | static int | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 506 | parse_save_field(ReaderObj *self) | 
 | 507 | { | 
 | 508 | 	PyObject *field; | 
 | 509 |  | 
 | 510 | 	field = PyString_FromStringAndSize(self->field, self->field_len); | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 511 | 	if (field == NULL) | 
 | 512 | 		return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 513 | 	self->field_len = 0; | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 514 | 	if (self->numeric_field) { | 
 | 515 | 		PyObject *tmp; | 
 | 516 |  | 
 | 517 | 		self->numeric_field = 0; | 
 | 518 | 		tmp = PyNumber_Float(field); | 
 | 519 | 		if (tmp == NULL) { | 
 | 520 | 			Py_DECREF(field); | 
 | 521 | 			return -1; | 
 | 522 | 		} | 
 | 523 | 		Py_DECREF(field); | 
 | 524 | 		field = tmp; | 
 | 525 | 	} | 
 | 526 | 	PyList_Append(self->fields, field); | 
 | 527 | 	Py_DECREF(field); | 
 | 528 | 	return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 529 | } | 
 | 530 |  | 
 | 531 | static int | 
 | 532 | parse_grow_buff(ReaderObj *self) | 
 | 533 | { | 
 | 534 | 	if (self->field_size == 0) { | 
 | 535 | 		self->field_size = 4096; | 
| Andrew McNamara | dcfb38c | 2003-06-09 05:59:23 +0000 | [diff] [blame] | 536 | 		if (self->field != NULL) | 
 | 537 | 			PyMem_Free(self->field); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 538 | 		self->field = PyMem_Malloc(self->field_size); | 
 | 539 | 	} | 
 | 540 | 	else { | 
 | 541 | 		self->field_size *= 2; | 
 | 542 | 		self->field = PyMem_Realloc(self->field, self->field_size); | 
 | 543 | 	} | 
 | 544 | 	if (self->field == NULL) { | 
 | 545 | 		PyErr_NoMemory(); | 
 | 546 | 		return 0; | 
 | 547 | 	} | 
 | 548 | 	return 1; | 
 | 549 | } | 
 | 550 |  | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 551 | static int | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 552 | parse_add_char(ReaderObj *self, char c) | 
 | 553 | { | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 554 | 	if (self->field_len >= field_limit) { | 
 | 555 | 		PyErr_Format(error_obj, "field larger than field limit (%ld)", | 
 | 556 | 			     field_limit); | 
 | 557 | 		return -1; | 
 | 558 | 	} | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 559 | 	if (self->field_len == self->field_size && !parse_grow_buff(self)) | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 560 | 		return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 561 | 	self->field[self->field_len++] = c; | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 562 | 	return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 563 | } | 
 | 564 |  | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 565 | static int | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 566 | parse_process_char(ReaderObj *self, char c) | 
 | 567 | { | 
 | 568 |         DialectObj *dialect = self->dialect; | 
 | 569 |  | 
 | 570 | 	switch (self->state) { | 
 | 571 | 	case START_RECORD: | 
 | 572 | 		/* start of record */ | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 573 | 		if (c == '\0') | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 574 | 			/* empty line - return [] */ | 
 | 575 | 			break; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 576 | 		else if (c == '\n' || c == '\r') { | 
 | 577 | 			self->state = EAT_CRNL; | 
 | 578 | 			break; | 
 | 579 | 		} | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 580 | 		/* normal character - handle as START_FIELD */ | 
 | 581 | 		self->state = START_FIELD; | 
 | 582 | 		/* fallthru */ | 
 | 583 | 	case START_FIELD: | 
 | 584 | 		/* expecting field */ | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 585 | 		if (c == '\n' || c == '\r' || c == '\0') { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 586 | 			/* save empty field - return [fields] */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 587 | 			if (parse_save_field(self) < 0) | 
 | 588 | 				return -1; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 589 | 			self->state = (c == '\0' ? START_RECORD : EAT_CRNL); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 590 | 		} | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 591 | 		else if (c == dialect->quotechar &&  | 
 | 592 | 			 dialect->quoting != QUOTE_NONE) { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 593 | 			/* start quoted field */ | 
 | 594 | 			self->state = IN_QUOTED_FIELD; | 
 | 595 | 		} | 
 | 596 | 		else if (c == dialect->escapechar) { | 
 | 597 | 			/* possible escaped character */ | 
 | 598 | 			self->state = ESCAPED_CHAR; | 
 | 599 | 		} | 
 | 600 | 		else if (c == ' ' && dialect->skipinitialspace) | 
 | 601 | 			/* ignore space at start of field */ | 
 | 602 | 			; | 
 | 603 | 		else if (c == dialect->delimiter) { | 
 | 604 | 			/* save empty field */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 605 | 			if (parse_save_field(self) < 0) | 
 | 606 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 607 | 		} | 
 | 608 | 		else { | 
 | 609 | 			/* begin new unquoted field */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 610 | 			if (dialect->quoting == QUOTE_NONNUMERIC) | 
 | 611 | 				self->numeric_field = 1; | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 612 | 			if (parse_add_char(self, c) < 0) | 
 | 613 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 614 | 			self->state = IN_FIELD; | 
 | 615 | 		} | 
 | 616 | 		break; | 
 | 617 |  | 
 | 618 | 	case ESCAPED_CHAR: | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 619 | 		if (c == '\0') | 
 | 620 | 			c = '\n'; | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 621 | 		if (parse_add_char(self, c) < 0) | 
 | 622 | 			return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 623 | 		self->state = IN_FIELD; | 
 | 624 | 		break; | 
 | 625 |  | 
 | 626 | 	case IN_FIELD: | 
 | 627 | 		/* in unquoted field */ | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 628 | 		if (c == '\n' || c == '\r' || c == '\0') { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 629 | 			/* end of line - return [fields] */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 630 | 			if (parse_save_field(self) < 0) | 
 | 631 | 				return -1; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 632 | 			self->state = (c == '\0' ? START_RECORD : EAT_CRNL); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 633 | 		} | 
 | 634 | 		else if (c == dialect->escapechar) { | 
 | 635 | 			/* possible escaped character */ | 
 | 636 | 			self->state = ESCAPED_CHAR; | 
 | 637 | 		} | 
 | 638 | 		else if (c == dialect->delimiter) { | 
 | 639 | 			/* save field - wait for new field */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 640 | 			if (parse_save_field(self) < 0) | 
 | 641 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 642 | 			self->state = START_FIELD; | 
 | 643 | 		} | 
 | 644 | 		else { | 
 | 645 | 			/* normal character - save in field */ | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 646 | 			if (parse_add_char(self, c) < 0) | 
 | 647 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 648 | 		} | 
 | 649 | 		break; | 
 | 650 |  | 
 | 651 | 	case IN_QUOTED_FIELD: | 
 | 652 | 		/* in quoted field */ | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 653 | 		if (c == '\0') | 
 | 654 | 			; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 655 | 		else if (c == dialect->escapechar) { | 
 | 656 | 			/* Possible escape character */ | 
 | 657 | 			self->state = ESCAPE_IN_QUOTED_FIELD; | 
 | 658 | 		} | 
| Andrew McNamara | 1196cf1 | 2005-01-07 04:42:45 +0000 | [diff] [blame] | 659 | 		else if (c == dialect->quotechar && | 
 | 660 | 			 dialect->quoting != QUOTE_NONE) { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 661 | 			if (dialect->doublequote) { | 
 | 662 | 				/* doublequote; " represented by "" */ | 
 | 663 | 				self->state = QUOTE_IN_QUOTED_FIELD; | 
 | 664 | 			} | 
 | 665 | 			else { | 
 | 666 | 				/* end of quote part of field */ | 
 | 667 | 				self->state = IN_FIELD; | 
 | 668 | 			} | 
 | 669 | 		} | 
 | 670 | 		else { | 
 | 671 | 			/* normal character - save in field */ | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 672 | 			if (parse_add_char(self, c) < 0) | 
 | 673 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 674 | 		} | 
 | 675 | 		break; | 
 | 676 |  | 
 | 677 | 	case ESCAPE_IN_QUOTED_FIELD: | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 678 | 		if (c == '\0') | 
 | 679 | 			c = '\n'; | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 680 | 		if (parse_add_char(self, c) < 0) | 
 | 681 | 			return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 682 | 		self->state = IN_QUOTED_FIELD; | 
 | 683 | 		break; | 
 | 684 |  | 
 | 685 | 	case QUOTE_IN_QUOTED_FIELD: | 
 | 686 | 		/* doublequote - seen a quote in a quoted field */ | 
 | 687 | 		if (dialect->quoting != QUOTE_NONE &&  | 
 | 688 |                     c == dialect->quotechar) { | 
 | 689 | 			/* save "" as " */ | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 690 | 			if (parse_add_char(self, c) < 0) | 
 | 691 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 692 | 			self->state = IN_QUOTED_FIELD; | 
 | 693 | 		} | 
 | 694 | 		else if (c == dialect->delimiter) { | 
 | 695 | 			/* save field - wait for new field */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 696 | 			if (parse_save_field(self) < 0) | 
 | 697 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 698 | 			self->state = START_FIELD; | 
 | 699 | 		} | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 700 | 		else if (c == '\n' || c == '\r' || c == '\0') { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 701 | 			/* end of line - return [fields] */ | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 702 | 			if (parse_save_field(self) < 0) | 
 | 703 | 				return -1; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 704 | 			self->state = (c == '\0' ? START_RECORD : EAT_CRNL); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 705 | 		} | 
 | 706 | 		else if (!dialect->strict) { | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 707 | 			if (parse_add_char(self, c) < 0) | 
 | 708 | 				return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 709 | 			self->state = IN_FIELD; | 
 | 710 | 		} | 
 | 711 | 		else { | 
 | 712 | 			/* illegal */ | 
| Andrew McNamara | 5cfd837 | 2005-01-12 11:39:50 +0000 | [diff] [blame] | 713 | 			PyErr_Format(error_obj, "'%c' expected after '%c'",  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 714 | 					dialect->delimiter,  | 
 | 715 |                                         dialect->quotechar); | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 716 | 			return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 717 | 		} | 
 | 718 | 		break; | 
 | 719 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 720 | 	case EAT_CRNL: | 
 | 721 | 		if (c == '\n' || c == '\r') | 
 | 722 | 			; | 
 | 723 | 		else if (c == '\0') | 
 | 724 | 			self->state = START_RECORD; | 
 | 725 | 		else { | 
 | 726 | 			PyErr_Format(error_obj, "new-line character seen in unquoted field - do you need to open the file in universal-newline mode?"); | 
 | 727 | 			return -1; | 
 | 728 | 		} | 
 | 729 | 		break; | 
 | 730 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 731 | 	} | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 732 | 	return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 733 | } | 
 | 734 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 735 | static int | 
 | 736 | parse_reset(ReaderObj *self) | 
 | 737 | { | 
 | 738 | 	Py_XDECREF(self->fields); | 
 | 739 | 	self->fields = PyList_New(0); | 
 | 740 | 	if (self->fields == NULL) | 
 | 741 | 		return -1; | 
 | 742 | 	self->field_len = 0; | 
 | 743 | 	self->state = START_RECORD; | 
 | 744 | 	self->numeric_field = 0; | 
 | 745 | 	return 0; | 
 | 746 | } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 747 |  | 
 | 748 | static PyObject * | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 749 | Reader_iternext(ReaderObj *self) | 
 | 750 | { | 
 | 751 |         PyObject *lineobj; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 752 |         PyObject *fields = NULL; | 
 | 753 |         char *line, c; | 
 | 754 | 	int linelen; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 755 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 756 | 	if (parse_reset(self) < 0) | 
 | 757 | 		return NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 758 |         do { | 
 | 759 |                 lineobj = PyIter_Next(self->input_iter); | 
 | 760 |                 if (lineobj == NULL) { | 
 | 761 |                         /* End of input OR exception */ | 
 | 762 |                         if (!PyErr_Occurred() && self->field_len != 0) | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 763 |                                 PyErr_Format(error_obj, | 
 | 764 | 					     "newline inside string"); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 765 |                         return NULL; | 
 | 766 |                 } | 
| Andrew McNamara | 7f2053e | 2005-01-12 11:17:16 +0000 | [diff] [blame] | 767 | 		++self->line_num; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 768 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 769 |                 line = PyString_AsString(lineobj); | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 770 | 		linelen = PyString_Size(lineobj); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 771 |  | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 772 |                 if (line == NULL || linelen < 0) { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 773 |                         Py_DECREF(lineobj); | 
 | 774 |                         return NULL; | 
 | 775 |                 } | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 776 |                 while (linelen--) { | 
 | 777 | 			c = *line++; | 
 | 778 | 			if (c == '\0') { | 
 | 779 | 				Py_DECREF(lineobj); | 
 | 780 | 				PyErr_Format(error_obj, | 
 | 781 | 					     "line contains NULL byte"); | 
 | 782 | 				goto err; | 
 | 783 | 			} | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 784 | 			if (parse_process_char(self, c) < 0) { | 
 | 785 | 				Py_DECREF(lineobj); | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 786 | 				goto err; | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 787 | 			} | 
 | 788 | 		} | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 789 |                 Py_DECREF(lineobj); | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 790 | 		if (parse_process_char(self, 0) < 0) | 
 | 791 | 			goto err; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 792 |         } while (self->state != START_RECORD); | 
 | 793 |  | 
 | 794 |         fields = self->fields; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 795 |         self->fields = NULL; | 
 | 796 | err: | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 797 |         return fields; | 
 | 798 | } | 
 | 799 |  | 
 | 800 | static void | 
 | 801 | Reader_dealloc(ReaderObj *self) | 
 | 802 | { | 
| Andrew McNamara | 77ead87 | 2005-01-10 02:09:41 +0000 | [diff] [blame] | 803 | 	PyObject_GC_UnTrack(self); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 804 |         Py_XDECREF(self->dialect); | 
 | 805 |         Py_XDECREF(self->input_iter); | 
 | 806 |         Py_XDECREF(self->fields); | 
| Andrew McNamara | dcfb38c | 2003-06-09 05:59:23 +0000 | [diff] [blame] | 807 |         if (self->field != NULL) | 
 | 808 |         	PyMem_Free(self->field); | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 809 | 	PyObject_GC_Del(self); | 
 | 810 | } | 
 | 811 |  | 
 | 812 | static int | 
 | 813 | Reader_traverse(ReaderObj *self, visitproc visit, void *arg) | 
 | 814 | { | 
 | 815 | 	int err; | 
 | 816 | #define VISIT(SLOT) \ | 
 | 817 | 	if (SLOT) { \ | 
 | 818 | 		err = visit((PyObject *)(SLOT), arg); \ | 
 | 819 | 		if (err) \ | 
 | 820 | 			return err; \ | 
 | 821 | 	} | 
 | 822 | 	VISIT(self->dialect); | 
 | 823 | 	VISIT(self->input_iter); | 
 | 824 | 	VISIT(self->fields); | 
 | 825 | 	return 0; | 
 | 826 | } | 
 | 827 |  | 
 | 828 | static int | 
 | 829 | Reader_clear(ReaderObj *self) | 
 | 830 | { | 
 | 831 |         Py_XDECREF(self->dialect); | 
 | 832 |         Py_XDECREF(self->input_iter); | 
 | 833 |         Py_XDECREF(self->fields); | 
 | 834 |         self->dialect = NULL; | 
 | 835 |         self->input_iter = NULL; | 
 | 836 |         self->fields = NULL; | 
 | 837 | 	return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 838 | } | 
 | 839 |  | 
 | 840 | PyDoc_STRVAR(Reader_Type_doc, | 
 | 841 | "CSV reader\n" | 
 | 842 | "\n" | 
 | 843 | "Reader objects are responsible for reading and parsing tabular data\n" | 
 | 844 | "in CSV format.\n" | 
 | 845 | ); | 
 | 846 |  | 
 | 847 | static struct PyMethodDef Reader_methods[] = { | 
 | 848 | 	{ NULL, NULL } | 
 | 849 | }; | 
| Andrew McNamara | f69d94f | 2005-01-13 11:30:54 +0000 | [diff] [blame] | 850 | #define R_OFF(x) offsetof(ReaderObj, x) | 
 | 851 |  | 
 | 852 | static struct PyMemberDef Reader_memberlist[] = { | 
 | 853 | 	{ "dialect", T_OBJECT, R_OFF(dialect), RO }, | 
 | 854 | 	{ "line_num", T_ULONG, R_OFF(line_num), RO }, | 
 | 855 | 	{ NULL } | 
 | 856 | }; | 
 | 857 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 858 |  | 
 | 859 | static PyTypeObject Reader_Type = { | 
 | 860 | 	PyObject_HEAD_INIT(NULL) | 
 | 861 | 	0,                                      /*ob_size*/ | 
 | 862 | 	"_csv.reader",                          /*tp_name*/ | 
 | 863 | 	sizeof(ReaderObj),                      /*tp_basicsize*/ | 
 | 864 | 	0,                                      /*tp_itemsize*/ | 
 | 865 | 	/* methods */ | 
 | 866 | 	(destructor)Reader_dealloc,             /*tp_dealloc*/ | 
 | 867 | 	(printfunc)0,                           /*tp_print*/ | 
 | 868 | 	(getattrfunc)0,                         /*tp_getattr*/ | 
 | 869 | 	(setattrfunc)0,                         /*tp_setattr*/ | 
 | 870 | 	(cmpfunc)0,                             /*tp_compare*/ | 
 | 871 | 	(reprfunc)0,                            /*tp_repr*/ | 
 | 872 | 	0,                                      /*tp_as_number*/ | 
 | 873 | 	0,                                      /*tp_as_sequence*/ | 
 | 874 | 	0,                                      /*tp_as_mapping*/ | 
 | 875 | 	(hashfunc)0,                            /*tp_hash*/ | 
 | 876 | 	(ternaryfunc)0,                         /*tp_call*/ | 
 | 877 | 	(reprfunc)0,                    	/*tp_str*/ | 
 | 878 | 	0,                                      /*tp_getattro*/ | 
 | 879 |         0,                                      /*tp_setattro*/ | 
 | 880 |         0,                                      /*tp_as_buffer*/ | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 881 |         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | | 
 | 882 | 		Py_TPFLAGS_HAVE_GC,		/*tp_flags*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 883 | 	Reader_Type_doc,                        /*tp_doc*/ | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 884 |         (traverseproc)Reader_traverse,          /*tp_traverse*/ | 
 | 885 |         (inquiry)Reader_clear,                  /*tp_clear*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 886 |         0,                                      /*tp_richcompare*/ | 
 | 887 |         0,                                      /*tp_weaklistoffset*/ | 
| Andrew McNamara | 575a00b | 2005-01-06 02:25:41 +0000 | [diff] [blame] | 888 |         PyObject_SelfIter,		        /*tp_iter*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 889 |         (getiterfunc)Reader_iternext,           /*tp_iternext*/ | 
 | 890 |         Reader_methods,                         /*tp_methods*/ | 
 | 891 |         Reader_memberlist,                      /*tp_members*/ | 
 | 892 |         0,                                      /*tp_getset*/ | 
 | 893 |  | 
 | 894 | }; | 
 | 895 |  | 
 | 896 | static PyObject * | 
 | 897 | csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args) | 
 | 898 | { | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 899 | 	PyObject * iterator, * dialect = NULL; | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 900 |         ReaderObj * self = PyObject_GC_New(ReaderObj, &Reader_Type); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 901 |  | 
 | 902 |         if (!self) | 
 | 903 |                 return NULL; | 
 | 904 |  | 
 | 905 |         self->dialect = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 906 |         self->fields = NULL; | 
 | 907 |         self->input_iter = NULL; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 908 | 	self->field = NULL; | 
 | 909 | 	self->field_size = 0; | 
| Andrew McNamara | 7f2053e | 2005-01-12 11:17:16 +0000 | [diff] [blame] | 910 | 	self->line_num = 0; | 
| Andrew McNamara | 0f0599d | 2005-01-12 09:45:18 +0000 | [diff] [blame] | 911 |  | 
 | 912 | 	if (parse_reset(self) < 0) { | 
 | 913 |                 Py_DECREF(self); | 
 | 914 |                 return NULL; | 
 | 915 | 	} | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 916 |  | 
| Raymond Hettinger | 1761a7c | 2004-06-20 04:23:19 +0000 | [diff] [blame] | 917 | 	if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 918 |                 Py_DECREF(self); | 
 | 919 |                 return NULL; | 
 | 920 |         } | 
 | 921 |         self->input_iter = PyObject_GetIter(iterator); | 
 | 922 |         if (self->input_iter == NULL) { | 
 | 923 |                 PyErr_SetString(PyExc_TypeError,  | 
 | 924 |                                 "argument 1 must be an iterator"); | 
 | 925 |                 Py_DECREF(self); | 
 | 926 |                 return NULL; | 
 | 927 |         } | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 928 | 	self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 929 |         if (self->dialect == NULL) { | 
 | 930 |                 Py_DECREF(self); | 
 | 931 |                 return NULL; | 
 | 932 |         } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 933 |  | 
| Andrew McNamara | 77ead87 | 2005-01-10 02:09:41 +0000 | [diff] [blame] | 934 | 	PyObject_GC_Track(self); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 935 |         return (PyObject *)self; | 
 | 936 | } | 
 | 937 |  | 
 | 938 | /* | 
 | 939 |  * WRITER | 
 | 940 |  */ | 
 | 941 | /* ---------------------------------------------------------------- */ | 
 | 942 | static void | 
 | 943 | join_reset(WriterObj *self) | 
 | 944 | { | 
 | 945 | 	self->rec_len = 0; | 
 | 946 | 	self->num_fields = 0; | 
 | 947 | } | 
 | 948 |  | 
 | 949 | #define MEM_INCR 32768 | 
 | 950 |  | 
 | 951 | /* Calculate new record length or append field to record.  Return new | 
 | 952 |  * record length. | 
 | 953 |  */ | 
 | 954 | static int | 
 | 955 | join_append_data(WriterObj *self, char *field, int quote_empty, | 
 | 956 | 		 int *quoted, int copy_phase) | 
 | 957 | { | 
 | 958 |         DialectObj *dialect = self->dialect; | 
 | 959 | 	int i, rec_len; | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 960 | 	char *lineterm; | 
 | 961 |  | 
 | 962 | #define ADDCH(c) \ | 
 | 963 | 	do {\ | 
 | 964 | 		if (copy_phase) \ | 
 | 965 | 			self->rec[rec_len] = c;\ | 
 | 966 | 		rec_len++;\ | 
 | 967 | 	} while(0) | 
 | 968 |  | 
 | 969 | 	lineterm = PyString_AsString(dialect->lineterminator); | 
 | 970 | 	if (lineterm == NULL) | 
 | 971 | 		return -1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 972 |  | 
 | 973 | 	rec_len = self->rec_len; | 
 | 974 |  | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 975 | 	/* If this is not the first field we need a field separator */ | 
 | 976 | 	if (self->num_fields > 0) | 
 | 977 | 		ADDCH(dialect->delimiter); | 
 | 978 |  | 
 | 979 | 	/* Handle preceding quote */ | 
 | 980 | 	if (copy_phase && *quoted) | 
 | 981 | 		ADDCH(dialect->quotechar); | 
 | 982 |  | 
 | 983 | 	/* Copy/count field data */ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 984 | 	for (i = 0;; i++) { | 
 | 985 | 		char c = field[i]; | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 986 | 		int want_escape = 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 987 |  | 
 | 988 | 		if (c == '\0') | 
 | 989 | 			break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 990 |  | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 991 | 		if (c == dialect->delimiter || | 
 | 992 | 		    c == dialect->escapechar || | 
 | 993 | 		    c == dialect->quotechar || | 
 | 994 | 		    strchr(lineterm, c)) { | 
 | 995 | 			if (dialect->quoting == QUOTE_NONE) | 
 | 996 | 				want_escape = 1; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 997 | 			else { | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 998 | 				if (c == dialect->quotechar) { | 
 | 999 | 					if (dialect->doublequote) | 
 | 1000 | 						ADDCH(dialect->quotechar); | 
 | 1001 | 					else | 
 | 1002 | 						want_escape = 1; | 
 | 1003 | 				} | 
 | 1004 | 				if (!want_escape) | 
 | 1005 | 					*quoted = 1; | 
 | 1006 | 			} | 
 | 1007 | 			if (want_escape) { | 
 | 1008 | 				if (!dialect->escapechar) { | 
 | 1009 | 					PyErr_Format(error_obj,  | 
 | 1010 | 						     "need to escape, but no escapechar set"); | 
 | 1011 | 					return -1; | 
 | 1012 | 				} | 
 | 1013 | 				ADDCH(dialect->escapechar); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1014 | 			} | 
 | 1015 | 		} | 
 | 1016 | 		/* Copy field character into record buffer. | 
 | 1017 | 		 */ | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 1018 | 		ADDCH(c); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1019 | 	} | 
 | 1020 |  | 
 | 1021 | 	/* If field is empty check if it needs to be quoted. | 
 | 1022 | 	 */ | 
 | 1023 | 	if (i == 0 && quote_empty) { | 
 | 1024 | 		if (dialect->quoting == QUOTE_NONE) { | 
 | 1025 | 			PyErr_Format(error_obj, | 
 | 1026 |                                      "single empty field record must be quoted"); | 
 | 1027 | 			return -1; | 
| Andrew McNamara | dd3e6cb | 2005-01-07 06:46:50 +0000 | [diff] [blame] | 1028 | 		} | 
 | 1029 | 		else | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1030 | 			*quoted = 1; | 
 | 1031 | 	} | 
 | 1032 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1033 | 	if (*quoted) { | 
 | 1034 | 		if (copy_phase) | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 1035 | 			ADDCH(dialect->quotechar); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1036 | 		else | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 1037 | 			rec_len += 2; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1038 | 	} | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1039 | 	return rec_len; | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 1040 | #undef ADDCH | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1041 | } | 
 | 1042 |  | 
 | 1043 | static int | 
 | 1044 | join_check_rec_size(WriterObj *self, int rec_len) | 
 | 1045 | { | 
 | 1046 | 	if (rec_len > self->rec_size) { | 
 | 1047 | 		if (self->rec_size == 0) { | 
 | 1048 | 			self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; | 
| Andrew McNamara | dcfb38c | 2003-06-09 05:59:23 +0000 | [diff] [blame] | 1049 | 			if (self->rec != NULL) | 
 | 1050 | 				PyMem_Free(self->rec); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1051 | 			self->rec = PyMem_Malloc(self->rec_size); | 
 | 1052 | 		} | 
 | 1053 | 		else { | 
 | 1054 | 			char *old_rec = self->rec; | 
 | 1055 |  | 
 | 1056 | 			self->rec_size = (rec_len / MEM_INCR + 1) * MEM_INCR; | 
 | 1057 | 			self->rec = PyMem_Realloc(self->rec, self->rec_size); | 
 | 1058 | 			if (self->rec == NULL) | 
 | 1059 | 				PyMem_Free(old_rec); | 
 | 1060 | 		} | 
 | 1061 | 		if (self->rec == NULL) { | 
 | 1062 | 			PyErr_NoMemory(); | 
 | 1063 | 			return 0; | 
 | 1064 | 		} | 
 | 1065 | 	} | 
 | 1066 | 	return 1; | 
 | 1067 | } | 
 | 1068 |  | 
 | 1069 | static int | 
 | 1070 | join_append(WriterObj *self, char *field, int *quoted, int quote_empty) | 
 | 1071 | { | 
 | 1072 | 	int rec_len; | 
 | 1073 |  | 
 | 1074 | 	rec_len = join_append_data(self, field, quote_empty, quoted, 0); | 
 | 1075 | 	if (rec_len < 0) | 
 | 1076 | 		return 0; | 
 | 1077 |  | 
 | 1078 | 	/* grow record buffer if necessary */ | 
 | 1079 | 	if (!join_check_rec_size(self, rec_len)) | 
 | 1080 | 		return 0; | 
 | 1081 |  | 
 | 1082 | 	self->rec_len = join_append_data(self, field, quote_empty, quoted, 1); | 
 | 1083 | 	self->num_fields++; | 
 | 1084 |  | 
 | 1085 | 	return 1; | 
 | 1086 | } | 
 | 1087 |  | 
 | 1088 | static int | 
 | 1089 | join_append_lineterminator(WriterObj *self) | 
 | 1090 | { | 
 | 1091 | 	int terminator_len; | 
| Andrew McNamara | cf0fd5a | 2005-01-12 01:16:35 +0000 | [diff] [blame] | 1092 | 	char *terminator; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1093 |  | 
 | 1094 | 	terminator_len = PyString_Size(self->dialect->lineterminator); | 
 | 1095 |  | 
 | 1096 | 	/* grow record buffer if necessary */ | 
 | 1097 | 	if (!join_check_rec_size(self, self->rec_len + terminator_len)) | 
 | 1098 | 		return 0; | 
 | 1099 |  | 
| Andrew McNamara | cf0fd5a | 2005-01-12 01:16:35 +0000 | [diff] [blame] | 1100 | 	terminator = PyString_AsString(self->dialect->lineterminator);  | 
 | 1101 | 	if (terminator == NULL) | 
 | 1102 | 		return 0; | 
 | 1103 | 	memmove(self->rec + self->rec_len, terminator, terminator_len); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1104 | 	self->rec_len += terminator_len; | 
 | 1105 |  | 
 | 1106 | 	return 1; | 
 | 1107 | } | 
 | 1108 |  | 
 | 1109 | PyDoc_STRVAR(csv_writerow_doc, | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1110 | "writerow(sequence)\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1111 | "\n" | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1112 | "Construct and write a CSV record from a sequence of fields.  Non-string\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1113 | "elements will be converted to string."); | 
 | 1114 |  | 
 | 1115 | static PyObject * | 
 | 1116 | csv_writerow(WriterObj *self, PyObject *seq) | 
 | 1117 | { | 
 | 1118 |         DialectObj *dialect = self->dialect; | 
 | 1119 | 	int len, i; | 
 | 1120 |  | 
 | 1121 | 	if (!PySequence_Check(seq)) | 
 | 1122 | 		return PyErr_Format(error_obj, "sequence expected"); | 
 | 1123 |  | 
 | 1124 | 	len = PySequence_Length(seq); | 
 | 1125 | 	if (len < 0) | 
 | 1126 | 		return NULL; | 
 | 1127 |  | 
 | 1128 | 	/* Join all fields in internal buffer. | 
 | 1129 | 	 */ | 
 | 1130 | 	join_reset(self); | 
 | 1131 | 	for (i = 0; i < len; i++) { | 
 | 1132 | 		PyObject *field; | 
 | 1133 | 		int append_ok; | 
 | 1134 | 		int quoted; | 
 | 1135 |  | 
 | 1136 | 		field = PySequence_GetItem(seq, i); | 
 | 1137 | 		if (field == NULL) | 
 | 1138 | 			return NULL; | 
 | 1139 |  | 
| Andrew McNamara | c89f284 | 2005-01-12 07:44:42 +0000 | [diff] [blame] | 1140 | 		switch (dialect->quoting) { | 
 | 1141 | 		case QUOTE_NONNUMERIC: | 
 | 1142 | 			quoted = !PyNumber_Check(field); | 
 | 1143 | 			break; | 
 | 1144 | 		case QUOTE_ALL: | 
 | 1145 | 			quoted = 1; | 
 | 1146 | 			break; | 
 | 1147 | 		default: | 
 | 1148 | 			quoted = 0; | 
 | 1149 | 			break; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1150 | 		} | 
 | 1151 |  | 
 | 1152 | 		if (PyString_Check(field)) { | 
| Skip Montanaro | 577c7a7 | 2003-04-12 19:17:14 +0000 | [diff] [blame] | 1153 | 			append_ok = join_append(self, | 
 | 1154 | 						PyString_AS_STRING(field), | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1155 |                                                 "ed, len == 1); | 
 | 1156 | 			Py_DECREF(field); | 
 | 1157 | 		} | 
 | 1158 | 		else if (field == Py_None) { | 
 | 1159 | 			append_ok = join_append(self, "", "ed, len == 1); | 
 | 1160 | 			Py_DECREF(field); | 
 | 1161 | 		} | 
 | 1162 | 		else { | 
 | 1163 | 			PyObject *str; | 
 | 1164 |  | 
 | 1165 | 			str = PyObject_Str(field); | 
 | 1166 | 			Py_DECREF(field); | 
 | 1167 | 			if (str == NULL) | 
 | 1168 | 				return NULL; | 
 | 1169 |  | 
| Skip Montanaro | 577c7a7 | 2003-04-12 19:17:14 +0000 | [diff] [blame] | 1170 | 			append_ok = join_append(self, PyString_AS_STRING(str),  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1171 |                                                 "ed, len == 1); | 
 | 1172 | 			Py_DECREF(str); | 
 | 1173 | 		} | 
 | 1174 | 		if (!append_ok) | 
 | 1175 | 			return NULL; | 
 | 1176 | 	} | 
 | 1177 |  | 
 | 1178 | 	/* Add line terminator. | 
 | 1179 | 	 */ | 
 | 1180 | 	if (!join_append_lineterminator(self)) | 
 | 1181 | 		return 0; | 
 | 1182 |  | 
 | 1183 | 	return PyObject_CallFunction(self->writeline,  | 
 | 1184 |                                      "(s#)", self->rec, self->rec_len); | 
 | 1185 | } | 
 | 1186 |  | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1187 | PyDoc_STRVAR(csv_writerows_doc, | 
 | 1188 | "writerows(sequence of sequences)\n" | 
 | 1189 | "\n" | 
 | 1190 | "Construct and write a series of sequences to a csv file.  Non-string\n" | 
 | 1191 | "elements will be converted to string."); | 
 | 1192 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1193 | static PyObject * | 
 | 1194 | csv_writerows(WriterObj *self, PyObject *seqseq) | 
 | 1195 | { | 
 | 1196 |         PyObject *row_iter, *row_obj, *result; | 
 | 1197 |  | 
 | 1198 |         row_iter = PyObject_GetIter(seqseq); | 
 | 1199 |         if (row_iter == NULL) { | 
 | 1200 |                 PyErr_SetString(PyExc_TypeError, | 
| Skip Montanaro | 98f16e0 | 2003-04-11 23:10:13 +0000 | [diff] [blame] | 1201 |                                 "writerows() argument must be iterable"); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1202 |                 return NULL; | 
 | 1203 |         } | 
 | 1204 |         while ((row_obj = PyIter_Next(row_iter))) { | 
 | 1205 |                 result = csv_writerow(self, row_obj); | 
 | 1206 |                 Py_DECREF(row_obj); | 
 | 1207 |                 if (!result) { | 
 | 1208 |                         Py_DECREF(row_iter); | 
 | 1209 |                         return NULL; | 
 | 1210 |                 } | 
 | 1211 |                 else | 
 | 1212 |                      Py_DECREF(result);    | 
 | 1213 |         } | 
 | 1214 |         Py_DECREF(row_iter); | 
 | 1215 |         if (PyErr_Occurred()) | 
 | 1216 |                 return NULL; | 
 | 1217 |         Py_INCREF(Py_None); | 
 | 1218 |         return Py_None; | 
 | 1219 | } | 
 | 1220 |  | 
 | 1221 | static struct PyMethodDef Writer_methods[] = { | 
 | 1222 |         { "writerow", (PyCFunction)csv_writerow, METH_O, csv_writerow_doc}, | 
| Skip Montanaro | 860fc0b | 2003-04-12 18:57:52 +0000 | [diff] [blame] | 1223 |         { "writerows", (PyCFunction)csv_writerows, METH_O, csv_writerows_doc}, | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1224 | 	{ NULL, NULL } | 
 | 1225 | }; | 
 | 1226 |  | 
 | 1227 | #define W_OFF(x) offsetof(WriterObj, x) | 
 | 1228 |  | 
 | 1229 | static struct PyMemberDef Writer_memberlist[] = { | 
 | 1230 | 	{ "dialect", T_OBJECT, W_OFF(dialect), RO }, | 
 | 1231 | 	{ NULL } | 
 | 1232 | }; | 
 | 1233 |  | 
 | 1234 | static void | 
 | 1235 | Writer_dealloc(WriterObj *self) | 
 | 1236 | { | 
| Andrew McNamara | 77ead87 | 2005-01-10 02:09:41 +0000 | [diff] [blame] | 1237 | 	PyObject_GC_UnTrack(self); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1238 |         Py_XDECREF(self->dialect); | 
 | 1239 |         Py_XDECREF(self->writeline); | 
| Andrew McNamara | dcfb38c | 2003-06-09 05:59:23 +0000 | [diff] [blame] | 1240 | 	if (self->rec != NULL) | 
 | 1241 | 		PyMem_Free(self->rec); | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 1242 | 	PyObject_GC_Del(self); | 
 | 1243 | } | 
 | 1244 |  | 
 | 1245 | static int | 
 | 1246 | Writer_traverse(WriterObj *self, visitproc visit, void *arg) | 
 | 1247 | { | 
 | 1248 | 	int err; | 
 | 1249 | #define VISIT(SLOT) \ | 
 | 1250 | 	if (SLOT) { \ | 
 | 1251 | 		err = visit((PyObject *)(SLOT), arg); \ | 
 | 1252 | 		if (err) \ | 
 | 1253 | 			return err; \ | 
 | 1254 | 	} | 
 | 1255 | 	VISIT(self->dialect); | 
 | 1256 | 	VISIT(self->writeline); | 
 | 1257 | 	return 0; | 
 | 1258 | } | 
 | 1259 |  | 
 | 1260 | static int | 
 | 1261 | Writer_clear(WriterObj *self) | 
 | 1262 | { | 
 | 1263 |         Py_XDECREF(self->dialect); | 
 | 1264 |         Py_XDECREF(self->writeline); | 
 | 1265 | 	self->dialect = NULL; | 
 | 1266 | 	self->writeline = NULL; | 
 | 1267 | 	return 0; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1268 | } | 
 | 1269 |  | 
 | 1270 | PyDoc_STRVAR(Writer_Type_doc,  | 
 | 1271 | "CSV writer\n" | 
 | 1272 | "\n" | 
 | 1273 | "Writer objects are responsible for generating tabular data\n" | 
 | 1274 | "in CSV format from sequence input.\n" | 
 | 1275 | ); | 
 | 1276 |  | 
 | 1277 | static PyTypeObject Writer_Type = { | 
 | 1278 | 	PyObject_HEAD_INIT(NULL) | 
 | 1279 | 	0,                                      /*ob_size*/ | 
 | 1280 | 	"_csv.writer",                          /*tp_name*/ | 
 | 1281 | 	sizeof(WriterObj),                      /*tp_basicsize*/ | 
 | 1282 | 	0,                                      /*tp_itemsize*/ | 
 | 1283 | 	/* methods */ | 
 | 1284 | 	(destructor)Writer_dealloc,             /*tp_dealloc*/ | 
 | 1285 | 	(printfunc)0,                           /*tp_print*/ | 
 | 1286 | 	(getattrfunc)0,                         /*tp_getattr*/ | 
 | 1287 | 	(setattrfunc)0,                         /*tp_setattr*/ | 
 | 1288 | 	(cmpfunc)0,                             /*tp_compare*/ | 
 | 1289 | 	(reprfunc)0,                            /*tp_repr*/ | 
 | 1290 | 	0,                                      /*tp_as_number*/ | 
 | 1291 | 	0,                                      /*tp_as_sequence*/ | 
 | 1292 | 	0,                                      /*tp_as_mapping*/ | 
 | 1293 | 	(hashfunc)0,                            /*tp_hash*/ | 
 | 1294 | 	(ternaryfunc)0,                         /*tp_call*/ | 
 | 1295 | 	(reprfunc)0,                            /*tp_str*/ | 
 | 1296 | 	0,                                      /*tp_getattro*/ | 
 | 1297 |         0,                                      /*tp_setattro*/ | 
 | 1298 |         0,                                      /*tp_as_buffer*/ | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 1299 |         Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | | 
 | 1300 | 		Py_TPFLAGS_HAVE_GC,		/*tp_flags*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1301 | 	Writer_Type_doc, | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 1302 |         (traverseproc)Writer_traverse,          /*tp_traverse*/ | 
 | 1303 |         (inquiry)Writer_clear,                  /*tp_clear*/ | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1304 |         0,                                      /*tp_richcompare*/ | 
 | 1305 |         0,                                      /*tp_weaklistoffset*/ | 
 | 1306 |         (getiterfunc)0,                         /*tp_iter*/ | 
 | 1307 |         (getiterfunc)0,                         /*tp_iternext*/ | 
 | 1308 |         Writer_methods,                         /*tp_methods*/ | 
 | 1309 |         Writer_memberlist,                      /*tp_members*/ | 
 | 1310 |         0,                                      /*tp_getset*/ | 
 | 1311 | }; | 
 | 1312 |  | 
 | 1313 | static PyObject * | 
 | 1314 | csv_writer(PyObject *module, PyObject *args, PyObject *keyword_args) | 
 | 1315 | { | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 1316 | 	PyObject * output_file, * dialect = NULL; | 
| Jeremy Hylton | 42a8aed | 2003-04-14 02:20:55 +0000 | [diff] [blame] | 1317 |         WriterObj * self = PyObject_GC_New(WriterObj, &Writer_Type); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1318 |  | 
 | 1319 |         if (!self) | 
 | 1320 |                 return NULL; | 
 | 1321 |  | 
 | 1322 |         self->dialect = NULL; | 
 | 1323 |         self->writeline = NULL; | 
 | 1324 |  | 
 | 1325 | 	self->rec = NULL; | 
 | 1326 | 	self->rec_size = 0; | 
 | 1327 | 	self->rec_len = 0; | 
 | 1328 | 	self->num_fields = 0; | 
 | 1329 |  | 
| Raymond Hettinger | 1761a7c | 2004-06-20 04:23:19 +0000 | [diff] [blame] | 1330 | 	if (!PyArg_UnpackTuple(args, "", 1, 2, &output_file, &dialect)) { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1331 |                 Py_DECREF(self); | 
 | 1332 |                 return NULL; | 
 | 1333 |         } | 
 | 1334 |         self->writeline = PyObject_GetAttrString(output_file, "write"); | 
 | 1335 |         if (self->writeline == NULL || !PyCallable_Check(self->writeline)) { | 
 | 1336 |                 PyErr_SetString(PyExc_TypeError, | 
| Andrew McNamara | 5cfd837 | 2005-01-12 11:39:50 +0000 | [diff] [blame] | 1337 |                                 "argument 1 must have a \"write\" method"); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1338 |                 Py_DECREF(self); | 
 | 1339 |                 return NULL; | 
 | 1340 |         } | 
| Andrew McNamara | 91b9746 | 2005-01-11 01:07:23 +0000 | [diff] [blame] | 1341 | 	self->dialect = (DialectObj *)_call_dialect(dialect, keyword_args); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1342 |         if (self->dialect == NULL) { | 
 | 1343 |                 Py_DECREF(self); | 
 | 1344 |                 return NULL; | 
 | 1345 |         } | 
| Andrew McNamara | 77ead87 | 2005-01-10 02:09:41 +0000 | [diff] [blame] | 1346 | 	PyObject_GC_Track(self); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1347 |         return (PyObject *)self; | 
 | 1348 | } | 
 | 1349 |  | 
 | 1350 | /* | 
 | 1351 |  * DIALECT REGISTRY | 
 | 1352 |  */ | 
 | 1353 | static PyObject * | 
 | 1354 | csv_list_dialects(PyObject *module, PyObject *args) | 
 | 1355 | { | 
 | 1356 |         return PyDict_Keys(dialects); | 
 | 1357 | } | 
 | 1358 |  | 
 | 1359 | static PyObject * | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1360 | csv_register_dialect(PyObject *module, PyObject *args, PyObject *kwargs) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1361 | { | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1362 | 	PyObject *name_obj, *dialect_obj = NULL; | 
 | 1363 | 	PyObject *dialect; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1364 |  | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1365 | 	if (!PyArg_UnpackTuple(args, "", 1, 2, &name_obj, &dialect_obj)) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1366 |                 return NULL; | 
| Andrew McNamara | 37d2bdf | 2005-01-10 12:22:48 +0000 | [diff] [blame] | 1367 |         if (!IS_BASESTRING(name_obj)) { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1368 |                 PyErr_SetString(PyExc_TypeError,  | 
 | 1369 |                                 "dialect name must be a string or unicode"); | 
 | 1370 |                 return NULL; | 
 | 1371 |         } | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1372 | 	dialect = _call_dialect(dialect_obj, kwargs); | 
 | 1373 | 	if (dialect == NULL) | 
 | 1374 | 		return NULL; | 
 | 1375 | 	if (PyDict_SetItem(dialects, name_obj, dialect) < 0) { | 
 | 1376 | 		Py_DECREF(dialect); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1377 |                 return NULL; | 
 | 1378 |         } | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1379 | 	Py_DECREF(dialect); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1380 |         Py_INCREF(Py_None); | 
 | 1381 |         return Py_None; | 
 | 1382 | } | 
 | 1383 |  | 
 | 1384 | static PyObject * | 
| Skip Montanaro | 577c7a7 | 2003-04-12 19:17:14 +0000 | [diff] [blame] | 1385 | csv_unregister_dialect(PyObject *module, PyObject *name_obj) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1386 | { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1387 |         if (PyDict_DelItem(dialects, name_obj) < 0) | 
 | 1388 |                 return PyErr_Format(error_obj, "unknown dialect"); | 
 | 1389 |         Py_INCREF(Py_None); | 
 | 1390 |         return Py_None; | 
 | 1391 | } | 
 | 1392 |  | 
 | 1393 | static PyObject * | 
| Skip Montanaro | 577c7a7 | 2003-04-12 19:17:14 +0000 | [diff] [blame] | 1394 | csv_get_dialect(PyObject *module, PyObject *name_obj) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1395 | { | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1396 |         return get_dialect_from_registry(name_obj); | 
 | 1397 | } | 
 | 1398 |  | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1399 | static PyObject * | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1400 | csv_field_size_limit(PyObject *module, PyObject *args) | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1401 | { | 
 | 1402 | 	PyObject *new_limit = NULL; | 
 | 1403 | 	long old_limit = field_limit; | 
 | 1404 |  | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1405 | 	if (!PyArg_UnpackTuple(args, "field_size_limit", 0, 1, &new_limit)) | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1406 | 		return NULL; | 
 | 1407 | 	if (new_limit != NULL) { | 
 | 1408 | 		if (!PyInt_Check(new_limit)) { | 
 | 1409 | 			PyErr_Format(PyExc_TypeError,  | 
 | 1410 | 				     "limit must be an integer"); | 
 | 1411 | 			return NULL; | 
 | 1412 | 		} | 
 | 1413 | 		field_limit = PyInt_AsLong(new_limit); | 
 | 1414 | 	} | 
 | 1415 | 	return PyInt_FromLong(old_limit); | 
 | 1416 | } | 
 | 1417 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1418 | /* | 
 | 1419 |  * MODULE | 
 | 1420 |  */ | 
 | 1421 |  | 
 | 1422 | PyDoc_STRVAR(csv_module_doc, | 
 | 1423 | "CSV parsing and writing.\n" | 
 | 1424 | "\n" | 
 | 1425 | "This module provides classes that assist in the reading and writing\n" | 
 | 1426 | "of Comma Separated Value (CSV) files, and implements the interface\n" | 
 | 1427 | "described by PEP 305.  Although many CSV files are simple to parse,\n" | 
 | 1428 | "the format is not formally defined by a stable specification and\n" | 
 | 1429 | "is subtle enough that parsing lines of a CSV file with something\n" | 
 | 1430 | "like line.split(\",\") is bound to fail.  The module supports three\n" | 
 | 1431 | "basic APIs: reading, writing, and registration of dialects.\n" | 
 | 1432 | "\n" | 
 | 1433 | "\n" | 
 | 1434 | "DIALECT REGISTRATION:\n" | 
 | 1435 | "\n" | 
 | 1436 | "Readers and writers support a dialect argument, which is a convenient\n" | 
 | 1437 | "handle on a group of settings.  When the dialect argument is a string,\n" | 
 | 1438 | "it identifies one of the dialects previously registered with the module.\n" | 
 | 1439 | "If it is a class or instance, the attributes of the argument are used as\n" | 
 | 1440 | "the settings for the reader or writer:\n" | 
 | 1441 | "\n" | 
 | 1442 | "    class excel:\n" | 
 | 1443 | "        delimiter = ','\n" | 
 | 1444 | "        quotechar = '\"'\n" | 
 | 1445 | "        escapechar = None\n" | 
 | 1446 | "        doublequote = True\n" | 
 | 1447 | "        skipinitialspace = False\n" | 
| Johannes Gijsbers | 8d3b9dd | 2004-08-15 12:23:10 +0000 | [diff] [blame] | 1448 | "        lineterminator = '\\r\\n'\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1449 | "        quoting = QUOTE_MINIMAL\n" | 
 | 1450 | "\n" | 
 | 1451 | "SETTINGS:\n" | 
 | 1452 | "\n" | 
 | 1453 | "    * quotechar - specifies a one-character string to use as the \n" | 
 | 1454 | "        quoting character.  It defaults to '\"'.\n" | 
 | 1455 | "    * delimiter - specifies a one-character string to use as the \n" | 
 | 1456 | "        field separator.  It defaults to ','.\n" | 
 | 1457 | "    * skipinitialspace - specifies how to interpret whitespace which\n" | 
 | 1458 | "        immediately follows a delimiter.  It defaults to False, which\n" | 
 | 1459 | "        means that whitespace immediately following a delimiter is part\n" | 
 | 1460 | "        of the following field.\n" | 
 | 1461 | "    * lineterminator -  specifies the character sequence which should \n" | 
 | 1462 | "        terminate rows.\n" | 
 | 1463 | "    * quoting - controls when quotes should be generated by the writer.\n" | 
 | 1464 | "        It can take on any of the following module constants:\n" | 
 | 1465 | "\n" | 
 | 1466 | "        csv.QUOTE_MINIMAL means only when required, for example, when a\n" | 
 | 1467 | "            field contains either the quotechar or the delimiter\n" | 
 | 1468 | "        csv.QUOTE_ALL means that quotes are always placed around fields.\n" | 
 | 1469 | "        csv.QUOTE_NONNUMERIC means that quotes are always placed around\n" | 
| Skip Montanaro | 148eb6a | 2003-12-02 18:57:47 +0000 | [diff] [blame] | 1470 | "            fields which do not parse as integers or floating point\n" | 
 | 1471 | "            numbers.\n" | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1472 | "        csv.QUOTE_NONE means that quotes are never placed around fields.\n" | 
 | 1473 | "    * escapechar - specifies a one-character string used to escape \n" | 
 | 1474 | "        the delimiter when quoting is set to QUOTE_NONE.\n" | 
 | 1475 | "    * doublequote - controls the handling of quotes inside fields.  When\n" | 
 | 1476 | "        True, two consecutive quotes are interpreted as one during read,\n" | 
 | 1477 | "        and when writing, each quote character embedded in the data is\n" | 
 | 1478 | "        written as two quotes\n"); | 
 | 1479 |  | 
 | 1480 | PyDoc_STRVAR(csv_reader_doc, | 
 | 1481 | "    csv_reader = reader(iterable [, dialect='excel']\n" | 
 | 1482 | "                        [optional keyword args])\n" | 
 | 1483 | "    for row in csv_reader:\n" | 
 | 1484 | "        process(row)\n" | 
 | 1485 | "\n" | 
 | 1486 | "The \"iterable\" argument can be any object that returns a line\n" | 
 | 1487 | "of input for each iteration, such as a file object or a list.  The\n" | 
 | 1488 | "optional \"dialect\" parameter is discussed below.  The function\n" | 
 | 1489 | "also accepts optional keyword arguments which override settings\n" | 
 | 1490 | "provided by the dialect.\n" | 
 | 1491 | "\n" | 
 | 1492 | "The returned object is an iterator.  Each iteration returns a row\n" | 
| Johannes Gijsbers | 8d3b9dd | 2004-08-15 12:23:10 +0000 | [diff] [blame] | 1493 | "of the CSV file (which can span multiple input lines):\n"); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1494 |  | 
 | 1495 | PyDoc_STRVAR(csv_writer_doc, | 
 | 1496 | "    csv_writer = csv.writer(fileobj [, dialect='excel']\n" | 
 | 1497 | "                            [optional keyword args])\n" | 
 | 1498 | "    for row in csv_writer:\n" | 
 | 1499 | "        csv_writer.writerow(row)\n" | 
 | 1500 | "\n" | 
 | 1501 | "    [or]\n" | 
 | 1502 | "\n" | 
 | 1503 | "    csv_writer = csv.writer(fileobj [, dialect='excel']\n" | 
 | 1504 | "                            [optional keyword args])\n" | 
 | 1505 | "    csv_writer.writerows(rows)\n" | 
 | 1506 | "\n" | 
 | 1507 | "The \"fileobj\" argument can be any object that supports the file API.\n"); | 
 | 1508 |  | 
 | 1509 | PyDoc_STRVAR(csv_list_dialects_doc, | 
 | 1510 | "Return a list of all know dialect names.\n" | 
 | 1511 | "    names = csv.list_dialects()"); | 
 | 1512 |  | 
 | 1513 | PyDoc_STRVAR(csv_get_dialect_doc, | 
 | 1514 | "Return the dialect instance associated with name.\n" | 
 | 1515 | "    dialect = csv.get_dialect(name)"); | 
 | 1516 |  | 
 | 1517 | PyDoc_STRVAR(csv_register_dialect_doc, | 
 | 1518 | "Create a mapping from a string name to a dialect class.\n" | 
 | 1519 | "    dialect = csv.register_dialect(name, dialect)"); | 
 | 1520 |  | 
 | 1521 | PyDoc_STRVAR(csv_unregister_dialect_doc, | 
 | 1522 | "Delete the name/dialect mapping associated with a string name.\n" | 
 | 1523 | "    csv.unregister_dialect(name)"); | 
 | 1524 |  | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1525 | PyDoc_STRVAR(csv_field_size_limit_doc, | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1526 | "Sets an upper limit on parsed fields.\n" | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1527 | "    csv.field_size_limit([limit])\n" | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1528 | "\n" | 
 | 1529 | "Returns old limit. If limit is not given, no new limit is set and\n" | 
 | 1530 | "the old limit is returned"); | 
 | 1531 |  | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1532 | static struct PyMethodDef csv_methods[] = { | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1533 | 	{ "reader", (PyCFunction)csv_reader,  | 
 | 1534 | 		METH_VARARGS | METH_KEYWORDS, csv_reader_doc}, | 
 | 1535 | 	{ "writer", (PyCFunction)csv_writer,  | 
 | 1536 | 		METH_VARARGS | METH_KEYWORDS, csv_writer_doc}, | 
 | 1537 | 	{ "list_dialects", (PyCFunction)csv_list_dialects,  | 
 | 1538 | 		METH_NOARGS, csv_list_dialects_doc}, | 
 | 1539 | 	{ "register_dialect", (PyCFunction)csv_register_dialect,  | 
| Andrew McNamara | 8662597 | 2005-01-11 01:28:33 +0000 | [diff] [blame] | 1540 | 		METH_VARARGS | METH_KEYWORDS, csv_register_dialect_doc}, | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1541 | 	{ "unregister_dialect", (PyCFunction)csv_unregister_dialect,  | 
 | 1542 | 		METH_O, csv_unregister_dialect_doc}, | 
 | 1543 | 	{ "get_dialect", (PyCFunction)csv_get_dialect,  | 
 | 1544 | 		METH_O, csv_get_dialect_doc}, | 
| Andrew McNamara | 31d8896 | 2005-01-12 03:45:10 +0000 | [diff] [blame] | 1545 | 	{ "field_size_limit", (PyCFunction)csv_field_size_limit,  | 
 | 1546 | 		METH_VARARGS, csv_field_size_limit_doc}, | 
| Andrew McNamara | e4d05c4 | 2005-01-11 07:32:02 +0000 | [diff] [blame] | 1547 | 	{ NULL, NULL } | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1548 | }; | 
 | 1549 |  | 
 | 1550 | PyMODINIT_FUNC | 
 | 1551 | init_csv(void) | 
 | 1552 | { | 
 | 1553 | 	PyObject *module; | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1554 | 	StyleDesc *style; | 
 | 1555 |  | 
 | 1556 | 	if (PyType_Ready(&Dialect_Type) < 0) | 
 | 1557 | 		return; | 
 | 1558 |  | 
 | 1559 | 	if (PyType_Ready(&Reader_Type) < 0) | 
 | 1560 | 		return; | 
 | 1561 |  | 
 | 1562 | 	if (PyType_Ready(&Writer_Type) < 0) | 
 | 1563 | 		return; | 
 | 1564 |  | 
 | 1565 | 	/* Create the module and add the functions */ | 
 | 1566 | 	module = Py_InitModule3("_csv", csv_methods, csv_module_doc); | 
 | 1567 | 	if (module == NULL) | 
 | 1568 | 		return; | 
 | 1569 |  | 
 | 1570 | 	/* Add version to the module. */ | 
| Skip Montanaro | 7b01a83 | 2003-04-12 19:23:46 +0000 | [diff] [blame] | 1571 | 	if (PyModule_AddStringConstant(module, "__version__", | 
 | 1572 | 				       MODULE_VERSION) == -1) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1573 | 		return; | 
 | 1574 |  | 
 | 1575 |         /* Add _dialects dictionary */ | 
 | 1576 |         dialects = PyDict_New(); | 
 | 1577 |         if (dialects == NULL) | 
 | 1578 |                 return; | 
 | 1579 |         if (PyModule_AddObject(module, "_dialects", dialects)) | 
 | 1580 |                 return; | 
 | 1581 |  | 
 | 1582 | 	/* Add quote styles into dictionary */ | 
 | 1583 | 	for (style = quote_styles; style->name; style++) { | 
| Skip Montanaro | 7b01a83 | 2003-04-12 19:23:46 +0000 | [diff] [blame] | 1584 | 		if (PyModule_AddIntConstant(module, style->name, | 
 | 1585 | 					    style->style) == -1) | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1586 | 			return; | 
 | 1587 | 	} | 
 | 1588 |  | 
 | 1589 |         /* Add the Dialect type */ | 
| Skip Montanaro | 32c5d42 | 2005-06-15 13:35:08 +0000 | [diff] [blame] | 1590 | 	Py_INCREF(&Dialect_Type); | 
| Skip Montanaro | b4a0417 | 2003-03-20 23:29:12 +0000 | [diff] [blame] | 1591 |         if (PyModule_AddObject(module, "Dialect", (PyObject *)&Dialect_Type)) | 
 | 1592 |                 return; | 
 | 1593 |  | 
 | 1594 | 	/* Add the CSV exception object to the module. */ | 
 | 1595 | 	error_obj = PyErr_NewException("_csv.Error", NULL, NULL); | 
 | 1596 | 	if (error_obj == NULL) | 
 | 1597 | 		return; | 
 | 1598 | 	PyModule_AddObject(module, "Error", error_obj); | 
 | 1599 | } |