blob: ff45b611371110020277a8748a6415c40391cb63 [file] [log] [blame]
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2001,2002 Python Software Foundation
# csv package unit tests
4
5import sys
Skip Montanaro58fc5d02004-06-05 17:03:20 +00006import os
Skip Montanarob4a04172003-03-20 23:29:12 +00007import unittest
8from StringIO import StringIO
Skip Montanaro58fc5d02004-06-05 17:03:20 +00009import tempfile
Skip Montanaro594adac2003-04-10 17:16:15 +000010import csv
Skip Montanarob4a04172003-03-20 23:29:12 +000011import gc
Walter Dörwald21d3a322003-05-01 17:45:56 +000012from test import test_support
Skip Montanarob4a04172003-03-20 23:29:12 +000013
14class Test_Csv(unittest.TestCase):
15 """
Tim Peters0eadaac2003-04-24 16:02:54 +000016 Test the underlying C csv parser in ways that are not appropriate
Skip Montanarob4a04172003-03-20 23:29:12 +000017 from the high level interface. Further tests of this nature are done
18 in TestDialectRegistry.
19 """
Andrew McNamara1196cf12005-01-07 04:42:45 +000020 def _test_arg_valid(self, ctor, arg):
21 self.assertRaises(TypeError, ctor)
22 self.assertRaises(TypeError, ctor, None)
23 self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
24 self.assertRaises(TypeError, ctor, arg, delimiter = 0)
25 self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
26 self.assertRaises(csv.Error, ctor, arg, 'foo')
Andrew McNamara1196cf12005-01-07 04:42:45 +000027 self.assertRaises(TypeError, ctor, arg, delimiter=None)
28 self.assertRaises(TypeError, ctor, arg, delimiter=1)
29 self.assertRaises(TypeError, ctor, arg, quotechar=1)
30 self.assertRaises(TypeError, ctor, arg, lineterminator=None)
31 self.assertRaises(TypeError, ctor, arg, lineterminator=1)
32 self.assertRaises(TypeError, ctor, arg, quoting=None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000033
34 def test_reader_arg_valid(self):
35 self._test_arg_valid(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000036
37 def test_writer_arg_valid(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000038 self._test_arg_valid(csv.writer, StringIO())
Skip Montanarob4a04172003-03-20 23:29:12 +000039
Andrew McNamara1196cf12005-01-07 04:42:45 +000040 def _test_default_attrs(self, ctor, *args):
41 obj = ctor(*args)
42 # Check defaults
Skip Montanarob4a04172003-03-20 23:29:12 +000043 self.assertEqual(obj.dialect.delimiter, ',')
Andrew McNamara1196cf12005-01-07 04:42:45 +000044 self.assertEqual(obj.dialect.doublequote, True)
Skip Montanarob4a04172003-03-20 23:29:12 +000045 self.assertEqual(obj.dialect.escapechar, None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000046 self.assertEqual(obj.dialect.lineterminator, "\r\n")
47 self.assertEqual(obj.dialect.quotechar, '"')
48 self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
49 self.assertEqual(obj.dialect.skipinitialspace, False)
50 self.assertEqual(obj.dialect.strict, False)
51 # Try deleting or changing attributes (they are read-only)
52 self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
53 self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':')
Skip Montanarob4a04172003-03-20 23:29:12 +000054 self.assertRaises(TypeError, delattr, obj.dialect, 'quoting')
55 self.assertRaises(TypeError, setattr, obj.dialect, 'quoting', None)
Skip Montanarob4a04172003-03-20 23:29:12 +000056
57 def test_reader_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000058 self._test_default_attrs(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000059
60 def test_writer_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000061 self._test_default_attrs(csv.writer, StringIO())
62
63 def _test_kw_attrs(self, ctor, *args):
64 # Now try with alternate options
65 kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
66 lineterminator='\r', quotechar='*',
67 quoting=csv.QUOTE_NONE, skipinitialspace=True,
68 strict=True)
69 obj = ctor(*args, **kwargs)
70 self.assertEqual(obj.dialect.delimiter, ':')
71 self.assertEqual(obj.dialect.doublequote, False)
72 self.assertEqual(obj.dialect.escapechar, '\\')
73 self.assertEqual(obj.dialect.lineterminator, "\r")
74 self.assertEqual(obj.dialect.quotechar, '*')
75 self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
76 self.assertEqual(obj.dialect.skipinitialspace, True)
77 self.assertEqual(obj.dialect.strict, True)
78
79 def test_reader_kw_attrs(self):
80 self._test_kw_attrs(csv.reader, [])
81
82 def test_writer_kw_attrs(self):
83 self._test_kw_attrs(csv.writer, StringIO())
84
85 def _test_dialect_attrs(self, ctor, *args):
86 # Now try with dialect-derived options
87 class dialect:
88 delimiter='-'
89 doublequote=False
90 escapechar='^'
91 lineterminator='$'
92 quotechar='#'
93 quoting=csv.QUOTE_ALL
94 skipinitialspace=True
95 strict=False
96 args = args + (dialect,)
97 obj = ctor(*args)
98 self.assertEqual(obj.dialect.delimiter, '-')
99 self.assertEqual(obj.dialect.doublequote, False)
100 self.assertEqual(obj.dialect.escapechar, '^')
101 self.assertEqual(obj.dialect.lineterminator, "$")
102 self.assertEqual(obj.dialect.quotechar, '#')
103 self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
104 self.assertEqual(obj.dialect.skipinitialspace, True)
105 self.assertEqual(obj.dialect.strict, False)
106
107 def test_reader_dialect_attrs(self):
108 self._test_dialect_attrs(csv.reader, [])
109
110 def test_writer_dialect_attrs(self):
111 self._test_dialect_attrs(csv.writer, StringIO())
112
Skip Montanarob4a04172003-03-20 23:29:12 +0000113
114 def _write_test(self, fields, expect, **kwargs):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000115 fd, name = tempfile.mkstemp()
116 fileobj = os.fdopen(fd, "w+b")
117 try:
118 writer = csv.writer(fileobj, **kwargs)
119 writer.writerow(fields)
120 fileobj.seek(0)
121 self.assertEqual(fileobj.read(),
122 expect + writer.dialect.lineterminator)
123 finally:
124 fileobj.close()
125 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
127 def test_write_arg_valid(self):
128 self.assertRaises(csv.Error, self._write_test, None, '')
129 self._write_test((), '')
130 self._write_test([None], '""')
Tim Peters0eadaac2003-04-24 16:02:54 +0000131 self.assertRaises(csv.Error, self._write_test,
Skip Montanarob4a04172003-03-20 23:29:12 +0000132 [None], None, quoting = csv.QUOTE_NONE)
133 # Check that exceptions are passed up the chain
134 class BadList:
135 def __len__(self):
136 return 10;
137 def __getitem__(self, i):
138 if i > 2:
139 raise IOError
140 self.assertRaises(IOError, self._write_test, BadList(), '')
141 class BadItem:
142 def __str__(self):
143 raise IOError
144 self.assertRaises(IOError, self._write_test, [BadItem()], '')
145
146 def test_write_bigfield(self):
147 # This exercises the buffer realloc functionality
148 bigstring = 'X' * 50000
149 self._write_test([bigstring,bigstring], '%s,%s' % \
150 (bigstring, bigstring))
151
152 def test_write_quoting(self):
153 self._write_test(['a','1','p,q'], 'a,1,"p,q"')
Tim Peters0eadaac2003-04-24 16:02:54 +0000154 self.assertRaises(csv.Error,
Skip Montanarob4a04172003-03-20 23:29:12 +0000155 self._write_test,
156 ['a','1','p,q'], 'a,1,"p,q"',
157 quoting = csv.QUOTE_NONE)
158 self._write_test(['a','1','p,q'], 'a,1,"p,q"',
159 quoting = csv.QUOTE_MINIMAL)
160 self._write_test(['a','1','p,q'], '"a",1,"p,q"',
161 quoting = csv.QUOTE_NONNUMERIC)
162 self._write_test(['a','1','p,q'], '"a","1","p,q"',
163 quoting = csv.QUOTE_ALL)
164
165 def test_write_escape(self):
166 self._write_test(['a','1','p,q'], 'a,1,"p,q"',
167 escapechar='\\')
168# FAILED - needs to be fixed [am]:
169# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
170# escapechar='\\', doublequote = 0)
171 self._write_test(['a','1','p,q'], 'a,1,p\\,q',
172 escapechar='\\', quoting = csv.QUOTE_NONE)
173
174 def test_writerows(self):
175 class BrokenFile:
176 def write(self, buf):
177 raise IOError
178 writer = csv.writer(BrokenFile())
179 self.assertRaises(IOError, writer.writerows, [['a']])
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000180 fd, name = tempfile.mkstemp()
181 fileobj = os.fdopen(fd, "w+b")
182 try:
183 writer = csv.writer(fileobj)
184 self.assertRaises(TypeError, writer.writerows, None)
185 writer.writerows([['a','b'],['c','d']])
186 fileobj.seek(0)
187 self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
188 finally:
189 fileobj.close()
190 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000191
Skip Montanarob4a04172003-03-20 23:29:12 +0000192 def _read_test(self, input, expect, **kwargs):
193 reader = csv.reader(input, **kwargs)
194 result = list(reader)
195 self.assertEqual(result, expect)
196
197 def test_read_oddinputs(self):
198 self._read_test([], [])
199 self._read_test([''], [[]])
200 self.assertRaises(csv.Error, self._read_test,
201 ['"ab"c'], None, strict = 1)
202 # cannot handle null bytes for the moment
203 self.assertRaises(csv.Error, self._read_test,
204 ['ab\0c'], None, strict = 1)
205 self._read_test(['"ab"c'], [['abc']], doublequote = 0)
206
207 def test_read_eol(self):
208 self._read_test(['a,b'], [['a','b']])
209 self._read_test(['a,b\n'], [['a','b']])
210 self._read_test(['a,b\r\n'], [['a','b']])
211 self._read_test(['a,b\r'], [['a','b']])
212 self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
213 self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
214 self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])
215
216 def test_read_escape(self):
Andrew McNamara36a76912005-01-10 01:04:40 +0000217 self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000218 self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
219 self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
Andrew McNamara36a76912005-01-10 01:04:40 +0000220 self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000221 self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
222 self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
223
Andrew McNamara1196cf12005-01-07 04:42:45 +0000224 def test_read_quoting(self):
225 self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
226 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
227 quotechar=None, escapechar='\\')
228 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
229 quoting=csv.QUOTE_NONE, escapechar='\\')
230
Skip Montanarob4a04172003-03-20 23:29:12 +0000231 def test_read_bigfield(self):
232 # This exercises the buffer realloc functionality
233 bigstring = 'X' * 50000
234 bigline = '%s,%s' % (bigstring, bigstring)
235 self._read_test([bigline], [[bigstring, bigstring]])
236
237class TestDialectRegistry(unittest.TestCase):
238 def test_registry_badargs(self):
239 self.assertRaises(TypeError, csv.list_dialects, None)
240 self.assertRaises(TypeError, csv.get_dialect)
241 self.assertRaises(csv.Error, csv.get_dialect, None)
242 self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
243 self.assertRaises(TypeError, csv.unregister_dialect)
244 self.assertRaises(csv.Error, csv.unregister_dialect, None)
245 self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
246 self.assertRaises(TypeError, csv.register_dialect, None)
247 self.assertRaises(TypeError, csv.register_dialect, None, None)
Skip Montanarob4a04172003-03-20 23:29:12 +0000248
249 def test_registry(self):
250 class myexceltsv(csv.excel):
251 delimiter = "\t"
252 name = "myexceltsv"
253 expected_dialects = csv.list_dialects() + [name]
254 expected_dialects.sort()
255 csv.register_dialect(name, myexceltsv)
256 try:
Andrew McNamara86625972005-01-11 01:28:33 +0000257 self.failUnless(csv.get_dialect(name).delimiter, '\t')
Skip Montanarob4a04172003-03-20 23:29:12 +0000258 got_dialects = csv.list_dialects()
259 got_dialects.sort()
260 self.assertEqual(expected_dialects, got_dialects)
261 finally:
262 csv.unregister_dialect(name)
263
Andrew McNamara86625972005-01-11 01:28:33 +0000264 def test_register_kwargs(self):
265 name = 'fedcba'
266 csv.register_dialect(name, delimiter=';')
267 try:
268 self.failUnless(csv.get_dialect(name).delimiter, '\t')
269 self.failUnless(list(csv.reader('X;Y;Z', name)), ['X', 'Y', 'Z'])
270 finally:
271 csv.unregister_dialect(name)
272
Skip Montanarob4a04172003-03-20 23:29:12 +0000273 def test_incomplete_dialect(self):
274 class myexceltsv(csv.Dialect):
275 delimiter = "\t"
276 self.assertRaises(csv.Error, myexceltsv)
277
278 def test_space_dialect(self):
279 class space(csv.excel):
280 delimiter = " "
281 quoting = csv.QUOTE_NONE
282 escapechar = "\\"
283
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000284 fd, name = tempfile.mkstemp()
285 fileobj = os.fdopen(fd, "w+b")
286 try:
287 fileobj.write("abc def\nc1ccccc1 benzene\n")
288 fileobj.seek(0)
289 rdr = csv.reader(fileobj, dialect=space())
290 self.assertEqual(rdr.next(), ["abc", "def"])
291 self.assertEqual(rdr.next(), ["c1ccccc1", "benzene"])
292 finally:
293 fileobj.close()
294 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000295
Skip Montanarob4a04172003-03-20 23:29:12 +0000296 def test_dialect_apply(self):
297 class testA(csv.excel):
298 delimiter = "\t"
299 class testB(csv.excel):
300 delimiter = ":"
301 class testC(csv.excel):
302 delimiter = "|"
303
304 csv.register_dialect('testC', testC)
305 try:
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000306 fd, name = tempfile.mkstemp()
307 fileobj = os.fdopen(fd, "w+b")
308 try:
309 writer = csv.writer(fileobj)
310 writer.writerow([1,2,3])
311 fileobj.seek(0)
312 self.assertEqual(fileobj.read(), "1,2,3\r\n")
313 finally:
314 fileobj.close()
315 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000316
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000317 fd, name = tempfile.mkstemp()
318 fileobj = os.fdopen(fd, "w+b")
319 try:
320 writer = csv.writer(fileobj, testA)
321 writer.writerow([1,2,3])
322 fileobj.seek(0)
323 self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
324 finally:
325 fileobj.close()
326 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000327
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000328 fd, name = tempfile.mkstemp()
329 fileobj = os.fdopen(fd, "w+b")
330 try:
331 writer = csv.writer(fileobj, dialect=testB())
332 writer.writerow([1,2,3])
333 fileobj.seek(0)
334 self.assertEqual(fileobj.read(), "1:2:3\r\n")
335 finally:
336 fileobj.close()
337 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000338
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000339 fd, name = tempfile.mkstemp()
340 fileobj = os.fdopen(fd, "w+b")
341 try:
342 writer = csv.writer(fileobj, dialect='testC')
343 writer.writerow([1,2,3])
344 fileobj.seek(0)
345 self.assertEqual(fileobj.read(), "1|2|3\r\n")
346 finally:
347 fileobj.close()
348 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000349
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000350 fd, name = tempfile.mkstemp()
351 fileobj = os.fdopen(fd, "w+b")
352 try:
353 writer = csv.writer(fileobj, dialect=testA, delimiter=';')
354 writer.writerow([1,2,3])
355 fileobj.seek(0)
356 self.assertEqual(fileobj.read(), "1;2;3\r\n")
357 finally:
358 fileobj.close()
359 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000360
Skip Montanarob4a04172003-03-20 23:29:12 +0000361 finally:
362 csv.unregister_dialect('testC')
363
364 def test_bad_dialect(self):
365 # Unknown parameter
Andrew McNamara1196cf12005-01-07 04:42:45 +0000366 self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000367 # Bad values
368 self.assertRaises(TypeError, csv.reader, [], delimiter = None)
369 self.assertRaises(TypeError, csv.reader, [], quoting = -1)
370 self.assertRaises(TypeError, csv.reader, [], quoting = 100)
371
class TestCsvBase(unittest.TestCase):
    """Round-trip helpers shared by the dialect test cases.

    Both helpers read self.dialect, which this class does not define;
    it has to be supplied by the subclass.
    """

    def readerAssertEqual(self, input, expected_result):
        """Store input in a scratch file, parse it and compare the rows."""
        handle, path = tempfile.mkstemp()
        scratch = os.fdopen(handle, "w+b")
        try:
            scratch.write(input)
            scratch.seek(0)
            rows = list(csv.reader(scratch, dialect=self.dialect))
            self.assertEqual(rows, expected_result)
        finally:
            scratch.close()
            os.unlink(path)

    def writerAssertEqual(self, input, expected_result):
        """Write the rows in input to a scratch file and compare its text."""
        handle, path = tempfile.mkstemp()
        scratch = os.fdopen(handle, "w+b")
        try:
            csv.writer(scratch, dialect=self.dialect).writerows(input)
            scratch.seek(0)
            self.assertEqual(scratch.read(), expected_result)
        finally:
            scratch.close()
            os.unlink(path)
Skip Montanarob4a04172003-03-20 23:29:12 +0000397
class TestDialectExcel(TestCsvBase):
    """Reader and writer tests for the built-in 'excel' dialect.

    Every test method needs a unique name: a second definition with the
    same name replaces the first in the class body, and the replaced
    test then never runs.  The writer tests that used to reuse the
    names test_single and test_simple therefore carry a _writer suffix.
    """
    dialect = 'excel'

    # Reader tests.

    def test_single(self):
        self.readerAssertEqual('abc', [['abc']])

    def test_simple(self):
        self.readerAssertEqual('1,2,3,4,5', [['1','2','3','4','5']])

    def test_blankline(self):
        self.readerAssertEqual('', [])

    def test_empty_fields(self):
        self.readerAssertEqual(',', [['', '']])

    def test_singlequoted(self):
        self.readerAssertEqual('""', [['']])

    def test_singlequoted_left_empty(self):
        self.readerAssertEqual('"",', [['','']])

    def test_singlequoted_right_empty(self):
        self.readerAssertEqual(',""', [['','']])

    def test_single_quoted_quote(self):
        self.readerAssertEqual('""""', [['"']])

    def test_quoted_quotes(self):
        self.readerAssertEqual('""""""', [['""']])

    def test_inline_quote(self):
        self.readerAssertEqual('a""b', [['a""b']])

    def test_inline_quotes(self):
        self.readerAssertEqual('a"b"c', [['a"b"c']])

    def test_quotes_and_more(self):
        self.readerAssertEqual('"a"b', [['ab']])

    def test_lone_quote(self):
        self.readerAssertEqual('a"b', [['a"b']])

    def test_quote_and_quote(self):
        self.readerAssertEqual('"a" "b"', [['a "b"']])

    def test_space_and_quote(self):
        self.readerAssertEqual(' "a"', [[' "a"']])

    def test_quoted(self):
        self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
                               [['1', '2', '3',
                                 'I think, therefore I am',
                                 '5', '6']])

    def test_quoted_quote(self):
        self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
                               [['1', '2', '3',
                                 '"I see," said the blind man',
                                 'as he picked up his hammer and saw']])

    def test_quoted_nl(self):
        # Quoted fields may span several physical lines.
        text = '''\
1,2,3,"""I see,""
said the blind man","as he picked up his
hammer and saw"
9,8,7,6'''
        self.readerAssertEqual(text,
                               [['1', '2', '3',
                                 '"I see,"\nsaid the blind man',
                                 'as he picked up his\nhammer and saw'],
                                ['9','8','7','6']])

    def test_dubious_quote(self):
        self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])

    # Writer tests.

    def test_null(self):
        self.writerAssertEqual([], '')

    def test_single_writer(self):
        self.writerAssertEqual([['abc']], 'abc\r\n')

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')

    def test_quotes(self):
        self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n')

    def test_quote_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_newlines(self):
        self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n')
490
class EscapedExcel(csv.excel):
    """Excel dialect with quoting turned off and backslash as escapechar."""
    escapechar = '\\'
    quoting = csv.QUOTE_NONE
494
class TestEscapedExcel(TestCsvBase):
    """Delimiter escaping with the EscapedExcel dialect, in both directions."""
    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        # Writing: the embedded delimiter comes out escaped.
        rows = [['abc,def']]
        self.writerAssertEqual(rows, 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        # Reading: the escaped delimiter does not split the field.
        text = 'abc\\,def\r\n'
        self.readerAssertEqual(text, [['abc,def']])
503
class QuotedEscapedExcel(csv.excel):
    """Excel dialect with QUOTE_NONNUMERIC quoting and backslash as escapechar."""
    escapechar = '\\'
    quoting = csv.QUOTE_NONNUMERIC
507
class TestQuotedEscapedExcel(TestCsvBase):
    """Delimiter handling with the QuotedEscapedExcel dialect."""
    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        # Writing: the field is quoted, so the delimiter is not escaped.
        rows = [['abc,def']]
        self.writerAssertEqual(rows, '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        # Reading: an escaped delimiter inside quotes loses its escape.
        text = '"abc\\,def"\r\n'
        self.readerAssertEqual(text, [['abc,def']])
516
Skip Montanarob4a04172003-03-20 23:29:12 +0000517class TestDictFields(unittest.TestCase):
518 ### "long" means the row is longer than the number of fieldnames
519 ### "short" means there are fewer elements in the row than fieldnames
520 def test_write_simple_dict(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000521 fd, name = tempfile.mkstemp()
522 fileobj = os.fdopen(fd, "w+b")
523 try:
524 writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
525 writer.writerow({"f1": 10, "f3": "abc"})
526 fileobj.seek(0)
527 self.assertEqual(fileobj.read(), "10,,abc\r\n")
528 finally:
529 fileobj.close()
530 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000531
532 def test_write_no_fields(self):
533 fileobj = StringIO()
534 self.assertRaises(TypeError, csv.DictWriter, fileobj)
535
536 def test_read_dict_fields(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000537 fd, name = tempfile.mkstemp()
538 fileobj = os.fdopen(fd, "w+b")
539 try:
540 fileobj.write("1,2,abc\r\n")
541 fileobj.seek(0)
542 reader = csv.DictReader(fileobj,
543 fieldnames=["f1", "f2", "f3"])
544 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
545 finally:
546 fileobj.close()
547 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000548
Skip Montanarodffeed32003-10-03 14:03:01 +0000549 def test_read_dict_no_fieldnames(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000550 fd, name = tempfile.mkstemp()
551 fileobj = os.fdopen(fd, "w+b")
552 try:
553 fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
554 fileobj.seek(0)
555 reader = csv.DictReader(fileobj)
556 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
557 finally:
558 fileobj.close()
559 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000560
Skip Montanarob4a04172003-03-20 23:29:12 +0000561 def test_read_long(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000562 fd, name = tempfile.mkstemp()
563 fileobj = os.fdopen(fd, "w+b")
564 try:
565 fileobj.write("1,2,abc,4,5,6\r\n")
566 fileobj.seek(0)
567 reader = csv.DictReader(fileobj,
568 fieldnames=["f1", "f2"])
569 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
570 None: ["abc", "4", "5", "6"]})
571 finally:
572 fileobj.close()
573 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000574
575 def test_read_long_with_rest(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000576 fd, name = tempfile.mkstemp()
577 fileobj = os.fdopen(fd, "w+b")
578 try:
579 fileobj.write("1,2,abc,4,5,6\r\n")
580 fileobj.seek(0)
581 reader = csv.DictReader(fileobj,
582 fieldnames=["f1", "f2"], restkey="_rest")
583 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
584 "_rest": ["abc", "4", "5", "6"]})
585 finally:
586 fileobj.close()
587 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000588
Skip Montanarodffeed32003-10-03 14:03:01 +0000589 def test_read_long_with_rest_no_fieldnames(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000590 fd, name = tempfile.mkstemp()
591 fileobj = os.fdopen(fd, "w+b")
592 try:
593 fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
594 fileobj.seek(0)
595 reader = csv.DictReader(fileobj, restkey="_rest")
596 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
597 "_rest": ["abc", "4", "5", "6"]})
598 finally:
599 fileobj.close()
600 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000601
Skip Montanarob4a04172003-03-20 23:29:12 +0000602 def test_read_short(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000603 fd, name = tempfile.mkstemp()
604 fileobj = os.fdopen(fd, "w+b")
605 try:
606 fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
607 fileobj.seek(0)
608 reader = csv.DictReader(fileobj,
609 fieldnames="1 2 3 4 5 6".split(),
610 restval="DEFAULT")
611 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
612 "4": '4', "5": '5', "6": '6'})
613 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
614 "4": 'DEFAULT', "5": 'DEFAULT',
615 "6": 'DEFAULT'})
616 finally:
617 fileobj.close()
618 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000619
Skip Montanaro1546bc42003-06-12 02:40:22 +0000620 def test_read_multi(self):
621 sample = [
622 '2147483648,43.0e12,17,abc,def\r\n',
623 '147483648,43.0e2,17,abc,def\r\n',
624 '47483648,43.0,170,abc,def\r\n'
625 ]
626
627 reader = csv.DictReader(sample,
628 fieldnames="i1 float i2 s1 s2".split())
629 self.assertEqual(reader.next(), {"i1": '2147483648',
630 "float": '43.0e12',
631 "i2": '17',
632 "s1": 'abc',
633 "s2": 'def'})
634
Skip Montanarob4a04172003-03-20 23:29:12 +0000635 def test_read_with_blanks(self):
636 reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
637 "1,2,abc,4,5,6\r\n"],
638 fieldnames="1 2 3 4 5 6".split())
639 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
640 "4": '4', "5": '5', "6": '6'})
641 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
642 "4": '4', "5": '5', "6": '6'})
643
Skip Montanaro3f7a9482003-09-06 19:52:12 +0000644 def test_read_semi_sep(self):
645 reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
646 fieldnames="1 2 3 4 5 6".split(),
647 delimiter=';')
648 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
649 "4": '4', "5": '5', "6": '6'})
650
Skip Montanarob4a04172003-03-20 23:29:12 +0000651class TestArrayWrites(unittest.TestCase):
652 def test_int_write(self):
653 import array
654 contents = [(20-i) for i in range(20)]
655 a = array.array('i', contents)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000656
657 fd, name = tempfile.mkstemp()
658 fileobj = os.fdopen(fd, "w+b")
659 try:
660 writer = csv.writer(fileobj, dialect="excel")
661 writer.writerow(a)
662 expected = ",".join([str(i) for i in a])+"\r\n"
663 fileobj.seek(0)
664 self.assertEqual(fileobj.read(), expected)
665 finally:
666 fileobj.close()
667 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000668
669 def test_double_write(self):
670 import array
671 contents = [(20-i)*0.1 for i in range(20)]
672 a = array.array('d', contents)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000673 fd, name = tempfile.mkstemp()
674 fileobj = os.fdopen(fd, "w+b")
675 try:
676 writer = csv.writer(fileobj, dialect="excel")
677 writer.writerow(a)
678 expected = ",".join([str(i) for i in a])+"\r\n"
679 fileobj.seek(0)
680 self.assertEqual(fileobj.read(), expected)
681 finally:
682 fileobj.close()
683 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000684
685 def test_float_write(self):
686 import array
687 contents = [(20-i)*0.1 for i in range(20)]
688 a = array.array('f', contents)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000689 fd, name = tempfile.mkstemp()
690 fileobj = os.fdopen(fd, "w+b")
691 try:
692 writer = csv.writer(fileobj, dialect="excel")
693 writer.writerow(a)
694 expected = ",".join([str(i) for i in a])+"\r\n"
695 fileobj.seek(0)
696 self.assertEqual(fileobj.read(), expected)
697 finally:
698 fileobj.close()
699 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000700
701 def test_char_write(self):
702 import array, string
703 a = array.array('c', string.letters)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000704 fd, name = tempfile.mkstemp()
705 fileobj = os.fdopen(fd, "w+b")
706 try:
707 writer = csv.writer(fileobj, dialect="excel")
708 writer.writerow(a)
709 expected = ",".join(a)+"\r\n"
710 fileobj.seek(0)
711 self.assertEqual(fileobj.read(), expected)
712 finally:
713 fileobj.close()
714 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000715
class TestDialectValidity(unittest.TestCase):
    """Check that instantiating a Dialect subclass validates its attributes."""

    def _new_dialect_class(self):
        """Return a fresh, valid Dialect subclass that a test may modify."""
        class mydialect(csv.Dialect):
            delimiter = ";"
            escapechar = '\\'
            doublequote = False
            skipinitialspace = True
            lineterminator = '\r\n'
            quoting = csv.QUOTE_NONE
        return mydialect

    def test_quoting(self):
        mydialect = self._new_dialect_class()
        # The unmodified class must instantiate without error.
        d = mydialect()

        mydialect.quoting = None
        self.assertRaises(csv.Error, mydialect)

        # Switch to a valid quoting configuration ...
        mydialect.doublequote = True
        mydialect.quoting = csv.QUOTE_ALL
        mydialect.quotechar = '"'
        d = mydialect()

        # ... then break the quotechar: too long, then of the wrong type.
        for bad_quotechar in ("''", 4):
            mydialect.quotechar = bad_quotechar
            self.assertRaises(csv.Error, mydialect)

    def test_delimiter(self):
        mydialect = self._new_dialect_class()
        d = mydialect()

        # Too long, then of the wrong type.
        for bad_delimiter in (":::", 4):
            mydialect.delimiter = bad_delimiter
            self.assertRaises(csv.Error, mydialect)

    def test_lineterminator(self):
        mydialect = self._new_dialect_class()
        d = mydialect()

        # A multi-character line terminator is accepted ...
        mydialect.lineterminator = ":::"
        d = mydialect()

        # ... a non-string one is not.
        mydialect.lineterminator = 4
        self.assertRaises(csv.Error, mydialect)
772
773
class TestSniffer(unittest.TestCase):
    """Tests for csv.Sniffer: dialect guessing and header detection."""

    # Comma separated, with a space after every delimiter, no quotes.
    sample1 = """\
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
Shark City, Glendale Heights, IL, 12/28/02, Prezence
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
"""
    # Colon separated, every field wrapped in single quotes.
    sample2 = """\
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""

    # Header row that test_has_header puts in front of sample1.
    header = '''\
"venue","city","state","date","performers"
'''
    # Three identical lines in which several characters repeat regularly.
    sample3 = '''\
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
'''

    # Semicolon separated numbers and words.
    sample4 = '''\
2147483648;43.0e12;17;abc;def
147483648;43.0e2;17;abc;def
47483648;43.0;170;abc;def
'''

    def test_has_header(self):
        sniffer = csv.Sniffer()
        without_header = self.sample1
        with_header = self.header + self.sample1
        self.assertEqual(sniffer.has_header(without_header), False)
        self.assertEqual(sniffer.has_header(with_header), True)

    def test_sniff(self):
        sniffer = csv.Sniffer()
        expectations = [
            # sample, delimiter, quotechar, skipinitialspace
            (self.sample1, ",", '"', True),
            (self.sample2, ":", "'", False),
        ]
        for sample, delimiter, quotechar, skipinitialspace in expectations:
            dialect = sniffer.sniff(sample)
            self.assertEqual(dialect.delimiter, delimiter)
            self.assertEqual(dialect.quotechar, quotechar)
            self.assertEqual(dialect.skipinitialspace, skipinitialspace)

    def test_delimiters(self):
        sniffer = csv.Sniffer()
        # With no restriction on the candidates.
        dialect = sniffer.sniff(self.sample3)
        self.assertEqual(dialect.delimiter, "0")
        # With the candidates restricted by the caller.
        dialect = sniffer.sniff(self.sample3, delimiters="?,")
        self.assertEqual(dialect.delimiter, "?")
        dialect = sniffer.sniff(self.sample3, delimiters="/,")
        self.assertEqual(dialect.delimiter, "/")
        dialect = sniffer.sniff(self.sample4)
        self.assertEqual(dialect.delimiter, ";")
Skip Montanaro77892372003-05-19 15:33:36 +0000830
Skip Montanarob4a04172003-03-20 23:29:12 +0000831if not hasattr(sys, "gettotalrefcount"):
Walter Dörwald21d3a322003-05-01 17:45:56 +0000832 if test_support.verbose: print "*** skipping leakage tests ***"
Skip Montanarob4a04172003-03-20 23:29:12 +0000833else:
834 class NUL:
835 def write(s, *args):
836 pass
837 writelines = write
838
839 class TestLeaks(unittest.TestCase):
840 def test_create_read(self):
841 delta = 0
842 lastrc = sys.gettotalrefcount()
843 for i in xrange(20):
844 gc.collect()
845 self.assertEqual(gc.garbage, [])
846 rc = sys.gettotalrefcount()
847 csv.reader(["a,b,c\r\n"])
848 csv.reader(["a,b,c\r\n"])
849 csv.reader(["a,b,c\r\n"])
850 delta = rc-lastrc
851 lastrc = rc
852 # if csv.reader() leaks, last delta should be 3 or more
853 self.assertEqual(delta < 3, True)
854
855 def test_create_write(self):
856 delta = 0
857 lastrc = sys.gettotalrefcount()
858 s = NUL()
859 for i in xrange(20):
860 gc.collect()
861 self.assertEqual(gc.garbage, [])
862 rc = sys.gettotalrefcount()
863 csv.writer(s)
864 csv.writer(s)
865 csv.writer(s)
866 delta = rc-lastrc
867 lastrc = rc
868 # if csv.writer() leaks, last delta should be 3 or more
869 self.assertEqual(delta < 3, True)
870
871 def test_read(self):
872 delta = 0
873 rows = ["a,b,c\r\n"]*5
874 lastrc = sys.gettotalrefcount()
875 for i in xrange(20):
876 gc.collect()
877 self.assertEqual(gc.garbage, [])
878 rc = sys.gettotalrefcount()
879 rdr = csv.reader(rows)
880 for row in rdr:
881 pass
882 delta = rc-lastrc
883 lastrc = rc
884 # if reader leaks during read, delta should be 5 or more
885 self.assertEqual(delta < 5, True)
886
887 def test_write(self):
888 delta = 0
889 rows = [[1,2,3]]*5
890 s = NUL()
891 lastrc = sys.gettotalrefcount()
892 for i in xrange(20):
893 gc.collect()
894 self.assertEqual(gc.garbage, [])
895 rc = sys.gettotalrefcount()
896 writer = csv.writer(s)
897 for row in rows:
898 writer.writerow(row)
899 delta = rc-lastrc
900 lastrc = rc
901 # if writer leaks during write, last delta should be 5 or more
902 self.assertEqual(delta < 5, True)
903
# commented out for now - csv module doesn't yet support Unicode
## class TestUnicode(unittest.TestCase):
##     def test_unicode_read(self):
##         import codecs
##         f = codecs.EncodedFile(StringIO("Martin von Löwis,"
##                                         "Marc André Lemburg,"
##                                         "Guido van Rossum,"
##                                         "François Pinard\r\n"),
##                                data_encoding='iso-8859-1')
##         reader = csv.reader(f)
##         self.assertEqual(list(reader), [[u"Martin von Löwis",
##                                          u"Marc André Lemburg",
##                                          u"Guido van Rossum",
##                                          u"François Pinard"]])
Skip Montanaro1a566652003-05-06 15:56:05 +0000918
Walter Dörwald21d3a322003-05-01 17:45:56 +0000919def test_main():
Skip Montanarob4a04172003-03-20 23:29:12 +0000920 mod = sys.modules[__name__]
Walter Dörwald21d3a322003-05-01 17:45:56 +0000921 test_support.run_unittest(
922 *[getattr(mod, name) for name in dir(mod) if name.startswith('Test')]
923 )
Skip Montanarob4a04172003-03-20 23:29:12 +0000924
# Run the whole suite when this file is executed as a script.
if '__main__' == __name__:
    test_main()