blob: cdda17976cda7489de2cf4dc74e7caf76ffb20c5 [file] [log] [blame]
Skip Montanaro1a566652003-05-06 15:56:05 +00001# -*- coding: iso-8859-1 -*-
Skip Montanarob4a04172003-03-20 23:29:12 +00002# Copyright (C) 2001,2002 Python Software Foundation
3# csv package unit tests
4
5import sys
Skip Montanaro58fc5d082004-06-05 17:03:20 +00006import os
Skip Montanarob4a04172003-03-20 23:29:12 +00007import unittest
8from StringIO import StringIO
Skip Montanaro58fc5d082004-06-05 17:03:20 +00009import tempfile
Skip Montanaro594adac2003-04-10 17:16:15 +000010import csv
Skip Montanarob4a04172003-03-20 23:29:12 +000011import gc
Walter Dörwald21d3a322003-05-01 17:45:56 +000012from test import test_support
Skip Montanarob4a04172003-03-20 23:29:12 +000013
14class Test_Csv(unittest.TestCase):
15 """
Tim Peters0eadaac2003-04-24 16:02:54 +000016 Test the underlying C csv parser in ways that are not appropriate
Skip Montanarob4a04172003-03-20 23:29:12 +000017 from the high level interface. Further tests of this nature are done
18 in TestDialectRegistry.
19 """
Andrew McNamara1196cf12005-01-07 04:42:45 +000020 def _test_arg_valid(self, ctor, arg):
21 self.assertRaises(TypeError, ctor)
22 self.assertRaises(TypeError, ctor, None)
23 self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
24 self.assertRaises(TypeError, ctor, arg, delimiter = 0)
25 self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
26 self.assertRaises(csv.Error, ctor, arg, 'foo')
27 self.assertRaises(TypeError, ctor, arg, None)
28 self.assertRaises(TypeError, ctor, arg, delimiter=None)
29 self.assertRaises(TypeError, ctor, arg, delimiter=1)
30 self.assertRaises(TypeError, ctor, arg, quotechar=1)
31 self.assertRaises(TypeError, ctor, arg, lineterminator=None)
32 self.assertRaises(TypeError, ctor, arg, lineterminator=1)
33 self.assertRaises(TypeError, ctor, arg, quoting=None)
34# We now allow this, only raising an exception if quoting is needed.
35# self.assertRaises(TypeError, ctor, arg, quotechar=None)
Tim Peters5a9fb3c2005-01-07 16:01:32 +000036# self.assertRaises(TypeError, ctor, arg,
Andrew McNamara1196cf12005-01-07 04:42:45 +000037# quoting=csv.QUOTE_NONE, escapechar=None)
38# No longer complains about dialects with invalid attributes [AM]
39# class BadDialect:
40# bad_attr = 0
41# self.assertRaises(AttributeError, csv.reader, [], BadDialect)
Skip Montanarob4a04172003-03-20 23:29:12 +000042 class BadClass:
43 def __init__(self):
44 raise IOError
45 self.assertRaises(IOError, csv.reader, [], BadClass)
Andrew McNamara1196cf12005-01-07 04:42:45 +000046
47 def test_reader_arg_valid(self):
48 self._test_arg_valid(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000049
50 def test_writer_arg_valid(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000051 self._test_arg_valid(csv.writer, StringIO())
Skip Montanarob4a04172003-03-20 23:29:12 +000052
Andrew McNamara1196cf12005-01-07 04:42:45 +000053 def _test_default_attrs(self, ctor, *args):
54 obj = ctor(*args)
55 # Check defaults
Skip Montanarob4a04172003-03-20 23:29:12 +000056 self.assertEqual(obj.dialect.delimiter, ',')
Andrew McNamara1196cf12005-01-07 04:42:45 +000057 self.assertEqual(obj.dialect.doublequote, True)
Skip Montanarob4a04172003-03-20 23:29:12 +000058 self.assertEqual(obj.dialect.escapechar, None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000059 self.assertEqual(obj.dialect.lineterminator, "\r\n")
60 self.assertEqual(obj.dialect.quotechar, '"')
61 self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
62 self.assertEqual(obj.dialect.skipinitialspace, False)
63 self.assertEqual(obj.dialect.strict, False)
64 # Try deleting or changing attributes (they are read-only)
65 self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
66 self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':')
Skip Montanarob4a04172003-03-20 23:29:12 +000067 self.assertRaises(TypeError, delattr, obj.dialect, 'quoting')
68 self.assertRaises(TypeError, setattr, obj.dialect, 'quoting', None)
Skip Montanarob4a04172003-03-20 23:29:12 +000069
70 def test_reader_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000071 self._test_default_attrs(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000072
73 def test_writer_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000074 self._test_default_attrs(csv.writer, StringIO())
75
76 def _test_kw_attrs(self, ctor, *args):
77 # Now try with alternate options
78 kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
79 lineterminator='\r', quotechar='*',
80 quoting=csv.QUOTE_NONE, skipinitialspace=True,
81 strict=True)
82 obj = ctor(*args, **kwargs)
83 self.assertEqual(obj.dialect.delimiter, ':')
84 self.assertEqual(obj.dialect.doublequote, False)
85 self.assertEqual(obj.dialect.escapechar, '\\')
86 self.assertEqual(obj.dialect.lineterminator, "\r")
87 self.assertEqual(obj.dialect.quotechar, '*')
88 self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
89 self.assertEqual(obj.dialect.skipinitialspace, True)
90 self.assertEqual(obj.dialect.strict, True)
91
92 def test_reader_kw_attrs(self):
93 self._test_kw_attrs(csv.reader, [])
94
95 def test_writer_kw_attrs(self):
96 self._test_kw_attrs(csv.writer, StringIO())
97
98 def _test_dialect_attrs(self, ctor, *args):
99 # Now try with dialect-derived options
100 class dialect:
101 delimiter='-'
102 doublequote=False
103 escapechar='^'
104 lineterminator='$'
105 quotechar='#'
106 quoting=csv.QUOTE_ALL
107 skipinitialspace=True
108 strict=False
109 args = args + (dialect,)
110 obj = ctor(*args)
111 self.assertEqual(obj.dialect.delimiter, '-')
112 self.assertEqual(obj.dialect.doublequote, False)
113 self.assertEqual(obj.dialect.escapechar, '^')
114 self.assertEqual(obj.dialect.lineterminator, "$")
115 self.assertEqual(obj.dialect.quotechar, '#')
116 self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
117 self.assertEqual(obj.dialect.skipinitialspace, True)
118 self.assertEqual(obj.dialect.strict, False)
119
120 def test_reader_dialect_attrs(self):
121 self._test_dialect_attrs(csv.reader, [])
122
123 def test_writer_dialect_attrs(self):
124 self._test_dialect_attrs(csv.writer, StringIO())
125
Skip Montanarob4a04172003-03-20 23:29:12 +0000126
127 def _write_test(self, fields, expect, **kwargs):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000128 fd, name = tempfile.mkstemp()
129 fileobj = os.fdopen(fd, "w+b")
130 try:
131 writer = csv.writer(fileobj, **kwargs)
132 writer.writerow(fields)
133 fileobj.seek(0)
134 self.assertEqual(fileobj.read(),
135 expect + writer.dialect.lineterminator)
136 finally:
137 fileobj.close()
138 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000139
140 def test_write_arg_valid(self):
141 self.assertRaises(csv.Error, self._write_test, None, '')
142 self._write_test((), '')
143 self._write_test([None], '""')
Tim Peters0eadaac2003-04-24 16:02:54 +0000144 self.assertRaises(csv.Error, self._write_test,
Skip Montanarob4a04172003-03-20 23:29:12 +0000145 [None], None, quoting = csv.QUOTE_NONE)
146 # Check that exceptions are passed up the chain
147 class BadList:
148 def __len__(self):
149 return 10;
150 def __getitem__(self, i):
151 if i > 2:
152 raise IOError
153 self.assertRaises(IOError, self._write_test, BadList(), '')
154 class BadItem:
155 def __str__(self):
156 raise IOError
157 self.assertRaises(IOError, self._write_test, [BadItem()], '')
158
159 def test_write_bigfield(self):
160 # This exercises the buffer realloc functionality
161 bigstring = 'X' * 50000
162 self._write_test([bigstring,bigstring], '%s,%s' % \
163 (bigstring, bigstring))
164
165 def test_write_quoting(self):
166 self._write_test(['a','1','p,q'], 'a,1,"p,q"')
Tim Peters0eadaac2003-04-24 16:02:54 +0000167 self.assertRaises(csv.Error,
Skip Montanarob4a04172003-03-20 23:29:12 +0000168 self._write_test,
169 ['a','1','p,q'], 'a,1,"p,q"',
170 quoting = csv.QUOTE_NONE)
171 self._write_test(['a','1','p,q'], 'a,1,"p,q"',
172 quoting = csv.QUOTE_MINIMAL)
173 self._write_test(['a','1','p,q'], '"a",1,"p,q"',
174 quoting = csv.QUOTE_NONNUMERIC)
175 self._write_test(['a','1','p,q'], '"a","1","p,q"',
176 quoting = csv.QUOTE_ALL)
177
178 def test_write_escape(self):
179 self._write_test(['a','1','p,q'], 'a,1,"p,q"',
180 escapechar='\\')
181# FAILED - needs to be fixed [am]:
182# self._write_test(['a','1','p,"q"'], 'a,1,"p,\\"q\\"',
183# escapechar='\\', doublequote = 0)
184 self._write_test(['a','1','p,q'], 'a,1,p\\,q',
185 escapechar='\\', quoting = csv.QUOTE_NONE)
186
187 def test_writerows(self):
188 class BrokenFile:
189 def write(self, buf):
190 raise IOError
191 writer = csv.writer(BrokenFile())
192 self.assertRaises(IOError, writer.writerows, [['a']])
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000193 fd, name = tempfile.mkstemp()
194 fileobj = os.fdopen(fd, "w+b")
195 try:
196 writer = csv.writer(fileobj)
197 self.assertRaises(TypeError, writer.writerows, None)
198 writer.writerows([['a','b'],['c','d']])
199 fileobj.seek(0)
200 self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
201 finally:
202 fileobj.close()
203 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000204
Skip Montanarob4a04172003-03-20 23:29:12 +0000205 def _read_test(self, input, expect, **kwargs):
206 reader = csv.reader(input, **kwargs)
207 result = list(reader)
208 self.assertEqual(result, expect)
209
210 def test_read_oddinputs(self):
211 self._read_test([], [])
212 self._read_test([''], [[]])
213 self.assertRaises(csv.Error, self._read_test,
214 ['"ab"c'], None, strict = 1)
215 # cannot handle null bytes for the moment
216 self.assertRaises(csv.Error, self._read_test,
217 ['ab\0c'], None, strict = 1)
218 self._read_test(['"ab"c'], [['abc']], doublequote = 0)
219
220 def test_read_eol(self):
221 self._read_test(['a,b'], [['a','b']])
222 self._read_test(['a,b\n'], [['a','b']])
223 self._read_test(['a,b\r\n'], [['a','b']])
224 self._read_test(['a,b\r'], [['a','b']])
225 self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
226 self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
227 self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])
228
229 def test_read_escape(self):
Andrew McNamara36a76912005-01-10 01:04:40 +0000230 self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000231 self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
232 self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
Andrew McNamara36a76912005-01-10 01:04:40 +0000233 self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000234 self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
235 self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
236
Andrew McNamara1196cf12005-01-07 04:42:45 +0000237 def test_read_quoting(self):
238 self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
239 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
240 quotechar=None, escapechar='\\')
241 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
242 quoting=csv.QUOTE_NONE, escapechar='\\')
243
Skip Montanarob4a04172003-03-20 23:29:12 +0000244 def test_read_bigfield(self):
245 # This exercises the buffer realloc functionality
246 bigstring = 'X' * 50000
247 bigline = '%s,%s' % (bigstring, bigstring)
248 self._read_test([bigline], [[bigstring, bigstring]])
249
250class TestDialectRegistry(unittest.TestCase):
251 def test_registry_badargs(self):
252 self.assertRaises(TypeError, csv.list_dialects, None)
253 self.assertRaises(TypeError, csv.get_dialect)
254 self.assertRaises(csv.Error, csv.get_dialect, None)
255 self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
256 self.assertRaises(TypeError, csv.unregister_dialect)
257 self.assertRaises(csv.Error, csv.unregister_dialect, None)
258 self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
259 self.assertRaises(TypeError, csv.register_dialect, None)
260 self.assertRaises(TypeError, csv.register_dialect, None, None)
261 self.assertRaises(TypeError, csv.register_dialect, "nonesuch", None)
262 class bogus:
263 def __init__(self):
264 raise KeyError
265 self.assertRaises(KeyError, csv.register_dialect, "nonesuch", bogus)
266
267 def test_registry(self):
268 class myexceltsv(csv.excel):
269 delimiter = "\t"
270 name = "myexceltsv"
271 expected_dialects = csv.list_dialects() + [name]
272 expected_dialects.sort()
273 csv.register_dialect(name, myexceltsv)
274 try:
Andrew McNamara86625972005-01-11 01:28:33 +0000275 self.failUnless(csv.get_dialect(name).delimiter, '\t')
Skip Montanarob4a04172003-03-20 23:29:12 +0000276 got_dialects = csv.list_dialects()
277 got_dialects.sort()
278 self.assertEqual(expected_dialects, got_dialects)
279 finally:
280 csv.unregister_dialect(name)
281
Andrew McNamara86625972005-01-11 01:28:33 +0000282 def test_register_kwargs(self):
283 name = 'fedcba'
284 csv.register_dialect(name, delimiter=';')
285 try:
286 self.failUnless(csv.get_dialect(name).delimiter, '\t')
287 self.failUnless(list(csv.reader('X;Y;Z', name)), ['X', 'Y', 'Z'])
288 finally:
289 csv.unregister_dialect(name)
290
Skip Montanarob4a04172003-03-20 23:29:12 +0000291 def test_incomplete_dialect(self):
292 class myexceltsv(csv.Dialect):
293 delimiter = "\t"
294 self.assertRaises(csv.Error, myexceltsv)
295
296 def test_space_dialect(self):
297 class space(csv.excel):
298 delimiter = " "
299 quoting = csv.QUOTE_NONE
300 escapechar = "\\"
301
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000302 fd, name = tempfile.mkstemp()
303 fileobj = os.fdopen(fd, "w+b")
304 try:
305 fileobj.write("abc def\nc1ccccc1 benzene\n")
306 fileobj.seek(0)
307 rdr = csv.reader(fileobj, dialect=space())
308 self.assertEqual(rdr.next(), ["abc", "def"])
309 self.assertEqual(rdr.next(), ["c1ccccc1", "benzene"])
310 finally:
311 fileobj.close()
312 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000313
Skip Montanarob4a04172003-03-20 23:29:12 +0000314 def test_dialect_apply(self):
315 class testA(csv.excel):
316 delimiter = "\t"
317 class testB(csv.excel):
318 delimiter = ":"
319 class testC(csv.excel):
320 delimiter = "|"
321
322 csv.register_dialect('testC', testC)
323 try:
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000324 fd, name = tempfile.mkstemp()
325 fileobj = os.fdopen(fd, "w+b")
326 try:
327 writer = csv.writer(fileobj)
328 writer.writerow([1,2,3])
329 fileobj.seek(0)
330 self.assertEqual(fileobj.read(), "1,2,3\r\n")
331 finally:
332 fileobj.close()
333 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000334
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000335 fd, name = tempfile.mkstemp()
336 fileobj = os.fdopen(fd, "w+b")
337 try:
338 writer = csv.writer(fileobj, testA)
339 writer.writerow([1,2,3])
340 fileobj.seek(0)
341 self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
342 finally:
343 fileobj.close()
344 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000345
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000346 fd, name = tempfile.mkstemp()
347 fileobj = os.fdopen(fd, "w+b")
348 try:
349 writer = csv.writer(fileobj, dialect=testB())
350 writer.writerow([1,2,3])
351 fileobj.seek(0)
352 self.assertEqual(fileobj.read(), "1:2:3\r\n")
353 finally:
354 fileobj.close()
355 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000356
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000357 fd, name = tempfile.mkstemp()
358 fileobj = os.fdopen(fd, "w+b")
359 try:
360 writer = csv.writer(fileobj, dialect='testC')
361 writer.writerow([1,2,3])
362 fileobj.seek(0)
363 self.assertEqual(fileobj.read(), "1|2|3\r\n")
364 finally:
365 fileobj.close()
366 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000367
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000368 fd, name = tempfile.mkstemp()
369 fileobj = os.fdopen(fd, "w+b")
370 try:
371 writer = csv.writer(fileobj, dialect=testA, delimiter=';')
372 writer.writerow([1,2,3])
373 fileobj.seek(0)
374 self.assertEqual(fileobj.read(), "1;2;3\r\n")
375 finally:
376 fileobj.close()
377 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000378
Skip Montanarob4a04172003-03-20 23:29:12 +0000379 finally:
380 csv.unregister_dialect('testC')
381
382 def test_bad_dialect(self):
383 # Unknown parameter
Andrew McNamara1196cf12005-01-07 04:42:45 +0000384 self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000385 # Bad values
386 self.assertRaises(TypeError, csv.reader, [], delimiter = None)
387 self.assertRaises(TypeError, csv.reader, [], quoting = -1)
388 self.assertRaises(TypeError, csv.reader, [], quoting = 100)
389
class TestCsvBase(unittest.TestCase):
    """Helpers shared by the dialect test classes.

    Both helpers read self.dialect, which this class does not define;
    the subclasses set it as a class attribute.
    """

    def readerAssertEqual(self, input, expected_result):
        """Store `input` in a temporary file, parse the file with
        csv.reader and compare the list of rows with expected_result."""
        handle, path = tempfile.mkstemp()
        stream = os.fdopen(handle, "w+b")
        try:
            stream.write(input)
            stream.seek(0)
            rows = list(csv.reader(stream, dialect=self.dialect))
            self.assertEqual(rows, expected_result)
        finally:
            stream.close()
            os.unlink(path)

    def writerAssertEqual(self, input, expected_result):
        """Write the rows in `input` to a temporary file with csv.writer
        and compare the file contents with expected_result."""
        handle, path = tempfile.mkstemp()
        stream = os.fdopen(handle, "w+b")
        try:
            csv.writer(stream, dialect=self.dialect).writerows(input)
            stream.seek(0)
            self.assertEqual(stream.read(), expected_result)
        finally:
            stream.close()
            os.unlink(path)
Skip Montanarob4a04172003-03-20 23:29:12 +0000415
class TestDialectExcel(TestCsvBase):
    """Reader and writer tests for the registered 'excel' dialect.

    The writer tests for a single field and a simple row carry a
    `_writer` suffix so that they do not replace the reader tests
    test_single and test_simple in the class namespace.
    """
    dialect = 'excel'

    # --- reader tests -------------------------------------------------

    def test_single(self):
        self.readerAssertEqual('abc', [['abc']])

    def test_simple(self):
        self.readerAssertEqual('1,2,3,4,5', [['1','2','3','4','5']])

    def test_blankline(self):
        self.readerAssertEqual('', [])

    def test_empty_fields(self):
        self.readerAssertEqual(',', [['', '']])

    def test_singlequoted(self):
        self.readerAssertEqual('""', [['']])

    def test_singlequoted_left_empty(self):
        self.readerAssertEqual('"",', [['','']])

    def test_singlequoted_right_empty(self):
        self.readerAssertEqual(',""', [['','']])

    def test_single_quoted_quote(self):
        self.readerAssertEqual('""""', [['"']])

    def test_quoted_quotes(self):
        self.readerAssertEqual('""""""', [['""']])

    def test_inline_quote(self):
        self.readerAssertEqual('a""b', [['a""b']])

    def test_inline_quotes(self):
        self.readerAssertEqual('a"b"c', [['a"b"c']])

    def test_quotes_and_more(self):
        self.readerAssertEqual('"a"b', [['ab']])

    def test_lone_quote(self):
        self.readerAssertEqual('a"b', [['a"b']])

    def test_quote_and_quote(self):
        self.readerAssertEqual('"a" "b"', [['a "b"']])

    def test_space_and_quote(self):
        self.readerAssertEqual(' "a"', [[' "a"']])

    def test_quoted(self):
        self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
                               [['1', '2', '3',
                                 'I think, therefore I am',
                                 '5', '6']])

    def test_quoted_quote(self):
        self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
                               [['1', '2', '3',
                                 '"I see," said the blind man',
                                 'as he picked up his hammer and saw']])

    def test_quoted_nl(self):
        # Quoted fields may span several physical lines.
        sample = '''\
1,2,3,"""I see,""
said the blind man","as he picked up his
hammer and saw"
9,8,7,6'''
        self.readerAssertEqual(sample,
                               [['1', '2', '3',
                                   '"I see,"\nsaid the blind man',
                                   'as he picked up his\nhammer and saw'],
                                ['9','8','7','6']])

    def test_dubious_quote(self):
        self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])

    # --- writer tests -------------------------------------------------

    def test_null(self):
        self.writerAssertEqual([], '')

    def test_single_writer(self):
        self.writerAssertEqual([['abc']], 'abc\r\n')

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')

    def test_quotes(self):
        self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n')

    def test_quote_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_newlines(self):
        self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n')
508
class EscapedExcel(csv.excel):
    # Variant of csv.excel that turns quoting off (QUOTE_NONE) and sets
    # a backslash as the escape character.
    quoting = csv.QUOTE_NONE
    escapechar = '\\'
512
class TestEscapedExcel(TestCsvBase):
    """Delimiter handling with an EscapedExcel dialect instance."""
    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        # Writing: the embedded comma is escaped with a backslash.
        rows = [['abc,def']]
        self.writerAssertEqual(rows, 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        # Reading: the escaped comma stays inside a single field.
        text = 'abc\\,def\r\n'
        self.readerAssertEqual(text, [['abc,def']])
521
class QuotedEscapedExcel(csv.excel):
    # Variant of csv.excel that uses QUOTE_NONNUMERIC quoting and sets
    # a backslash as the escape character.
    quoting = csv.QUOTE_NONNUMERIC
    escapechar = '\\'
525
class TestQuotedEscapedExcel(TestCsvBase):
    """Delimiter handling with a QuotedEscapedExcel dialect instance."""
    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        # Writing: the field is quoted, the comma is left unescaped.
        rows = [['abc,def']]
        self.writerAssertEqual(rows, '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        # Reading: an escaped comma inside quotes yields a plain comma.
        text = '"abc\\,def"\r\n'
        self.readerAssertEqual(text, [['abc,def']])
534
Skip Montanarob4a04172003-03-20 23:29:12 +0000535class TestDictFields(unittest.TestCase):
536 ### "long" means the row is longer than the number of fieldnames
537 ### "short" means there are fewer elements in the row than fieldnames
538 def test_write_simple_dict(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000539 fd, name = tempfile.mkstemp()
540 fileobj = os.fdopen(fd, "w+b")
541 try:
542 writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
543 writer.writerow({"f1": 10, "f3": "abc"})
544 fileobj.seek(0)
545 self.assertEqual(fileobj.read(), "10,,abc\r\n")
546 finally:
547 fileobj.close()
548 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000549
550 def test_write_no_fields(self):
551 fileobj = StringIO()
552 self.assertRaises(TypeError, csv.DictWriter, fileobj)
553
554 def test_read_dict_fields(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000555 fd, name = tempfile.mkstemp()
556 fileobj = os.fdopen(fd, "w+b")
557 try:
558 fileobj.write("1,2,abc\r\n")
559 fileobj.seek(0)
560 reader = csv.DictReader(fileobj,
561 fieldnames=["f1", "f2", "f3"])
562 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
563 finally:
564 fileobj.close()
565 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000566
Skip Montanarodffeed32003-10-03 14:03:01 +0000567 def test_read_dict_no_fieldnames(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000568 fd, name = tempfile.mkstemp()
569 fileobj = os.fdopen(fd, "w+b")
570 try:
571 fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
572 fileobj.seek(0)
573 reader = csv.DictReader(fileobj)
574 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
575 finally:
576 fileobj.close()
577 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000578
Skip Montanarob4a04172003-03-20 23:29:12 +0000579 def test_read_long(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000580 fd, name = tempfile.mkstemp()
581 fileobj = os.fdopen(fd, "w+b")
582 try:
583 fileobj.write("1,2,abc,4,5,6\r\n")
584 fileobj.seek(0)
585 reader = csv.DictReader(fileobj,
586 fieldnames=["f1", "f2"])
587 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
588 None: ["abc", "4", "5", "6"]})
589 finally:
590 fileobj.close()
591 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000592
593 def test_read_long_with_rest(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000594 fd, name = tempfile.mkstemp()
595 fileobj = os.fdopen(fd, "w+b")
596 try:
597 fileobj.write("1,2,abc,4,5,6\r\n")
598 fileobj.seek(0)
599 reader = csv.DictReader(fileobj,
600 fieldnames=["f1", "f2"], restkey="_rest")
601 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
602 "_rest": ["abc", "4", "5", "6"]})
603 finally:
604 fileobj.close()
605 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000606
Skip Montanarodffeed32003-10-03 14:03:01 +0000607 def test_read_long_with_rest_no_fieldnames(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000608 fd, name = tempfile.mkstemp()
609 fileobj = os.fdopen(fd, "w+b")
610 try:
611 fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
612 fileobj.seek(0)
613 reader = csv.DictReader(fileobj, restkey="_rest")
614 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
615 "_rest": ["abc", "4", "5", "6"]})
616 finally:
617 fileobj.close()
618 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000619
Skip Montanarob4a04172003-03-20 23:29:12 +0000620 def test_read_short(self):
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000621 fd, name = tempfile.mkstemp()
622 fileobj = os.fdopen(fd, "w+b")
623 try:
624 fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
625 fileobj.seek(0)
626 reader = csv.DictReader(fileobj,
627 fieldnames="1 2 3 4 5 6".split(),
628 restval="DEFAULT")
629 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
630 "4": '4', "5": '5', "6": '6'})
631 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
632 "4": 'DEFAULT', "5": 'DEFAULT',
633 "6": 'DEFAULT'})
634 finally:
635 fileobj.close()
636 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000637
Skip Montanaro1546bc42003-06-12 02:40:22 +0000638 def test_read_multi(self):
639 sample = [
640 '2147483648,43.0e12,17,abc,def\r\n',
641 '147483648,43.0e2,17,abc,def\r\n',
642 '47483648,43.0,170,abc,def\r\n'
643 ]
644
645 reader = csv.DictReader(sample,
646 fieldnames="i1 float i2 s1 s2".split())
647 self.assertEqual(reader.next(), {"i1": '2147483648',
648 "float": '43.0e12',
649 "i2": '17',
650 "s1": 'abc',
651 "s2": 'def'})
652
Skip Montanarob4a04172003-03-20 23:29:12 +0000653 def test_read_with_blanks(self):
654 reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
655 "1,2,abc,4,5,6\r\n"],
656 fieldnames="1 2 3 4 5 6".split())
657 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
658 "4": '4', "5": '5', "6": '6'})
659 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
660 "4": '4', "5": '5', "6": '6'})
661
Skip Montanaro3f7a9482003-09-06 19:52:12 +0000662 def test_read_semi_sep(self):
663 reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
664 fieldnames="1 2 3 4 5 6".split(),
665 delimiter=';')
666 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
667 "4": '4', "5": '5', "6": '6'})
668
class TestArrayWrites(unittest.TestCase):
    """Check that array.array objects can be written as csv rows."""

    def _check_row(self, row):
        """Write `row` with the "excel" dialect to a temporary file and
        compare the contents with the str() of each item joined by
        commas, followed by "\\r\\n"."""
        fd, path = tempfile.mkstemp()
        stream = os.fdopen(fd, "w+b")
        try:
            csv.writer(stream, dialect="excel").writerow(row)
            wanted = ",".join([str(item) for item in row]) + "\r\n"
            stream.seek(0)
            self.assertEqual(stream.read(), wanted)
        finally:
            stream.close()
            os.unlink(path)

    def test_int_write(self):
        import array
        # the integers 20 down to 1
        values = [20 - step for step in range(20)]
        self._check_row(array.array('i', values))

    def test_double_write(self):
        import array
        values = [(20 - step) * 0.1 for step in range(20)]
        self._check_row(array.array('d', values))

    def test_float_write(self):
        import array
        values = [(20 - step) * 0.1 for step in range(20)]
        self._check_row(array.array('f', values))

    def test_char_write(self):
        import array, string
        # Items of a 'c' array are one-character strings, so applying
        # str() to them in the helper leaves them unchanged.
        self._check_row(array.array('c', string.letters))
Skip Montanarob4a04172003-03-20 23:29:12 +0000733
734class TestDialectValidity(unittest.TestCase):
735 def test_quoting(self):
736 class mydialect(csv.Dialect):
737 delimiter = ";"
738 escapechar = '\\'
739 doublequote = False
740 skipinitialspace = True
741 lineterminator = '\r\n'
742 quoting = csv.QUOTE_NONE
743 d = mydialect()
744
745 mydialect.quoting = None
746 self.assertRaises(csv.Error, mydialect)
747
748 mydialect.quoting = csv.QUOTE_NONE
749 mydialect.escapechar = None
750 self.assertRaises(csv.Error, mydialect)
751
752 mydialect.doublequote = True
753 mydialect.quoting = csv.QUOTE_ALL
754 mydialect.quotechar = '"'
755 d = mydialect()
756
757 mydialect.quotechar = "''"
758 self.assertRaises(csv.Error, mydialect)
759
760 mydialect.quotechar = 4
761 self.assertRaises(csv.Error, mydialect)
762
763 def test_delimiter(self):
764 class mydialect(csv.Dialect):
765 delimiter = ";"
766 escapechar = '\\'
767 doublequote = False
768 skipinitialspace = True
769 lineterminator = '\r\n'
770 quoting = csv.QUOTE_NONE
771 d = mydialect()
772
773 mydialect.delimiter = ":::"
774 self.assertRaises(csv.Error, mydialect)
775
776 mydialect.delimiter = 4
777 self.assertRaises(csv.Error, mydialect)
778
779 def test_lineterminator(self):
780 class mydialect(csv.Dialect):
781 delimiter = ";"
782 escapechar = '\\'
783 doublequote = False
784 skipinitialspace = True
785 lineterminator = '\r\n'
786 quoting = csv.QUOTE_NONE
787 d = mydialect()
788
789 mydialect.lineterminator = ":::"
790 d = mydialect()
791
792 mydialect.lineterminator = 4
793 self.assertRaises(csv.Error, mydialect)
794
795
class TestSniffer(unittest.TestCase):
    """Tests for csv.Sniffer: header detection and dialect guessing."""

    # comma separated, with a space after each comma
    sample1 = """\
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
Shark City, Glendale Heights, IL, 12/28/02, Prezence
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
"""
    # colon separated, fields wrapped in single quotes
    sample2 = """\
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters Seafood and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""

    # header row that test_has_header puts in front of sample1
    header = '''\
"venue","city","state","date","performers"
'''
    # three identical lines
    sample3 = '''\
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
'''

    # semicolon separated
    sample4 = '''\
2147483648;43.0e12;17;abc;def
147483648;43.0e2;17;abc;def
47483648;43.0;170;abc;def
'''

    def test_has_header(self):
        sniffer = csv.Sniffer()
        checks = [(self.sample1, False),
                  (self.header+self.sample1, True)]
        for text, answer in checks:
            self.assertEqual(sniffer.has_header(text), answer)

    def test_sniff(self):
        sniffer = csv.Sniffer()
        checks = [
            # sample, delimiter, quotechar, skipinitialspace
            (self.sample1, ",", '"', True),
            (self.sample2, ":", "'", False),
        ]
        for text, delimiter, quotechar, skipinitialspace in checks:
            guessed = sniffer.sniff(text)
            self.assertEqual(guessed.delimiter, delimiter)
            self.assertEqual(guessed.quotechar, quotechar)
            self.assertEqual(guessed.skipinitialspace, skipinitialspace)

    def test_delimiters(self):
        sniffer = csv.Sniffer()
        # With no candidate list, "0" is expected for sample3.
        self.assertEqual(sniffer.sniff(self.sample3).delimiter, "0")
        # An explicit candidate list restricts the guess.
        for candidates, answer in [("?,", "?"), ("/,", "/")]:
            guessed = sniffer.sniff(self.sample3, delimiters=candidates)
            self.assertEqual(guessed.delimiter, answer)
        self.assertEqual(sniffer.sniff(self.sample4).delimiter, ";")
Skip Montanaro77892372003-05-19 15:33:36 +0000852
# The leak tests need sys.gettotalrefcount(); when the interpreter does not
# provide it they are not defined at all (and a note is printed in verbose
# mode).
if not hasattr(sys, "gettotalrefcount"):
    if test_support.verbose: print "*** skipping leakage tests ***"
else:
    # File-like object whose write() and writelines() discard their input.
    class NUL:
        def write(s, *args):
            pass
        writelines = write

    # Each test repeats the same work 20 times.  The total reference count
    # is sampled at the top of every iteration, after a gc.collect() and
    # before the work is done, so `delta` is the difference between two
    # consecutive samples: the growth left behind by the previous
    # iteration.  Only the delta from the final iteration is checked.
    class TestLeaks(unittest.TestCase):
        def test_create_read(self):
            delta = 0
            lastrc = sys.gettotalrefcount()
            for i in xrange(20):
                gc.collect()
                # nothing uncollectable may have piled up
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # three readers are created and dropped per iteration
                csv.reader(["a,b,c\r\n"])
                csv.reader(["a,b,c\r\n"])
                csv.reader(["a,b,c\r\n"])
                delta = rc-lastrc
                lastrc = rc
            # if csv.reader() leaks, last delta should be 3 or more
            self.assertEqual(delta < 3, True)

        def test_create_write(self):
            delta = 0
            lastrc = sys.gettotalrefcount()
            s = NUL()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # three writers are created and dropped per iteration
                csv.writer(s)
                csv.writer(s)
                csv.writer(s)
                delta = rc-lastrc
                lastrc = rc
            # if csv.writer() leaks, last delta should be 3 or more
            self.assertEqual(delta < 3, True)

        def test_read(self):
            delta = 0
            rows = ["a,b,c\r\n"]*5
            lastrc = sys.gettotalrefcount()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # five rows are read per iteration
                rdr = csv.reader(rows)
                for row in rdr:
                    pass
                delta = rc-lastrc
                lastrc = rc
            # if reader leaks during read, delta should be 5 or more
            self.assertEqual(delta < 5, True)

        def test_write(self):
            delta = 0
            rows = [[1,2,3]]*5
            s = NUL()
            lastrc = sys.gettotalrefcount()
            for i in xrange(20):
                gc.collect()
                self.assertEqual(gc.garbage, [])
                rc = sys.gettotalrefcount()
                # five rows are written per iteration
                writer = csv.writer(s)
                for row in rows:
                    writer.writerow(row)
                delta = rc-lastrc
                lastrc = rc
            # if writer leaks during write, last delta should be 5 or more
            self.assertEqual(delta < 5, True)
925
Skip Montanaro1a566652003-05-06 15:56:05 +0000926# commented out for now - csv module doesn't yet support Unicode
Skip Montanaro58fc5d082004-06-05 17:03:20 +0000927## class TestUnicode(unittest.TestCase):
928## def test_unicode_read(self):
929## import codecs
930## f = codecs.EncodedFile(StringIO("Martin von Löwis,"
931## "Marc André Lemburg,"
932## "Guido van Rossum,"
933## "François Pinard\r\n"),
934## data_encoding='iso-8859-1')
935## reader = csv.reader(f)
936## self.assertEqual(list(reader), [[u"Martin von Löwis",
937## u"Marc André Lemburg",
938## u"Guido van Rossum",
939## u"François Pinardn"]])
Skip Montanaro1a566652003-05-06 15:56:05 +0000940
def test_main():
    """Pass every attribute of this module whose name starts with 'Test'
    to test_support.run_unittest()."""
    this_module = sys.modules[__name__]
    selected = []
    for attr in dir(this_module):
        if attr.startswith('Test'):
            selected.append(getattr(this_module, attr))
    test_support.run_unittest(*selected)
Skip Montanarob4a04172003-03-20 23:29:12 +0000946
# Run the tests when this file is executed as a script.
if __name__ == '__main__':
    test_main()