blob: e2eec70285d5a56b8621b3d40a29bed7f7bbef6e [file] [log] [blame]
Skip Montanaro1a566652003-05-06 15:56:05 +00001# -*- coding: iso-8859-1 -*-
Skip Montanarob4a04172003-03-20 23:29:12 +00002# Copyright (C) 2001,2002 Python Software Foundation
3# csv package unit tests
4
5import sys
Skip Montanaro58fc5d02004-06-05 17:03:20 +00006import os
Skip Montanarob4a04172003-03-20 23:29:12 +00007import unittest
8from StringIO import StringIO
Skip Montanaro58fc5d02004-06-05 17:03:20 +00009import tempfile
Skip Montanaro594adac2003-04-10 17:16:15 +000010import csv
Skip Montanarob4a04172003-03-20 23:29:12 +000011import gc
Dirkjan Ochtman19c9b602010-03-04 19:21:53 +000012import io
Walter Dörwald21d3a322003-05-01 17:45:56 +000013from test import test_support
Skip Montanarob4a04172003-03-20 23:29:12 +000014
15class Test_Csv(unittest.TestCase):
16 """
Tim Peters0eadaac2003-04-24 16:02:54 +000017 Test the underlying C csv parser in ways that are not appropriate
Skip Montanarob4a04172003-03-20 23:29:12 +000018 from the high level interface. Further tests of this nature are done
19 in TestDialectRegistry.
20 """
Andrew McNamara1196cf12005-01-07 04:42:45 +000021 def _test_arg_valid(self, ctor, arg):
22 self.assertRaises(TypeError, ctor)
23 self.assertRaises(TypeError, ctor, None)
24 self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
25 self.assertRaises(TypeError, ctor, arg, delimiter = 0)
26 self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
27 self.assertRaises(csv.Error, ctor, arg, 'foo')
Andrew McNamara1196cf12005-01-07 04:42:45 +000028 self.assertRaises(TypeError, ctor, arg, delimiter=None)
29 self.assertRaises(TypeError, ctor, arg, delimiter=1)
30 self.assertRaises(TypeError, ctor, arg, quotechar=1)
31 self.assertRaises(TypeError, ctor, arg, lineterminator=None)
32 self.assertRaises(TypeError, ctor, arg, lineterminator=1)
33 self.assertRaises(TypeError, ctor, arg, quoting=None)
Tim Peters608c2ff2005-01-13 17:37:38 +000034 self.assertRaises(TypeError, ctor, arg,
Andrew McNamaraaf1e3122005-01-12 01:55:21 +000035 quoting=csv.QUOTE_ALL, quotechar='')
Tim Peters608c2ff2005-01-13 17:37:38 +000036 self.assertRaises(TypeError, ctor, arg,
Andrew McNamara5d45a8d2005-01-12 08:16:17 +000037 quoting=csv.QUOTE_ALL, quotechar=None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000038
39 def test_reader_arg_valid(self):
40 self._test_arg_valid(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000041
42 def test_writer_arg_valid(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000043 self._test_arg_valid(csv.writer, StringIO())
Skip Montanarob4a04172003-03-20 23:29:12 +000044
Andrew McNamara1196cf12005-01-07 04:42:45 +000045 def _test_default_attrs(self, ctor, *args):
46 obj = ctor(*args)
47 # Check defaults
Skip Montanarob4a04172003-03-20 23:29:12 +000048 self.assertEqual(obj.dialect.delimiter, ',')
Andrew McNamara1196cf12005-01-07 04:42:45 +000049 self.assertEqual(obj.dialect.doublequote, True)
Skip Montanarob4a04172003-03-20 23:29:12 +000050 self.assertEqual(obj.dialect.escapechar, None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000051 self.assertEqual(obj.dialect.lineterminator, "\r\n")
52 self.assertEqual(obj.dialect.quotechar, '"')
53 self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
54 self.assertEqual(obj.dialect.skipinitialspace, False)
55 self.assertEqual(obj.dialect.strict, False)
56 # Try deleting or changing attributes (they are read-only)
57 self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
58 self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':')
Barry Warsawb180c062005-04-20 19:41:36 +000059 self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
60 self.assertRaises(AttributeError, setattr, obj.dialect,
61 'quoting', None)
Skip Montanarob4a04172003-03-20 23:29:12 +000062
63 def test_reader_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000064 self._test_default_attrs(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000065
66 def test_writer_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000067 self._test_default_attrs(csv.writer, StringIO())
68
69 def _test_kw_attrs(self, ctor, *args):
70 # Now try with alternate options
71 kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
72 lineterminator='\r', quotechar='*',
73 quoting=csv.QUOTE_NONE, skipinitialspace=True,
74 strict=True)
75 obj = ctor(*args, **kwargs)
76 self.assertEqual(obj.dialect.delimiter, ':')
77 self.assertEqual(obj.dialect.doublequote, False)
78 self.assertEqual(obj.dialect.escapechar, '\\')
79 self.assertEqual(obj.dialect.lineterminator, "\r")
80 self.assertEqual(obj.dialect.quotechar, '*')
81 self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
82 self.assertEqual(obj.dialect.skipinitialspace, True)
83 self.assertEqual(obj.dialect.strict, True)
84
85 def test_reader_kw_attrs(self):
86 self._test_kw_attrs(csv.reader, [])
87
88 def test_writer_kw_attrs(self):
89 self._test_kw_attrs(csv.writer, StringIO())
90
91 def _test_dialect_attrs(self, ctor, *args):
92 # Now try with dialect-derived options
93 class dialect:
94 delimiter='-'
95 doublequote=False
96 escapechar='^'
97 lineterminator='$'
98 quotechar='#'
99 quoting=csv.QUOTE_ALL
100 skipinitialspace=True
101 strict=False
102 args = args + (dialect,)
103 obj = ctor(*args)
104 self.assertEqual(obj.dialect.delimiter, '-')
105 self.assertEqual(obj.dialect.doublequote, False)
106 self.assertEqual(obj.dialect.escapechar, '^')
107 self.assertEqual(obj.dialect.lineterminator, "$")
108 self.assertEqual(obj.dialect.quotechar, '#')
109 self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
110 self.assertEqual(obj.dialect.skipinitialspace, True)
111 self.assertEqual(obj.dialect.strict, False)
112
113 def test_reader_dialect_attrs(self):
114 self._test_dialect_attrs(csv.reader, [])
115
116 def test_writer_dialect_attrs(self):
117 self._test_dialect_attrs(csv.writer, StringIO())
118
Skip Montanarob4a04172003-03-20 23:29:12 +0000119
120 def _write_test(self, fields, expect, **kwargs):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000121 fd, name = tempfile.mkstemp()
122 fileobj = os.fdopen(fd, "w+b")
123 try:
124 writer = csv.writer(fileobj, **kwargs)
125 writer.writerow(fields)
126 fileobj.seek(0)
127 self.assertEqual(fileobj.read(),
128 expect + writer.dialect.lineterminator)
129 finally:
130 fileobj.close()
131 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000132
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200133 def _write_error_test(self, exc, fields, **kwargs):
134 fd, name = tempfile.mkstemp()
135 fileobj = os.fdopen(fd, "w+b")
136 try:
137 writer = csv.writer(fileobj, **kwargs)
138 with self.assertRaises(exc):
139 writer.writerow(fields)
140 fileobj.seek(0)
141 self.assertEqual(fileobj.read(), '')
142 finally:
143 fileobj.close()
144 os.unlink(name)
145
Skip Montanarob4a04172003-03-20 23:29:12 +0000146 def test_write_arg_valid(self):
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200147 self._write_error_test(csv.Error, None)
Skip Montanarob4a04172003-03-20 23:29:12 +0000148 self._write_test((), '')
149 self._write_test([None], '""')
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200150 self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
Skip Montanarob4a04172003-03-20 23:29:12 +0000151 # Check that exceptions are passed up the chain
152 class BadList:
153 def __len__(self):
154 return 10;
155 def __getitem__(self, i):
156 if i > 2:
157 raise IOError
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200158 self._write_error_test(IOError, BadList())
Skip Montanarob4a04172003-03-20 23:29:12 +0000159 class BadItem:
160 def __str__(self):
161 raise IOError
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200162 self._write_error_test(IOError, [BadItem()])
Skip Montanarob4a04172003-03-20 23:29:12 +0000163
164 def test_write_bigfield(self):
165 # This exercises the buffer realloc functionality
166 bigstring = 'X' * 50000
167 self._write_test([bigstring,bigstring], '%s,%s' % \
168 (bigstring, bigstring))
169
170 def test_write_quoting(self):
Andrew McNamarac89f2842005-01-12 07:44:42 +0000171 self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200172 self._write_error_test(csv.Error, ['a',1,'p,q'],
173 quoting = csv.QUOTE_NONE)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000174 self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000175 quoting = csv.QUOTE_MINIMAL)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000176 self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000177 quoting = csv.QUOTE_NONNUMERIC)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000178 self._write_test(['a',1,'p,q'], '"a","1","p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000179 quoting = csv.QUOTE_ALL)
R. David Murray38644592009-04-01 21:26:18 +0000180 self._write_test(['a\nb',1], '"a\nb","1"',
181 quoting = csv.QUOTE_ALL)
Skip Montanarob4a04172003-03-20 23:29:12 +0000182
183 def test_write_escape(self):
Andrew McNamarac89f2842005-01-12 07:44:42 +0000184 self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000185 escapechar='\\')
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200186 self._write_error_test(csv.Error, ['a',1,'p,"q"'],
187 escapechar=None, doublequote=False)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000188 self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
189 escapechar='\\', doublequote = False)
Tim Peters608c2ff2005-01-13 17:37:38 +0000190 self._write_test(['"'], '""""',
Andrew McNamarac89f2842005-01-12 07:44:42 +0000191 escapechar='\\', quoting = csv.QUOTE_MINIMAL)
Tim Peters608c2ff2005-01-13 17:37:38 +0000192 self._write_test(['"'], '\\"',
Andrew McNamarac89f2842005-01-12 07:44:42 +0000193 escapechar='\\', quoting = csv.QUOTE_MINIMAL,
194 doublequote = False)
Tim Peters608c2ff2005-01-13 17:37:38 +0000195 self._write_test(['"'], '\\"',
Andrew McNamarac89f2842005-01-12 07:44:42 +0000196 escapechar='\\', quoting = csv.QUOTE_NONE)
197 self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
Skip Montanarob4a04172003-03-20 23:29:12 +0000198 escapechar='\\', quoting = csv.QUOTE_NONE)
199
200 def test_writerows(self):
201 class BrokenFile:
202 def write(self, buf):
203 raise IOError
204 writer = csv.writer(BrokenFile())
205 self.assertRaises(IOError, writer.writerows, [['a']])
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000206 fd, name = tempfile.mkstemp()
207 fileobj = os.fdopen(fd, "w+b")
208 try:
209 writer = csv.writer(fileobj)
210 self.assertRaises(TypeError, writer.writerows, None)
211 writer.writerows([['a','b'],['c','d']])
212 fileobj.seek(0)
213 self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
214 finally:
215 fileobj.close()
216 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000217
Raymond Hettingerf5377022011-12-11 22:31:09 -0800218 def test_write_float(self):
219 # Issue 13573: loss of precision because csv.writer
220 # uses str() for floats instead of repr()
221 orig_row = [1.234567890123, 1.0/7.0, 'abc']
222 f = StringIO()
223 c = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
224 c.writerow(orig_row)
225 f.seek(0)
226 c = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
227 new_row = next(c)
228 self.assertEqual(orig_row, new_row)
229
Skip Montanarob4a04172003-03-20 23:29:12 +0000230 def _read_test(self, input, expect, **kwargs):
231 reader = csv.reader(input, **kwargs)
232 result = list(reader)
233 self.assertEqual(result, expect)
234
235 def test_read_oddinputs(self):
236 self._read_test([], [])
237 self._read_test([''], [[]])
238 self.assertRaises(csv.Error, self._read_test,
239 ['"ab"c'], None, strict = 1)
240 # cannot handle null bytes for the moment
241 self.assertRaises(csv.Error, self._read_test,
242 ['ab\0c'], None, strict = 1)
243 self._read_test(['"ab"c'], [['abc']], doublequote = 0)
244
245 def test_read_eol(self):
246 self._read_test(['a,b'], [['a','b']])
247 self._read_test(['a,b\n'], [['a','b']])
248 self._read_test(['a,b\r\n'], [['a','b']])
249 self._read_test(['a,b\r'], [['a','b']])
250 self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
251 self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
252 self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])
253
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700254 def test_read_eof(self):
255 self._read_test(['a,"'], [['a', '']])
256 self._read_test(['"a'], [['a']])
257 self._read_test(['^'], [['\n']], escapechar='^')
258 self.assertRaises(csv.Error, self._read_test, ['a,"'], [], strict=True)
259 self.assertRaises(csv.Error, self._read_test, ['"a'], [], strict=True)
260 self.assertRaises(csv.Error, self._read_test,
261 ['^'], [], escapechar='^', strict=True)
262
Skip Montanarob4a04172003-03-20 23:29:12 +0000263 def test_read_escape(self):
Andrew McNamara36a76912005-01-10 01:04:40 +0000264 self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000265 self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
266 self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
Andrew McNamara36a76912005-01-10 01:04:40 +0000267 self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000268 self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
269 self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
270
Andrew McNamara1196cf12005-01-07 04:42:45 +0000271 def test_read_quoting(self):
272 self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
273 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
274 quotechar=None, escapechar='\\')
275 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
276 quoting=csv.QUOTE_NONE, escapechar='\\')
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000277 # will this fail where locale uses comma for decimals?
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000278 self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000279 quoting=csv.QUOTE_NONNUMERIC)
R. David Murray38644592009-04-01 21:26:18 +0000280 self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
Tim Peters608c2ff2005-01-13 17:37:38 +0000281 self.assertRaises(ValueError, self._read_test,
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000282 ['abc,3'], [[]],
283 quoting=csv.QUOTE_NONNUMERIC)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000284
Skip Montanarob4a04172003-03-20 23:29:12 +0000285 def test_read_bigfield(self):
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000286 # This exercises the buffer realloc functionality and field size
287 # limits.
Andrew McNamara31d88962005-01-12 03:45:10 +0000288 limit = csv.field_size_limit()
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000289 try:
290 size = 50000
291 bigstring = 'X' * size
292 bigline = '%s,%s' % (bigstring, bigstring)
293 self._read_test([bigline], [[bigstring, bigstring]])
Andrew McNamara31d88962005-01-12 03:45:10 +0000294 csv.field_size_limit(size)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000295 self._read_test([bigline], [[bigstring, bigstring]])
Andrew McNamara31d88962005-01-12 03:45:10 +0000296 self.assertEqual(csv.field_size_limit(), size)
297 csv.field_size_limit(size-1)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000298 self.assertRaises(csv.Error, self._read_test, [bigline], [])
Andrew McNamara31d88962005-01-12 03:45:10 +0000299 self.assertRaises(TypeError, csv.field_size_limit, None)
300 self.assertRaises(TypeError, csv.field_size_limit, 1, None)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000301 finally:
Andrew McNamara31d88962005-01-12 03:45:10 +0000302 csv.field_size_limit(limit)
Skip Montanarob4a04172003-03-20 23:29:12 +0000303
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000304 def test_read_linenum(self):
Georg Brandl77354cf2008-03-21 20:01:51 +0000305 for r in (csv.reader(['line,1', 'line,2', 'line,3']),
306 csv.DictReader(['line,1', 'line,2', 'line,3'],
307 fieldnames=['a', 'b', 'c'])):
308 self.assertEqual(r.line_num, 0)
309 r.next()
310 self.assertEqual(r.line_num, 1)
311 r.next()
312 self.assertEqual(r.line_num, 2)
313 r.next()
314 self.assertEqual(r.line_num, 3)
315 self.assertRaises(StopIteration, r.next)
316 self.assertEqual(r.line_num, 3)
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000317
R. David Murray38644592009-04-01 21:26:18 +0000318 def test_roundtrip_quoteed_newlines(self):
319 fd, name = tempfile.mkstemp()
320 fileobj = os.fdopen(fd, "w+b")
321 try:
322 writer = csv.writer(fileobj)
323 self.assertRaises(TypeError, writer.writerows, None)
324 rows = [['a\nb','b'],['c','x\r\nd']]
325 writer.writerows(rows)
326 fileobj.seek(0)
327 for i, row in enumerate(csv.reader(fileobj)):
328 self.assertEqual(row, rows[i])
329 finally:
330 fileobj.close()
331 os.unlink(name)
332
Skip Montanarob4a04172003-03-20 23:29:12 +0000333class TestDialectRegistry(unittest.TestCase):
334 def test_registry_badargs(self):
335 self.assertRaises(TypeError, csv.list_dialects, None)
336 self.assertRaises(TypeError, csv.get_dialect)
337 self.assertRaises(csv.Error, csv.get_dialect, None)
338 self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
339 self.assertRaises(TypeError, csv.unregister_dialect)
340 self.assertRaises(csv.Error, csv.unregister_dialect, None)
341 self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
342 self.assertRaises(TypeError, csv.register_dialect, None)
343 self.assertRaises(TypeError, csv.register_dialect, None, None)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000344 self.assertRaises(TypeError, csv.register_dialect, "nonesuch", 0, 0)
345 self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
346 badargument=None)
347 self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
348 quoting=None)
349 self.assertRaises(TypeError, csv.register_dialect, [])
Skip Montanarob4a04172003-03-20 23:29:12 +0000350
351 def test_registry(self):
352 class myexceltsv(csv.excel):
353 delimiter = "\t"
354 name = "myexceltsv"
355 expected_dialects = csv.list_dialects() + [name]
356 expected_dialects.sort()
357 csv.register_dialect(name, myexceltsv)
Éric Araujod11058d2010-12-02 22:35:10 +0000358 self.addCleanup(csv.unregister_dialect, name)
359 self.assertEqual(csv.get_dialect(name).delimiter, '\t')
360 got_dialects = sorted(csv.list_dialects())
361 self.assertEqual(expected_dialects, got_dialects)
Skip Montanarob4a04172003-03-20 23:29:12 +0000362
Andrew McNamara86625972005-01-11 01:28:33 +0000363 def test_register_kwargs(self):
364 name = 'fedcba'
365 csv.register_dialect(name, delimiter=';')
Éric Araujod11058d2010-12-02 22:35:10 +0000366 self.addCleanup(csv.unregister_dialect, name)
367 self.assertEqual(csv.get_dialect(name).delimiter, ';')
368 self.assertEqual([['X', 'Y', 'Z']], list(csv.reader(['X;Y;Z'], name)))
Andrew McNamara86625972005-01-11 01:28:33 +0000369
Skip Montanarob4a04172003-03-20 23:29:12 +0000370 def test_incomplete_dialect(self):
371 class myexceltsv(csv.Dialect):
372 delimiter = "\t"
373 self.assertRaises(csv.Error, myexceltsv)
374
375 def test_space_dialect(self):
376 class space(csv.excel):
377 delimiter = " "
378 quoting = csv.QUOTE_NONE
379 escapechar = "\\"
380
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000381 fd, name = tempfile.mkstemp()
382 fileobj = os.fdopen(fd, "w+b")
383 try:
384 fileobj.write("abc def\nc1ccccc1 benzene\n")
385 fileobj.seek(0)
386 rdr = csv.reader(fileobj, dialect=space())
387 self.assertEqual(rdr.next(), ["abc", "def"])
388 self.assertEqual(rdr.next(), ["c1ccccc1", "benzene"])
389 finally:
390 fileobj.close()
391 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000392
Skip Montanarob4a04172003-03-20 23:29:12 +0000393 def test_dialect_apply(self):
394 class testA(csv.excel):
395 delimiter = "\t"
396 class testB(csv.excel):
397 delimiter = ":"
398 class testC(csv.excel):
399 delimiter = "|"
400
401 csv.register_dialect('testC', testC)
402 try:
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000403 fd, name = tempfile.mkstemp()
404 fileobj = os.fdopen(fd, "w+b")
405 try:
406 writer = csv.writer(fileobj)
407 writer.writerow([1,2,3])
408 fileobj.seek(0)
409 self.assertEqual(fileobj.read(), "1,2,3\r\n")
410 finally:
411 fileobj.close()
412 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000413
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000414 fd, name = tempfile.mkstemp()
415 fileobj = os.fdopen(fd, "w+b")
416 try:
417 writer = csv.writer(fileobj, testA)
418 writer.writerow([1,2,3])
419 fileobj.seek(0)
420 self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
421 finally:
422 fileobj.close()
423 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000424
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000425 fd, name = tempfile.mkstemp()
426 fileobj = os.fdopen(fd, "w+b")
427 try:
428 writer = csv.writer(fileobj, dialect=testB())
429 writer.writerow([1,2,3])
430 fileobj.seek(0)
431 self.assertEqual(fileobj.read(), "1:2:3\r\n")
432 finally:
433 fileobj.close()
434 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000435
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000436 fd, name = tempfile.mkstemp()
437 fileobj = os.fdopen(fd, "w+b")
438 try:
439 writer = csv.writer(fileobj, dialect='testC')
440 writer.writerow([1,2,3])
441 fileobj.seek(0)
442 self.assertEqual(fileobj.read(), "1|2|3\r\n")
443 finally:
444 fileobj.close()
445 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000446
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000447 fd, name = tempfile.mkstemp()
448 fileobj = os.fdopen(fd, "w+b")
449 try:
450 writer = csv.writer(fileobj, dialect=testA, delimiter=';')
451 writer.writerow([1,2,3])
452 fileobj.seek(0)
453 self.assertEqual(fileobj.read(), "1;2;3\r\n")
454 finally:
455 fileobj.close()
456 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000457
Skip Montanarob4a04172003-03-20 23:29:12 +0000458 finally:
459 csv.unregister_dialect('testC')
460
461 def test_bad_dialect(self):
462 # Unknown parameter
Andrew McNamara1196cf12005-01-07 04:42:45 +0000463 self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000464 # Bad values
465 self.assertRaises(TypeError, csv.reader, [], delimiter = None)
466 self.assertRaises(TypeError, csv.reader, [], quoting = -1)
467 self.assertRaises(TypeError, csv.reader, [], quoting = 100)
468
class TestCsvBase(unittest.TestCase):
    # Helper base class.  Both helpers read ``self.dialect``, which this
    # class itself does not define.

    def readerAssertEqual(self, input, expected_result):
        # Store `input` in a temporary file, parse it with self.dialect
        # and compare every parsed row with `expected_result`.
        handle, path = tempfile.mkstemp()
        stream = os.fdopen(handle, "w+b")
        try:
            stream.write(input)
            stream.seek(0)
            rows = list(csv.reader(stream, dialect=self.dialect))
            self.assertEqual(rows, expected_result)
        finally:
            stream.close()
            os.unlink(path)

    def writerAssertEqual(self, input, expected_result):
        # Write the rows in `input` with self.dialect into a temporary
        # file and compare the raw file contents with `expected_result`.
        handle, path = tempfile.mkstemp()
        stream = os.fdopen(handle, "w+b")
        try:
            csv.writer(stream, dialect=self.dialect).writerows(input)
            stream.seek(0)
            written = stream.read()
            self.assertEqual(written, expected_result)
        finally:
            stream.close()
            os.unlink(path)
Skip Montanarob4a04172003-03-20 23:29:12 +0000494
class TestDialectExcel(TestCsvBase):
    # Reader and writer checks using the registered 'excel' dialect.
    dialect = 'excel'

    # ---- reader ----

    def test_single(self):
        expected = [['abc']]
        self.readerAssertEqual('abc', expected)

    def test_simple(self):
        expected = [['1', '2', '3', '4', '5']]
        self.readerAssertEqual('1,2,3,4,5', expected)

    def test_blankline(self):
        # Empty input produces no rows at all.
        self.readerAssertEqual('', [])

    def test_empty_fields(self):
        expected = [['', '']]
        self.readerAssertEqual(',', expected)

    def test_singlequoted(self):
        expected = [['']]
        self.readerAssertEqual('""', expected)

    def test_singlequoted_left_empty(self):
        expected = [['', '']]
        self.readerAssertEqual('"",', expected)

    def test_singlequoted_right_empty(self):
        expected = [['', '']]
        self.readerAssertEqual(',""', expected)

    def test_single_quoted_quote(self):
        expected = [['"']]
        self.readerAssertEqual('""""', expected)

    def test_quoted_quotes(self):
        expected = [['""']]
        self.readerAssertEqual('""""""', expected)

    def test_inline_quote(self):
        expected = [['a""b']]
        self.readerAssertEqual('a""b', expected)

    def test_inline_quotes(self):
        expected = [['a"b"c']]
        self.readerAssertEqual('a"b"c', expected)

    def test_quotes_and_more(self):
        # Excel would never write a field containing '"a"b', but when
        # reading one, it will return 'ab'.
        expected = [['ab']]
        self.readerAssertEqual('"a"b', expected)

    def test_lone_quote(self):
        expected = [['a"b']]
        self.readerAssertEqual('a"b', expected)

    def test_quote_and_quote(self):
        # Excel would never write a field containing '"a" "b"', but when
        # reading one, it will return 'a "b"'.
        expected = [['a "b"']]
        self.readerAssertEqual('"a" "b"', expected)

    def test_space_and_quote(self):
        expected = [[' "a"']]
        self.readerAssertEqual(' "a"', expected)

    def test_quoted(self):
        source = '1,2,3,"I think, therefore I am",5,6'
        expected = [['1', '2', '3', 'I think, therefore I am', '5', '6']]
        self.readerAssertEqual(source, expected)

    def test_quoted_quote(self):
        source = ('1,2,3,"""I see,"" said the blind man",'
                  '"as he picked up his hammer and saw"')
        expected = [['1', '2', '3',
                     '"I see," said the blind man',
                     'as he picked up his hammer and saw']]
        self.readerAssertEqual(source, expected)

    def test_quoted_nl(self):
        # Quoted fields may span several physical lines.
        source = '''\
1,2,3,"""I see,""
said the blind man","as he picked up his
hammer and saw"
9,8,7,6'''
        first_row = ['1', '2', '3',
                     '"I see,"\nsaid the blind man',
                     'as he picked up his\nhammer and saw']
        second_row = ['9', '8', '7', '6']
        self.readerAssertEqual(source, [first_row, second_row])

    def test_dubious_quote(self):
        expected = [['12', '12', '1"', '']]
        self.readerAssertEqual('12,12,1",', expected)

    # ---- writer ----

    def test_null(self):
        # No rows in, nothing written.
        self.writerAssertEqual([], '')

    def test_single_writer(self):
        rows = [['abc']]
        self.writerAssertEqual(rows, 'abc\r\n')

    def test_simple_writer(self):
        rows = [[1, 2, 'abc', 3, 4]]
        self.writerAssertEqual(rows, '1,2,abc,3,4\r\n')

    def test_quotes(self):
        rows = [[1, 2, 'a"bc"', 3, 4]]
        self.writerAssertEqual(rows, '1,2,"a""bc""",3,4\r\n')

    def test_quote_fieldsep(self):
        rows = [['abc,def']]
        self.writerAssertEqual(rows, '"abc,def"\r\n')

    def test_newlines(self):
        rows = [[1, 2, 'a\nbc', 3, 4]]
        self.writerAssertEqual(rows, '1,2,"a\nbc",3,4\r\n')
591
class EscapedExcel(csv.excel):
    # csv.excel with quoting switched off and a backslash set as the
    # escape character.
    escapechar = '\\'
    quoting = csv.QUOTE_NONE
595
class TestEscapedExcel(TestCsvBase):
    # Runs the shared reader/writer helpers with an EscapedExcel instance.
    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        rows = [['abc,def']]
        self.writerAssertEqual(rows, 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        expected = [['abc,def']]
        self.readerAssertEqual('abc\\,def\r\n', expected)
604
class QuotedEscapedExcel(csv.excel):
    # csv.excel with QUOTE_NONNUMERIC quoting and a backslash set as the
    # escape character.
    escapechar = '\\'
    quoting = csv.QUOTE_NONNUMERIC
608
class TestQuotedEscapedExcel(TestCsvBase):
    # Runs the shared reader/writer helpers with a QuotedEscapedExcel
    # instance.
    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        rows = [['abc,def']]
        self.writerAssertEqual(rows, '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        expected = [['abc,def']]
        self.readerAssertEqual('"abc\\,def"\r\n', expected)
617
Skip Montanarob4a04172003-03-20 23:29:12 +0000618class TestDictFields(unittest.TestCase):
619 ### "long" means the row is longer than the number of fieldnames
620 ### "short" means there are fewer elements in the row than fieldnames
621 def test_write_simple_dict(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000622 fd, name = tempfile.mkstemp()
Dirkjan Ochtman19c9b602010-03-04 19:21:53 +0000623 fileobj = io.open(fd, 'w+b')
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000624 try:
625 writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
Dirkjan Ochtman86148172010-02-23 21:09:52 +0000626 writer.writeheader()
627 fileobj.seek(0)
628 self.assertEqual(fileobj.readline(), "f1,f2,f3\r\n")
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000629 writer.writerow({"f1": 10, "f3": "abc"})
630 fileobj.seek(0)
Dirkjan Ochtman86148172010-02-23 21:09:52 +0000631 fileobj.readline() # header
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000632 self.assertEqual(fileobj.read(), "10,,abc\r\n")
633 finally:
634 fileobj.close()
635 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000636
637 def test_write_no_fields(self):
638 fileobj = StringIO()
639 self.assertRaises(TypeError, csv.DictWriter, fileobj)
640
R David Murrayeccf9c22013-11-19 13:25:24 -0500641 def test_write_fields_not_in_fieldnames(self):
642 fd, name = tempfile.mkstemp()
643 fileobj = os.fdopen(fd, "w+b")
644 try:
645 writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
646 # Of special note is the non-string key (issue 19449)
647 with self.assertRaises(ValueError) as cx:
648 writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})
649 exception = str(cx.exception)
650 self.assertIn("fieldnames", exception)
651 self.assertIn("'f4'", exception)
652 self.assertNotIn("'f2'", exception)
653 self.assertIn("1", exception)
654 finally:
655 fileobj.close()
656 os.unlink(name)
657
Skip Montanarob4a04172003-03-20 23:29:12 +0000658 def test_read_dict_fields(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000659 fd, name = tempfile.mkstemp()
660 fileobj = os.fdopen(fd, "w+b")
661 try:
662 fileobj.write("1,2,abc\r\n")
663 fileobj.seek(0)
664 reader = csv.DictReader(fileobj,
665 fieldnames=["f1", "f2", "f3"])
666 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
667 finally:
668 fileobj.close()
669 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000670
Skip Montanarodffeed32003-10-03 14:03:01 +0000671 def test_read_dict_no_fieldnames(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000672 fd, name = tempfile.mkstemp()
673 fileobj = os.fdopen(fd, "w+b")
674 try:
675 fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
676 fileobj.seek(0)
677 reader = csv.DictReader(fileobj)
Skip Montanaroa032bf42008-08-08 22:52:51 +0000678 self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000679 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
680 finally:
681 fileobj.close()
682 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000683
Skip Montanaroa032bf42008-08-08 22:52:51 +0000684 # Two test cases to make sure existing ways of implicitly setting
685 # fieldnames continue to work. Both arise from discussion in issue3436.
686 def test_read_dict_fieldnames_from_file(self):
687 fd, name = tempfile.mkstemp()
688 f = os.fdopen(fd, "w+b")
689 try:
690 f.write("f1,f2,f3\r\n1,2,abc\r\n")
691 f.seek(0)
692 reader = csv.DictReader(f, fieldnames=csv.reader(f).next())
693 self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
694 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
695 finally:
696 f.close()
697 os.unlink(name)
698
699 def test_read_dict_fieldnames_chain(self):
700 import itertools
701 fd, name = tempfile.mkstemp()
702 f = os.fdopen(fd, "w+b")
703 try:
704 f.write("f1,f2,f3\r\n1,2,abc\r\n")
705 f.seek(0)
706 reader = csv.DictReader(f)
707 first = next(reader)
708 for row in itertools.chain([first], reader):
709 self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
710 self.assertEqual(row, {"f1": '1', "f2": '2', "f3": 'abc'})
711 finally:
712 f.close()
713 os.unlink(name)
714
Skip Montanarob4a04172003-03-20 23:29:12 +0000715 def test_read_long(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000716 fd, name = tempfile.mkstemp()
717 fileobj = os.fdopen(fd, "w+b")
718 try:
719 fileobj.write("1,2,abc,4,5,6\r\n")
720 fileobj.seek(0)
721 reader = csv.DictReader(fileobj,
722 fieldnames=["f1", "f2"])
723 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
724 None: ["abc", "4", "5", "6"]})
725 finally:
726 fileobj.close()
727 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000728
def test_read_long_with_rest(self):
    """Fields beyond the given fieldnames are gathered under restkey."""
    handle, path = tempfile.mkstemp()
    stream = os.fdopen(handle, "w+b")
    try:
        stream.write("1,2,abc,4,5,6\r\n")
        stream.seek(0)
        reader = csv.DictReader(stream, fieldnames=["f1", "f2"],
                                restkey="_rest")
        expected = {"f1": '1', "f2": '2', "_rest": ["abc", "4", "5", "6"]}
        self.assertEqual(next(reader), expected)
    finally:
        stream.close()
        os.unlink(path)
Skip Montanarob4a04172003-03-20 23:29:12 +0000742
def test_read_long_with_rest_no_fieldnames(self):
    """restkey also applies when field names come from the header row."""
    handle, path = tempfile.mkstemp()
    stream = os.fdopen(handle, "w+b")
    try:
        stream.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
        stream.seek(0)
        reader = csv.DictReader(stream, restkey="_rest")
        self.assertEqual(reader.fieldnames, ["f1", "f2"])
        expected = {"f1": '1', "f2": '2', "_rest": ["abc", "4", "5", "6"]}
        self.assertEqual(next(reader), expected)
    finally:
        stream.close()
        os.unlink(path)
Skip Montanarodffeed32003-10-03 14:03:01 +0000756
def test_read_short(self):
    """Rows with fewer fields than fieldnames are padded with restval."""
    names = "1 2 3 4 5 6".split()
    complete_row = {"1": '1', "2": '2', "3": 'abc',
                    "4": '4', "5": '5', "6": '6'}
    padded_row = {"1": '1', "2": '2', "3": 'abc',
                  "4": 'DEFAULT', "5": 'DEFAULT', "6": 'DEFAULT'}
    handle, path = tempfile.mkstemp()
    stream = os.fdopen(handle, "w+b")
    try:
        stream.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
        stream.seek(0)
        reader = csv.DictReader(stream, fieldnames=names,
                                restval="DEFAULT")
        self.assertEqual(next(reader), complete_row)
        self.assertEqual(next(reader), padded_row)
    finally:
        stream.close()
        os.unlink(path)
Skip Montanarob4a04172003-03-20 23:29:12 +0000774
Skip Montanaro1546bc42003-06-12 02:40:22 +0000775 def test_read_multi(self):
776 sample = [
777 '2147483648,43.0e12,17,abc,def\r\n',
778 '147483648,43.0e2,17,abc,def\r\n',
779 '47483648,43.0,170,abc,def\r\n'
780 ]
781
782 reader = csv.DictReader(sample,
783 fieldnames="i1 float i2 s1 s2".split())
784 self.assertEqual(reader.next(), {"i1": '2147483648',
785 "float": '43.0e12',
786 "i2": '17',
787 "s1": 'abc',
788 "s2": 'def'})
789
Skip Montanarob4a04172003-03-20 23:29:12 +0000790 def test_read_with_blanks(self):
791 reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
792 "1,2,abc,4,5,6\r\n"],
793 fieldnames="1 2 3 4 5 6".split())
794 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
795 "4": '4', "5": '5', "6": '6'})
796 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
797 "4": '4', "5": '5', "6": '6'})
798
Skip Montanaro3f7a9482003-09-06 19:52:12 +0000799 def test_read_semi_sep(self):
800 reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
801 fieldnames="1 2 3 4 5 6".split(),
802 delimiter=';')
803 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
804 "4": '4', "5": '5', "6": '6'})
805
class TestArrayWrites(unittest.TestCase):
    """Write array.array objects as single rows with the excel dialect."""

    def _assert_row_written(self, values, expected):
        """Write *values* as one row to a temp file and compare the text."""
        handle, path = tempfile.mkstemp()
        stream = os.fdopen(handle, "w+b")
        try:
            csv.writer(stream, dialect="excel").writerow(values)
            stream.seek(0)
            self.assertEqual(stream.read(), expected)
        finally:
            stream.close()
            os.unlink(path)

    def test_int_write(self):
        import array
        values = array.array('i', [20 - n for n in range(20)])
        # Integers are expected in their str() form.
        expected = ",".join(map(str, values)) + "\r\n"
        self._assert_row_written(values, expected)

    def test_double_write(self):
        import array
        values = array.array('d', [(20 - n) * 0.1 for n in range(20)])
        # Floats are expected in their repr() form.
        expected = ",".join(map(repr, values)) + "\r\n"
        self._assert_row_written(values, expected)

    def test_float_write(self):
        import array
        values = array.array('f', [(20 - n) * 0.1 for n in range(20)])
        # repr() is taken from the array items, not the source list.
        expected = ",".join(map(repr, values)) + "\r\n"
        self._assert_row_written(values, expected)

    def test_char_write(self):
        import array
        import string
        values = array.array('c', string.letters)
        expected = ",".join(values) + "\r\n"
        self._assert_row_written(values, expected)
Skip Montanarob4a04172003-03-20 23:29:12 +0000870
871class TestDialectValidity(unittest.TestCase):
872 def test_quoting(self):
873 class mydialect(csv.Dialect):
874 delimiter = ";"
875 escapechar = '\\'
876 doublequote = False
877 skipinitialspace = True
878 lineterminator = '\r\n'
879 quoting = csv.QUOTE_NONE
880 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200881 self.assertEqual(d.quoting, csv.QUOTE_NONE)
Skip Montanarob4a04172003-03-20 23:29:12 +0000882
883 mydialect.quoting = None
884 self.assertRaises(csv.Error, mydialect)
885
Skip Montanarob4a04172003-03-20 23:29:12 +0000886 mydialect.doublequote = True
887 mydialect.quoting = csv.QUOTE_ALL
888 mydialect.quotechar = '"'
889 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200890 self.assertEqual(d.quoting, csv.QUOTE_ALL)
891 self.assertEqual(d.quotechar, '"')
892 self.assertTrue(d.doublequote)
Skip Montanarob4a04172003-03-20 23:29:12 +0000893
894 mydialect.quotechar = "''"
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200895 with self.assertRaises(csv.Error) as cm:
896 mydialect()
897 self.assertEqual(str(cm.exception),
898 '"quotechar" must be an 1-character string')
Skip Montanarob4a04172003-03-20 23:29:12 +0000899
900 mydialect.quotechar = 4
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200901 with self.assertRaises(csv.Error) as cm:
902 mydialect()
903 self.assertEqual(str(cm.exception),
904 '"quotechar" must be string, not int')
Skip Montanarob4a04172003-03-20 23:29:12 +0000905
906 def test_delimiter(self):
907 class mydialect(csv.Dialect):
908 delimiter = ";"
909 escapechar = '\\'
910 doublequote = False
911 skipinitialspace = True
912 lineterminator = '\r\n'
913 quoting = csv.QUOTE_NONE
914 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200915 self.assertEqual(d.delimiter, ";")
Skip Montanarob4a04172003-03-20 23:29:12 +0000916
917 mydialect.delimiter = ":::"
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200918 with self.assertRaises(csv.Error) as cm:
919 mydialect()
920 self.assertEqual(str(cm.exception),
921 '"delimiter" must be an 1-character string')
922
923 mydialect.delimiter = ""
924 with self.assertRaises(csv.Error) as cm:
925 mydialect()
926 self.assertEqual(str(cm.exception),
927 '"delimiter" must be an 1-character string')
928
929 mydialect.delimiter = u","
930 with self.assertRaises(csv.Error) as cm:
931 mydialect()
932 self.assertEqual(str(cm.exception),
933 '"delimiter" must be string, not unicode')
Skip Montanarob4a04172003-03-20 23:29:12 +0000934
935 mydialect.delimiter = 4
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200936 with self.assertRaises(csv.Error) as cm:
937 mydialect()
938 self.assertEqual(str(cm.exception),
939 '"delimiter" must be string, not int')
Skip Montanarob4a04172003-03-20 23:29:12 +0000940
941 def test_lineterminator(self):
942 class mydialect(csv.Dialect):
943 delimiter = ";"
944 escapechar = '\\'
945 doublequote = False
946 skipinitialspace = True
947 lineterminator = '\r\n'
948 quoting = csv.QUOTE_NONE
949 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200950 self.assertEqual(d.lineterminator, '\r\n')
Skip Montanarob4a04172003-03-20 23:29:12 +0000951
952 mydialect.lineterminator = ":::"
953 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200954 self.assertEqual(d.lineterminator, ":::")
Skip Montanarob4a04172003-03-20 23:29:12 +0000955
956 mydialect.lineterminator = 4
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200957 with self.assertRaises(csv.Error) as cm:
958 mydialect()
959 self.assertEqual(str(cm.exception),
960 '"lineterminator" must be a string')
Skip Montanarob4a04172003-03-20 23:29:12 +0000961
962
class TestSniffer(unittest.TestCase):
    """Exercise csv.Sniffer header, delimiter and quoting detection."""

    sample1 = """\
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
Shark City, Glendale Heights, IL, 12/28/02, Prezence
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
"""
    sample2 = """\
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""
    header1 = '''\
"venue","city","state","date","performers"
'''
    sample3 = '''\
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
'''

    sample4 = '''\
2147483648;43.0e12;17;abc;def
147483648;43.0e2;17;abc;def
47483648;43.0;170;abc;def
'''

    sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
    sample6 = "a|b|c\r\nd|e|f\r\n"
    sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"

    # Issue 18155: the data below uses a delimiter ('+') that is a special
    # character in regular expressions.

    header2 = '''\
"venue"+"city"+"state"+"date"+"performers"
'''
    sample8 = """\
Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
"""
    sample9 = """\
'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
"""

    def test_has_header(self):
        sniffer = csv.Sniffer()
        without_header = self.sample1
        with_header = self.header1 + self.sample1
        self.assertEqual(sniffer.has_header(without_header), False)
        self.assertEqual(sniffer.has_header(with_header), True)

    def test_has_header_regex_special_delimiter(self):
        sniffer = csv.Sniffer()
        without_header = self.sample8
        with_header = self.header2 + self.sample8
        self.assertEqual(sniffer.has_header(without_header), False)
        self.assertEqual(sniffer.has_header(with_header), True)

    def test_sniff(self):
        sniffer = csv.Sniffer()
        # (sample, delimiter, quotechar, skipinitialspace)
        expectations = (
            (self.sample1, ",", '"', True),
            (self.sample2, ":", "'", False),
        )
        for sample, delimiter, quotechar, skips_space in expectations:
            guessed = sniffer.sniff(sample)
            self.assertEqual(guessed.delimiter, delimiter)
            self.assertEqual(guessed.quotechar, quotechar)
            self.assertEqual(guessed.skipinitialspace, skips_space)

    def test_delimiters(self):
        sniffer = csv.Sniffer()
        guessed = sniffer.sniff(self.sample3)
        # given that all three lines in sample3 are equal,
        # I think that any character could have been 'guessed' as the
        # delimiter, depending on dictionary order
        self.assertIn(guessed.delimiter, self.sample3)

        # Restricting the candidate set pins the answer down.
        for candidates, expected in (("?,", "?"), ("/,", "/")):
            guessed = sniffer.sniff(self.sample3, delimiters=candidates)
            self.assertEqual(guessed.delimiter, expected)

        # (sample, delimiter, quotechar); None means quotechar is unchecked.
        expectations = (
            (self.sample4, ";", None),
            (self.sample5, "\t", None),
            (self.sample6, "|", None),
            (self.sample7, "|", "'"),
            (self.sample8, '+', None),
            (self.sample9, '+', "'"),
        )
        for sample, delimiter, quotechar in expectations:
            guessed = sniffer.sniff(sample)
            self.assertEqual(guessed.delimiter, delimiter)
            if quotechar is not None:
                self.assertEqual(guessed.quotechar, quotechar)

    def test_doublequote(self):
        sniffer = csv.Sniffer()
        # (sample, whether doubled quote characters should be detected)
        expectations = (
            (self.header1, False),
            (self.header2, False),
            (self.sample2, True),
            (self.sample8, False),
            (self.sample9, True),
        )
        for sample, doubled in expectations:
            guessed = sniffer.sniff(sample)
            if doubled:
                self.assertTrue(guessed.doublequote)
            else:
                self.assertFalse(guessed.doublequote)
Skip Montanarob4fd4d32009-09-28 02:12:27 +00001075
class NUL:
    """Minimal file-like sink: accepts any write and discards it."""

    def write(s, *args):
        # Nothing is stored; the call only has to succeed.
        return None

    # writelines() shares the same do-nothing implementation.
    writelines = write
Skip Montanarob4a04172003-03-20 23:29:12 +00001080
@unittest.skipUnless(hasattr(sys, "gettotalrefcount"),
                     'requires sys.gettotalrefcount()')
class TestLeaks(unittest.TestCase):
    """Watch the total refcount across iterations to spot leaks.

    Each test repeats an operation 20 times and compares the total
    refcount sampled at the start of consecutive iterations; only the
    difference from the final pair is asserted.
    """

    def test_create_read(self):
        growth = 0
        previous = sys.gettotalrefcount()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            csv.reader(["a,b,c\r\n"])
            csv.reader(["a,b,c\r\n"])
            csv.reader(["a,b,c\r\n"])
            growth = current - previous
            previous = current
        # if csv.reader() leaks, last delta should be 3 or more
        self.assertEqual(growth < 3, True)

    def test_create_write(self):
        growth = 0
        previous = sys.gettotalrefcount()
        sink = NUL()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            csv.writer(sink)
            csv.writer(sink)
            csv.writer(sink)
            growth = current - previous
            previous = current
        # if csv.writer() leaks, last delta should be 3 or more
        self.assertEqual(growth < 3, True)

    def test_read(self):
        growth = 0
        lines = ["a,b,c\r\n"]*5
        previous = sys.gettotalrefcount()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            parser = csv.reader(lines)
            for record in parser:
                pass
            growth = current - previous
            previous = current
        # if reader leaks during read, delta should be 5 or more
        self.assertEqual(growth < 5, True)

    def test_write(self):
        growth = 0
        records = [[1,2,3]]*5
        sink = NUL()
        previous = sys.gettotalrefcount()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            writer = csv.writer(sink)
            for record in records:
                writer.writerow(record)
            growth = current - previous
            previous = current
        # if writer leaks during write, last delta should be 5 or more
        self.assertEqual(growth < 5, True)
Skip Montanarob4a04172003-03-20 23:29:12 +00001147
# Commented out for now - the csv module doesn't yet support Unicode.
## class TestUnicode(unittest.TestCase):
##     def test_unicode_read(self):
##         import codecs
##         f = codecs.EncodedFile(StringIO("Martin von Löwis,"
##                                         "Marc André Lemburg,"
##                                         "Guido van Rossum,"
##                                         "François Pinard\r\n"),
##                                data_encoding='iso-8859-1')
##         reader = csv.reader(f)
##         self.assertEqual(list(reader), [[u"Martin von Löwis",
##                                          u"Marc André Lemburg",
##                                          u"Guido van Rossum",
##                                          u"François Pinard"]])
Skip Montanaro1a566652003-05-06 15:56:05 +00001162
def test_main():
    """Run every attribute of this module whose name starts with 'Test'."""
    this_module = sys.modules[__name__]
    test_cases = []
    for attr_name in dir(this_module):
        if attr_name.startswith('Test'):
            test_cases.append(getattr(this_module, attr_name))
    test_support.run_unittest(*test_cases)
Skip Montanarob4a04172003-03-20 23:29:12 +00001168
# Run the whole suite when this file is executed directly as a script.
if __name__ == '__main__':
    test_main()