# blob: 181af99efb74e2527dff67d18d420fadcb660e13
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2001,2002 Python Software Foundation
# csv package unit tests

import copy
import csv
import gc
import io
import itertools
import os
import pickle
import sys
import tempfile
import unittest
from StringIO import StringIO
from test import test_support

17class Test_Csv(unittest.TestCase):
18 """
Tim Peters0eadaac2003-04-24 16:02:54 +000019 Test the underlying C csv parser in ways that are not appropriate
Skip Montanarob4a04172003-03-20 23:29:12 +000020 from the high level interface. Further tests of this nature are done
21 in TestDialectRegistry.
22 """
Andrew McNamara1196cf12005-01-07 04:42:45 +000023 def _test_arg_valid(self, ctor, arg):
24 self.assertRaises(TypeError, ctor)
25 self.assertRaises(TypeError, ctor, None)
26 self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
27 self.assertRaises(TypeError, ctor, arg, delimiter = 0)
28 self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
29 self.assertRaises(csv.Error, ctor, arg, 'foo')
Andrew McNamara1196cf12005-01-07 04:42:45 +000030 self.assertRaises(TypeError, ctor, arg, delimiter=None)
31 self.assertRaises(TypeError, ctor, arg, delimiter=1)
32 self.assertRaises(TypeError, ctor, arg, quotechar=1)
33 self.assertRaises(TypeError, ctor, arg, lineterminator=None)
34 self.assertRaises(TypeError, ctor, arg, lineterminator=1)
35 self.assertRaises(TypeError, ctor, arg, quoting=None)
Tim Peters608c2ff2005-01-13 17:37:38 +000036 self.assertRaises(TypeError, ctor, arg,
Andrew McNamaraaf1e3122005-01-12 01:55:21 +000037 quoting=csv.QUOTE_ALL, quotechar='')
Tim Peters608c2ff2005-01-13 17:37:38 +000038 self.assertRaises(TypeError, ctor, arg,
Andrew McNamara5d45a8d2005-01-12 08:16:17 +000039 quoting=csv.QUOTE_ALL, quotechar=None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000040
41 def test_reader_arg_valid(self):
42 self._test_arg_valid(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000043
44 def test_writer_arg_valid(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000045 self._test_arg_valid(csv.writer, StringIO())
Skip Montanarob4a04172003-03-20 23:29:12 +000046
Andrew McNamara1196cf12005-01-07 04:42:45 +000047 def _test_default_attrs(self, ctor, *args):
48 obj = ctor(*args)
49 # Check defaults
Skip Montanarob4a04172003-03-20 23:29:12 +000050 self.assertEqual(obj.dialect.delimiter, ',')
Andrew McNamara1196cf12005-01-07 04:42:45 +000051 self.assertEqual(obj.dialect.doublequote, True)
Skip Montanarob4a04172003-03-20 23:29:12 +000052 self.assertEqual(obj.dialect.escapechar, None)
Andrew McNamara1196cf12005-01-07 04:42:45 +000053 self.assertEqual(obj.dialect.lineterminator, "\r\n")
54 self.assertEqual(obj.dialect.quotechar, '"')
55 self.assertEqual(obj.dialect.quoting, csv.QUOTE_MINIMAL)
56 self.assertEqual(obj.dialect.skipinitialspace, False)
57 self.assertEqual(obj.dialect.strict, False)
58 # Try deleting or changing attributes (they are read-only)
59 self.assertRaises(TypeError, delattr, obj.dialect, 'delimiter')
60 self.assertRaises(TypeError, setattr, obj.dialect, 'delimiter', ':')
Barry Warsawb180c062005-04-20 19:41:36 +000061 self.assertRaises(AttributeError, delattr, obj.dialect, 'quoting')
62 self.assertRaises(AttributeError, setattr, obj.dialect,
63 'quoting', None)
Skip Montanarob4a04172003-03-20 23:29:12 +000064
65 def test_reader_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000066 self._test_default_attrs(csv.reader, [])
Skip Montanarob4a04172003-03-20 23:29:12 +000067
68 def test_writer_attrs(self):
Andrew McNamara1196cf12005-01-07 04:42:45 +000069 self._test_default_attrs(csv.writer, StringIO())
70
71 def _test_kw_attrs(self, ctor, *args):
72 # Now try with alternate options
73 kwargs = dict(delimiter=':', doublequote=False, escapechar='\\',
74 lineterminator='\r', quotechar='*',
75 quoting=csv.QUOTE_NONE, skipinitialspace=True,
76 strict=True)
77 obj = ctor(*args, **kwargs)
78 self.assertEqual(obj.dialect.delimiter, ':')
79 self.assertEqual(obj.dialect.doublequote, False)
80 self.assertEqual(obj.dialect.escapechar, '\\')
81 self.assertEqual(obj.dialect.lineterminator, "\r")
82 self.assertEqual(obj.dialect.quotechar, '*')
83 self.assertEqual(obj.dialect.quoting, csv.QUOTE_NONE)
84 self.assertEqual(obj.dialect.skipinitialspace, True)
85 self.assertEqual(obj.dialect.strict, True)
86
87 def test_reader_kw_attrs(self):
88 self._test_kw_attrs(csv.reader, [])
89
90 def test_writer_kw_attrs(self):
91 self._test_kw_attrs(csv.writer, StringIO())
92
93 def _test_dialect_attrs(self, ctor, *args):
94 # Now try with dialect-derived options
95 class dialect:
96 delimiter='-'
97 doublequote=False
98 escapechar='^'
99 lineterminator='$'
100 quotechar='#'
101 quoting=csv.QUOTE_ALL
102 skipinitialspace=True
103 strict=False
104 args = args + (dialect,)
105 obj = ctor(*args)
106 self.assertEqual(obj.dialect.delimiter, '-')
107 self.assertEqual(obj.dialect.doublequote, False)
108 self.assertEqual(obj.dialect.escapechar, '^')
109 self.assertEqual(obj.dialect.lineterminator, "$")
110 self.assertEqual(obj.dialect.quotechar, '#')
111 self.assertEqual(obj.dialect.quoting, csv.QUOTE_ALL)
112 self.assertEqual(obj.dialect.skipinitialspace, True)
113 self.assertEqual(obj.dialect.strict, False)
114
115 def test_reader_dialect_attrs(self):
116 self._test_dialect_attrs(csv.reader, [])
117
118 def test_writer_dialect_attrs(self):
119 self._test_dialect_attrs(csv.writer, StringIO())
120
Skip Montanarob4a04172003-03-20 23:29:12 +0000121
122 def _write_test(self, fields, expect, **kwargs):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000123 fd, name = tempfile.mkstemp()
124 fileobj = os.fdopen(fd, "w+b")
125 try:
126 writer = csv.writer(fileobj, **kwargs)
127 writer.writerow(fields)
128 fileobj.seek(0)
129 self.assertEqual(fileobj.read(),
130 expect + writer.dialect.lineterminator)
131 finally:
132 fileobj.close()
133 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000134
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200135 def _write_error_test(self, exc, fields, **kwargs):
136 fd, name = tempfile.mkstemp()
137 fileobj = os.fdopen(fd, "w+b")
138 try:
139 writer = csv.writer(fileobj, **kwargs)
140 with self.assertRaises(exc):
141 writer.writerow(fields)
142 fileobj.seek(0)
143 self.assertEqual(fileobj.read(), '')
144 finally:
145 fileobj.close()
146 os.unlink(name)
147
Skip Montanarob4a04172003-03-20 23:29:12 +0000148 def test_write_arg_valid(self):
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200149 self._write_error_test(csv.Error, None)
Skip Montanarob4a04172003-03-20 23:29:12 +0000150 self._write_test((), '')
151 self._write_test([None], '""')
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200152 self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
Skip Montanarob4a04172003-03-20 23:29:12 +0000153 # Check that exceptions are passed up the chain
154 class BadList:
155 def __len__(self):
156 return 10;
157 def __getitem__(self, i):
158 if i > 2:
159 raise IOError
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200160 self._write_error_test(IOError, BadList())
Skip Montanarob4a04172003-03-20 23:29:12 +0000161 class BadItem:
162 def __str__(self):
163 raise IOError
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200164 self._write_error_test(IOError, [BadItem()])
Skip Montanarob4a04172003-03-20 23:29:12 +0000165
166 def test_write_bigfield(self):
167 # This exercises the buffer realloc functionality
168 bigstring = 'X' * 50000
169 self._write_test([bigstring,bigstring], '%s,%s' % \
170 (bigstring, bigstring))
171
172 def test_write_quoting(self):
Andrew McNamarac89f2842005-01-12 07:44:42 +0000173 self._write_test(['a',1,'p,q'], 'a,1,"p,q"')
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200174 self._write_error_test(csv.Error, ['a',1,'p,q'],
175 quoting = csv.QUOTE_NONE)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000176 self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000177 quoting = csv.QUOTE_MINIMAL)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000178 self._write_test(['a',1,'p,q'], '"a",1,"p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000179 quoting = csv.QUOTE_NONNUMERIC)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000180 self._write_test(['a',1,'p,q'], '"a","1","p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000181 quoting = csv.QUOTE_ALL)
R. David Murray38644592009-04-01 21:26:18 +0000182 self._write_test(['a\nb',1], '"a\nb","1"',
183 quoting = csv.QUOTE_ALL)
Skip Montanarob4a04172003-03-20 23:29:12 +0000184
185 def test_write_escape(self):
Andrew McNamarac89f2842005-01-12 07:44:42 +0000186 self._write_test(['a',1,'p,q'], 'a,1,"p,q"',
Skip Montanarob4a04172003-03-20 23:29:12 +0000187 escapechar='\\')
Serhiy Storchaka837d7602015-03-25 19:15:56 +0200188 self._write_error_test(csv.Error, ['a',1,'p,"q"'],
189 escapechar=None, doublequote=False)
Andrew McNamarac89f2842005-01-12 07:44:42 +0000190 self._write_test(['a',1,'p,"q"'], 'a,1,"p,\\"q\\""',
191 escapechar='\\', doublequote = False)
Tim Peters608c2ff2005-01-13 17:37:38 +0000192 self._write_test(['"'], '""""',
Andrew McNamarac89f2842005-01-12 07:44:42 +0000193 escapechar='\\', quoting = csv.QUOTE_MINIMAL)
Tim Peters608c2ff2005-01-13 17:37:38 +0000194 self._write_test(['"'], '\\"',
Andrew McNamarac89f2842005-01-12 07:44:42 +0000195 escapechar='\\', quoting = csv.QUOTE_MINIMAL,
196 doublequote = False)
Tim Peters608c2ff2005-01-13 17:37:38 +0000197 self._write_test(['"'], '\\"',
Andrew McNamarac89f2842005-01-12 07:44:42 +0000198 escapechar='\\', quoting = csv.QUOTE_NONE)
199 self._write_test(['a',1,'p,q'], 'a,1,p\\,q',
Skip Montanarob4a04172003-03-20 23:29:12 +0000200 escapechar='\\', quoting = csv.QUOTE_NONE)
201
202 def test_writerows(self):
203 class BrokenFile:
204 def write(self, buf):
205 raise IOError
206 writer = csv.writer(BrokenFile())
207 self.assertRaises(IOError, writer.writerows, [['a']])
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000208 fd, name = tempfile.mkstemp()
209 fileobj = os.fdopen(fd, "w+b")
210 try:
211 writer = csv.writer(fileobj)
212 self.assertRaises(TypeError, writer.writerows, None)
213 writer.writerows([['a','b'],['c','d']])
214 fileobj.seek(0)
215 self.assertEqual(fileobj.read(), "a,b\r\nc,d\r\n")
216 finally:
217 fileobj.close()
218 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000219
Raymond Hettingerf5377022011-12-11 22:31:09 -0800220 def test_write_float(self):
221 # Issue 13573: loss of precision because csv.writer
222 # uses str() for floats instead of repr()
223 orig_row = [1.234567890123, 1.0/7.0, 'abc']
224 f = StringIO()
225 c = csv.writer(f, quoting=csv.QUOTE_NONNUMERIC)
226 c.writerow(orig_row)
227 f.seek(0)
228 c = csv.reader(f, quoting=csv.QUOTE_NONNUMERIC)
229 new_row = next(c)
230 self.assertEqual(orig_row, new_row)
231
Skip Montanarob4a04172003-03-20 23:29:12 +0000232 def _read_test(self, input, expect, **kwargs):
233 reader = csv.reader(input, **kwargs)
234 result = list(reader)
235 self.assertEqual(result, expect)
236
237 def test_read_oddinputs(self):
238 self._read_test([], [])
239 self._read_test([''], [[]])
240 self.assertRaises(csv.Error, self._read_test,
241 ['"ab"c'], None, strict = 1)
242 # cannot handle null bytes for the moment
243 self.assertRaises(csv.Error, self._read_test,
244 ['ab\0c'], None, strict = 1)
245 self._read_test(['"ab"c'], [['abc']], doublequote = 0)
246
247 def test_read_eol(self):
248 self._read_test(['a,b'], [['a','b']])
249 self._read_test(['a,b\n'], [['a','b']])
250 self._read_test(['a,b\r\n'], [['a','b']])
251 self._read_test(['a,b\r'], [['a','b']])
252 self.assertRaises(csv.Error, self._read_test, ['a,b\rc,d'], [])
253 self.assertRaises(csv.Error, self._read_test, ['a,b\nc,d'], [])
254 self.assertRaises(csv.Error, self._read_test, ['a,b\r\nc,d'], [])
255
Senthil Kumarand41dc7c2012-09-25 02:48:21 -0700256 def test_read_eof(self):
257 self._read_test(['a,"'], [['a', '']])
258 self._read_test(['"a'], [['a']])
259 self._read_test(['^'], [['\n']], escapechar='^')
260 self.assertRaises(csv.Error, self._read_test, ['a,"'], [], strict=True)
261 self.assertRaises(csv.Error, self._read_test, ['"a'], [], strict=True)
262 self.assertRaises(csv.Error, self._read_test,
263 ['^'], [], escapechar='^', strict=True)
264
Skip Montanarob4a04172003-03-20 23:29:12 +0000265 def test_read_escape(self):
Andrew McNamara36a76912005-01-10 01:04:40 +0000266 self._read_test(['a,\\b,c'], [['a', 'b', 'c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000267 self._read_test(['a,b\\,c'], [['a', 'b,c']], escapechar='\\')
268 self._read_test(['a,"b\\,c"'], [['a', 'b,c']], escapechar='\\')
Andrew McNamara36a76912005-01-10 01:04:40 +0000269 self._read_test(['a,"b,\\c"'], [['a', 'b,c']], escapechar='\\')
Skip Montanarob4a04172003-03-20 23:29:12 +0000270 self._read_test(['a,"b,c\\""'], [['a', 'b,c"']], escapechar='\\')
271 self._read_test(['a,"b,c"\\'], [['a', 'b,c\\']], escapechar='\\')
272
Andrew McNamara1196cf12005-01-07 04:42:45 +0000273 def test_read_quoting(self):
274 self._read_test(['1,",3,",5'], [['1', ',3,', '5']])
275 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
276 quotechar=None, escapechar='\\')
277 self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
278 quoting=csv.QUOTE_NONE, escapechar='\\')
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000279 # will this fail where locale uses comma for decimals?
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000280 self._read_test([',3,"5",7.3, 9'], [['', 3, '5', 7.3, 9]],
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000281 quoting=csv.QUOTE_NONNUMERIC)
R. David Murray38644592009-04-01 21:26:18 +0000282 self._read_test(['"a\nb", 7'], [['a\nb', ' 7']])
Tim Peters608c2ff2005-01-13 17:37:38 +0000283 self.assertRaises(ValueError, self._read_test,
Andrew McNamara0f0599d2005-01-12 09:45:18 +0000284 ['abc,3'], [[]],
285 quoting=csv.QUOTE_NONNUMERIC)
Andrew McNamara1196cf12005-01-07 04:42:45 +0000286
Skip Montanarob4a04172003-03-20 23:29:12 +0000287 def test_read_bigfield(self):
Andrew McNamarae4d05c42005-01-11 07:32:02 +0000288 # This exercises the buffer realloc functionality and field size
289 # limits.
Andrew McNamara31d88962005-01-12 03:45:10 +0000290 limit = csv.field_size_limit()
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000291 try:
292 size = 50000
293 bigstring = 'X' * size
294 bigline = '%s,%s' % (bigstring, bigstring)
295 self._read_test([bigline], [[bigstring, bigstring]])
Andrew McNamara31d88962005-01-12 03:45:10 +0000296 csv.field_size_limit(size)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000297 self._read_test([bigline], [[bigstring, bigstring]])
Andrew McNamara31d88962005-01-12 03:45:10 +0000298 self.assertEqual(csv.field_size_limit(), size)
299 csv.field_size_limit(size-1)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000300 self.assertRaises(csv.Error, self._read_test, [bigline], [])
Andrew McNamara31d88962005-01-12 03:45:10 +0000301 self.assertRaises(TypeError, csv.field_size_limit, None)
302 self.assertRaises(TypeError, csv.field_size_limit, 1, None)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000303 finally:
Andrew McNamara31d88962005-01-12 03:45:10 +0000304 csv.field_size_limit(limit)
Skip Montanarob4a04172003-03-20 23:29:12 +0000305
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000306 def test_read_linenum(self):
Georg Brandl77354cf2008-03-21 20:01:51 +0000307 for r in (csv.reader(['line,1', 'line,2', 'line,3']),
308 csv.DictReader(['line,1', 'line,2', 'line,3'],
309 fieldnames=['a', 'b', 'c'])):
310 self.assertEqual(r.line_num, 0)
311 r.next()
312 self.assertEqual(r.line_num, 1)
313 r.next()
314 self.assertEqual(r.line_num, 2)
315 r.next()
316 self.assertEqual(r.line_num, 3)
317 self.assertRaises(StopIteration, r.next)
318 self.assertEqual(r.line_num, 3)
Andrew McNamara7f2053e2005-01-12 11:17:16 +0000319
R. David Murray38644592009-04-01 21:26:18 +0000320 def test_roundtrip_quoteed_newlines(self):
321 fd, name = tempfile.mkstemp()
322 fileobj = os.fdopen(fd, "w+b")
323 try:
324 writer = csv.writer(fileobj)
325 self.assertRaises(TypeError, writer.writerows, None)
326 rows = [['a\nb','b'],['c','x\r\nd']]
327 writer.writerows(rows)
328 fileobj.seek(0)
329 for i, row in enumerate(csv.reader(fileobj)):
330 self.assertEqual(row, rows[i])
331 finally:
332 fileobj.close()
333 os.unlink(name)
334
Skip Montanarob4a04172003-03-20 23:29:12 +0000335class TestDialectRegistry(unittest.TestCase):
336 def test_registry_badargs(self):
337 self.assertRaises(TypeError, csv.list_dialects, None)
338 self.assertRaises(TypeError, csv.get_dialect)
339 self.assertRaises(csv.Error, csv.get_dialect, None)
340 self.assertRaises(csv.Error, csv.get_dialect, "nonesuch")
341 self.assertRaises(TypeError, csv.unregister_dialect)
342 self.assertRaises(csv.Error, csv.unregister_dialect, None)
343 self.assertRaises(csv.Error, csv.unregister_dialect, "nonesuch")
344 self.assertRaises(TypeError, csv.register_dialect, None)
345 self.assertRaises(TypeError, csv.register_dialect, None, None)
Andrew McNamaraaf1e3122005-01-12 01:55:21 +0000346 self.assertRaises(TypeError, csv.register_dialect, "nonesuch", 0, 0)
347 self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
348 badargument=None)
349 self.assertRaises(TypeError, csv.register_dialect, "nonesuch",
350 quoting=None)
351 self.assertRaises(TypeError, csv.register_dialect, [])
Skip Montanarob4a04172003-03-20 23:29:12 +0000352
353 def test_registry(self):
354 class myexceltsv(csv.excel):
355 delimiter = "\t"
356 name = "myexceltsv"
357 expected_dialects = csv.list_dialects() + [name]
358 expected_dialects.sort()
359 csv.register_dialect(name, myexceltsv)
Éric Araujod11058d2010-12-02 22:35:10 +0000360 self.addCleanup(csv.unregister_dialect, name)
361 self.assertEqual(csv.get_dialect(name).delimiter, '\t')
362 got_dialects = sorted(csv.list_dialects())
363 self.assertEqual(expected_dialects, got_dialects)
Skip Montanarob4a04172003-03-20 23:29:12 +0000364
Andrew McNamara86625972005-01-11 01:28:33 +0000365 def test_register_kwargs(self):
366 name = 'fedcba'
367 csv.register_dialect(name, delimiter=';')
Éric Araujod11058d2010-12-02 22:35:10 +0000368 self.addCleanup(csv.unregister_dialect, name)
369 self.assertEqual(csv.get_dialect(name).delimiter, ';')
370 self.assertEqual([['X', 'Y', 'Z']], list(csv.reader(['X;Y;Z'], name)))
Andrew McNamara86625972005-01-11 01:28:33 +0000371
Skip Montanarob4a04172003-03-20 23:29:12 +0000372 def test_incomplete_dialect(self):
373 class myexceltsv(csv.Dialect):
374 delimiter = "\t"
375 self.assertRaises(csv.Error, myexceltsv)
376
377 def test_space_dialect(self):
378 class space(csv.excel):
379 delimiter = " "
380 quoting = csv.QUOTE_NONE
381 escapechar = "\\"
382
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000383 fd, name = tempfile.mkstemp()
384 fileobj = os.fdopen(fd, "w+b")
385 try:
386 fileobj.write("abc def\nc1ccccc1 benzene\n")
387 fileobj.seek(0)
388 rdr = csv.reader(fileobj, dialect=space())
389 self.assertEqual(rdr.next(), ["abc", "def"])
390 self.assertEqual(rdr.next(), ["c1ccccc1", "benzene"])
391 finally:
392 fileobj.close()
393 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000394
Skip Montanarob4a04172003-03-20 23:29:12 +0000395 def test_dialect_apply(self):
396 class testA(csv.excel):
397 delimiter = "\t"
398 class testB(csv.excel):
399 delimiter = ":"
400 class testC(csv.excel):
401 delimiter = "|"
402
403 csv.register_dialect('testC', testC)
404 try:
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000405 fd, name = tempfile.mkstemp()
406 fileobj = os.fdopen(fd, "w+b")
407 try:
408 writer = csv.writer(fileobj)
409 writer.writerow([1,2,3])
410 fileobj.seek(0)
411 self.assertEqual(fileobj.read(), "1,2,3\r\n")
412 finally:
413 fileobj.close()
414 os.unlink(name)
Tim Peters27f88362004-07-08 04:22:35 +0000415
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000416 fd, name = tempfile.mkstemp()
417 fileobj = os.fdopen(fd, "w+b")
418 try:
419 writer = csv.writer(fileobj, testA)
420 writer.writerow([1,2,3])
421 fileobj.seek(0)
422 self.assertEqual(fileobj.read(), "1\t2\t3\r\n")
423 finally:
424 fileobj.close()
425 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000426
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000427 fd, name = tempfile.mkstemp()
428 fileobj = os.fdopen(fd, "w+b")
429 try:
430 writer = csv.writer(fileobj, dialect=testB())
431 writer.writerow([1,2,3])
432 fileobj.seek(0)
433 self.assertEqual(fileobj.read(), "1:2:3\r\n")
434 finally:
435 fileobj.close()
436 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000437
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000438 fd, name = tempfile.mkstemp()
439 fileobj = os.fdopen(fd, "w+b")
440 try:
441 writer = csv.writer(fileobj, dialect='testC')
442 writer.writerow([1,2,3])
443 fileobj.seek(0)
444 self.assertEqual(fileobj.read(), "1|2|3\r\n")
445 finally:
446 fileobj.close()
447 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000448
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000449 fd, name = tempfile.mkstemp()
450 fileobj = os.fdopen(fd, "w+b")
451 try:
452 writer = csv.writer(fileobj, dialect=testA, delimiter=';')
453 writer.writerow([1,2,3])
454 fileobj.seek(0)
455 self.assertEqual(fileobj.read(), "1;2;3\r\n")
456 finally:
457 fileobj.close()
458 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000459
Skip Montanarob4a04172003-03-20 23:29:12 +0000460 finally:
461 csv.unregister_dialect('testC')
462
463 def test_bad_dialect(self):
464 # Unknown parameter
Andrew McNamara1196cf12005-01-07 04:42:45 +0000465 self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
Skip Montanarob4a04172003-03-20 23:29:12 +0000466 # Bad values
467 self.assertRaises(TypeError, csv.reader, [], delimiter = None)
468 self.assertRaises(TypeError, csv.reader, [], quoting = -1)
469 self.assertRaises(TypeError, csv.reader, [], quoting = 100)
470
Serhiy Storchakab8e54dd2015-12-30 20:43:29 +0200471 def test_copy(self):
472 for name in csv.list_dialects():
473 dialect = csv.get_dialect(name)
474 self.assertRaises(TypeError, copy.copy, dialect)
475
476 def test_pickle(self):
477 for name in csv.list_dialects():
478 dialect = csv.get_dialect(name)
479 for proto in range(pickle.HIGHEST_PROTOCOL + 1):
480 self.assertRaises(TypeError, pickle.dumps, dialect, proto)
481
class TestCsvBase(unittest.TestCase):
    # Shared helpers for the dialect test classes.  Both helpers read
    # self.dialect, which this class does not define itself.

    def readerAssertEqual(self, input, expected_result):
        # Store `input` in a scratch file, parse it with self.dialect and
        # compare the list of rows with `expected_result`.
        fd, name = tempfile.mkstemp()
        fileobj = os.fdopen(fd, "w+b")
        try:
            fileobj.write(input)
            fileobj.seek(0)
            reader = csv.reader(fileobj, dialect = self.dialect)
            fields = list(reader)
            self.assertEqual(fields, expected_result)
        finally:
            fileobj.close()
            os.unlink(name)

    def writerAssertEqual(self, input, expected_result):
        # Write the rows in `input` to a scratch file with self.dialect and
        # compare the file contents with `expected_result`.
        fd, name = tempfile.mkstemp()
        fileobj = os.fdopen(fd, "w+b")
        try:
            writer = csv.writer(fileobj, dialect = self.dialect)
            writer.writerows(input)
            fileobj.seek(0)
            self.assertEqual(fileobj.read(), expected_result)
        finally:
            fileobj.close()
            os.unlink(name)

class TestDialectExcel(TestCsvBase):
    # Reader and writer checks run against the registered 'excel' dialect.
    dialect = 'excel'

    def test_single(self):
        self.readerAssertEqual('abc', [['abc']])

    def test_simple(self):
        self.readerAssertEqual('1,2,3,4,5', [['1','2','3','4','5']])

    def test_blankline(self):
        self.readerAssertEqual('', [])

    def test_empty_fields(self):
        self.readerAssertEqual(',', [['', '']])

    def test_singlequoted(self):
        self.readerAssertEqual('""', [['']])

    def test_singlequoted_left_empty(self):
        self.readerAssertEqual('"",', [['','']])

    def test_singlequoted_right_empty(self):
        self.readerAssertEqual(',""', [['','']])

    def test_single_quoted_quote(self):
        self.readerAssertEqual('""""', [['"']])

    def test_quoted_quotes(self):
        self.readerAssertEqual('""""""', [['""']])

    def test_inline_quote(self):
        self.readerAssertEqual('a""b', [['a""b']])

    def test_inline_quotes(self):
        self.readerAssertEqual('a"b"c', [['a"b"c']])

    def test_quotes_and_more(self):
        # Excel would never write a field containing '"a"b', but when
        # reading one, it will return 'ab'.
        self.readerAssertEqual('"a"b', [['ab']])

    def test_lone_quote(self):
        self.readerAssertEqual('a"b', [['a"b']])

    def test_quote_and_quote(self):
        # Excel would never write a field containing '"a" "b"', but when
        # reading one, it will return 'a "b"'.
        self.readerAssertEqual('"a" "b"', [['a "b"']])

    def test_space_and_quote(self):
        self.readerAssertEqual(' "a"', [[' "a"']])

    def test_quoted(self):
        self.readerAssertEqual('1,2,3,"I think, therefore I am",5,6',
                               [['1', '2', '3',
                                 'I think, therefore I am',
                                 '5', '6']])

    def test_quoted_quote(self):
        self.readerAssertEqual('1,2,3,"""I see,"" said the blind man","as he picked up his hammer and saw"',
                               [['1', '2', '3',
                                 '"I see," said the blind man',
                                 'as he picked up his hammer and saw']])

    def test_quoted_nl(self):
        # Quoted fields may span physical lines; the embedded newlines are
        # expected to be kept in the parsed values.
        text = '''\
1,2,3,"""I see,""
said the blind man","as he picked up his
hammer and saw"
9,8,7,6'''
        self.readerAssertEqual(text,
                               [['1', '2', '3',
                                 '"I see,"\nsaid the blind man',
                                 'as he picked up his\nhammer and saw'],
                                ['9','8','7','6']])

    def test_dubious_quote(self):
        self.readerAssertEqual('12,12,1",', [['12', '12', '1"', '']])

    def test_null(self):
        self.writerAssertEqual([], '')

    def test_single_writer(self):
        self.writerAssertEqual([['abc']], 'abc\r\n')

    def test_simple_writer(self):
        self.writerAssertEqual([[1, 2, 'abc', 3, 4]], '1,2,abc,3,4\r\n')

    def test_quotes(self):
        self.writerAssertEqual([[1, 2, 'a"bc"', 3, 4]], '1,2,"a""bc""",3,4\r\n')

    def test_quote_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_newlines(self):
        self.writerAssertEqual([[1, 2, 'a\nbc', 3, 4]], '1,2,"a\nbc",3,4\r\n')

class EscapedExcel(csv.excel):
    # excel dialect variant: no quoting, backslash as the escape character.
    quoting = csv.QUOTE_NONE
    escapechar = '\\'

class TestEscapedExcel(TestCsvBase):
    # Uses an EscapedExcel instance as the dialect for the shared helpers.
    dialect = EscapedExcel()

    def test_escape_fieldsep(self):
        self.writerAssertEqual([['abc,def']], 'abc\\,def\r\n')

    def test_read_escape_fieldsep(self):
        self.readerAssertEqual('abc\\,def\r\n', [['abc,def']])

class QuotedEscapedExcel(csv.excel):
    # excel dialect variant: QUOTE_NONNUMERIC quoting, backslash as the
    # escape character.
    quoting = csv.QUOTE_NONNUMERIC
    escapechar = '\\'

class TestQuotedEscapedExcel(TestCsvBase):
    # Uses a QuotedEscapedExcel instance as the dialect for the shared
    # helpers.
    dialect = QuotedEscapedExcel()

    def test_write_escape_fieldsep(self):
        self.writerAssertEqual([['abc,def']], '"abc,def"\r\n')

    def test_read_escape_fieldsep(self):
        self.readerAssertEqual('"abc\\,def"\r\n', [['abc,def']])

class TestDictFields(unittest.TestCase):
    ### "long" means the row is longer than the number of fieldnames
    ### "short" means there are fewer elements in the row than fieldnames
634 def test_write_simple_dict(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000635 fd, name = tempfile.mkstemp()
Dirkjan Ochtman19c9b602010-03-04 19:21:53 +0000636 fileobj = io.open(fd, 'w+b')
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000637 try:
638 writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
Dirkjan Ochtman86148172010-02-23 21:09:52 +0000639 writer.writeheader()
640 fileobj.seek(0)
641 self.assertEqual(fileobj.readline(), "f1,f2,f3\r\n")
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000642 writer.writerow({"f1": 10, "f3": "abc"})
643 fileobj.seek(0)
Dirkjan Ochtman86148172010-02-23 21:09:52 +0000644 fileobj.readline() # header
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000645 self.assertEqual(fileobj.read(), "10,,abc\r\n")
646 finally:
647 fileobj.close()
648 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000649
650 def test_write_no_fields(self):
651 fileobj = StringIO()
652 self.assertRaises(TypeError, csv.DictWriter, fileobj)
653
R David Murrayeccf9c22013-11-19 13:25:24 -0500654 def test_write_fields_not_in_fieldnames(self):
655 fd, name = tempfile.mkstemp()
656 fileobj = os.fdopen(fd, "w+b")
657 try:
658 writer = csv.DictWriter(fileobj, fieldnames = ["f1", "f2", "f3"])
659 # Of special note is the non-string key (issue 19449)
660 with self.assertRaises(ValueError) as cx:
661 writer.writerow({"f4": 10, "f2": "spam", 1: "abc"})
662 exception = str(cx.exception)
663 self.assertIn("fieldnames", exception)
664 self.assertIn("'f4'", exception)
665 self.assertNotIn("'f2'", exception)
666 self.assertIn("1", exception)
667 finally:
668 fileobj.close()
669 os.unlink(name)
670
Skip Montanarob4a04172003-03-20 23:29:12 +0000671 def test_read_dict_fields(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000672 fd, name = tempfile.mkstemp()
673 fileobj = os.fdopen(fd, "w+b")
674 try:
675 fileobj.write("1,2,abc\r\n")
676 fileobj.seek(0)
677 reader = csv.DictReader(fileobj,
678 fieldnames=["f1", "f2", "f3"])
679 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
680 finally:
681 fileobj.close()
682 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000683
Skip Montanarodffeed32003-10-03 14:03:01 +0000684 def test_read_dict_no_fieldnames(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000685 fd, name = tempfile.mkstemp()
686 fileobj = os.fdopen(fd, "w+b")
687 try:
688 fileobj.write("f1,f2,f3\r\n1,2,abc\r\n")
689 fileobj.seek(0)
690 reader = csv.DictReader(fileobj)
Skip Montanaroa032bf42008-08-08 22:52:51 +0000691 self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000692 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
693 finally:
694 fileobj.close()
695 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000696
Skip Montanaroa032bf42008-08-08 22:52:51 +0000697 # Two test cases to make sure existing ways of implicitly setting
698 # fieldnames continue to work. Both arise from discussion in issue3436.
699 def test_read_dict_fieldnames_from_file(self):
700 fd, name = tempfile.mkstemp()
701 f = os.fdopen(fd, "w+b")
702 try:
703 f.write("f1,f2,f3\r\n1,2,abc\r\n")
704 f.seek(0)
705 reader = csv.DictReader(f, fieldnames=csv.reader(f).next())
706 self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
707 self.assertEqual(reader.next(), {"f1": '1', "f2": '2', "f3": 'abc'})
708 finally:
709 f.close()
710 os.unlink(name)
711
712 def test_read_dict_fieldnames_chain(self):
713 import itertools
714 fd, name = tempfile.mkstemp()
715 f = os.fdopen(fd, "w+b")
716 try:
717 f.write("f1,f2,f3\r\n1,2,abc\r\n")
718 f.seek(0)
719 reader = csv.DictReader(f)
720 first = next(reader)
721 for row in itertools.chain([first], reader):
722 self.assertEqual(reader.fieldnames, ["f1", "f2", "f3"])
723 self.assertEqual(row, {"f1": '1', "f2": '2', "f3": 'abc'})
724 finally:
725 f.close()
726 os.unlink(name)
727
Skip Montanarob4a04172003-03-20 23:29:12 +0000728 def test_read_long(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000729 fd, name = tempfile.mkstemp()
730 fileobj = os.fdopen(fd, "w+b")
731 try:
732 fileobj.write("1,2,abc,4,5,6\r\n")
733 fileobj.seek(0)
734 reader = csv.DictReader(fileobj,
735 fieldnames=["f1", "f2"])
736 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
737 None: ["abc", "4", "5", "6"]})
738 finally:
739 fileobj.close()
740 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000741
742 def test_read_long_with_rest(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000743 fd, name = tempfile.mkstemp()
744 fileobj = os.fdopen(fd, "w+b")
745 try:
746 fileobj.write("1,2,abc,4,5,6\r\n")
747 fileobj.seek(0)
748 reader = csv.DictReader(fileobj,
749 fieldnames=["f1", "f2"], restkey="_rest")
750 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
751 "_rest": ["abc", "4", "5", "6"]})
752 finally:
753 fileobj.close()
754 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000755
Skip Montanarodffeed32003-10-03 14:03:01 +0000756 def test_read_long_with_rest_no_fieldnames(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000757 fd, name = tempfile.mkstemp()
758 fileobj = os.fdopen(fd, "w+b")
759 try:
760 fileobj.write("f1,f2\r\n1,2,abc,4,5,6\r\n")
761 fileobj.seek(0)
762 reader = csv.DictReader(fileobj, restkey="_rest")
Skip Montanaroa032bf42008-08-08 22:52:51 +0000763 self.assertEqual(reader.fieldnames, ["f1", "f2"])
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000764 self.assertEqual(reader.next(), {"f1": '1', "f2": '2',
765 "_rest": ["abc", "4", "5", "6"]})
766 finally:
767 fileobj.close()
768 os.unlink(name)
Skip Montanarodffeed32003-10-03 14:03:01 +0000769
Skip Montanarob4a04172003-03-20 23:29:12 +0000770 def test_read_short(self):
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000771 fd, name = tempfile.mkstemp()
772 fileobj = os.fdopen(fd, "w+b")
773 try:
774 fileobj.write("1,2,abc,4,5,6\r\n1,2,abc\r\n")
775 fileobj.seek(0)
776 reader = csv.DictReader(fileobj,
777 fieldnames="1 2 3 4 5 6".split(),
778 restval="DEFAULT")
779 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
780 "4": '4', "5": '5', "6": '6'})
781 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
782 "4": 'DEFAULT', "5": 'DEFAULT',
783 "6": 'DEFAULT'})
784 finally:
785 fileobj.close()
786 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000787
Skip Montanaro1546bc42003-06-12 02:40:22 +0000788 def test_read_multi(self):
789 sample = [
790 '2147483648,43.0e12,17,abc,def\r\n',
791 '147483648,43.0e2,17,abc,def\r\n',
792 '47483648,43.0,170,abc,def\r\n'
793 ]
794
795 reader = csv.DictReader(sample,
796 fieldnames="i1 float i2 s1 s2".split())
797 self.assertEqual(reader.next(), {"i1": '2147483648',
798 "float": '43.0e12',
799 "i2": '17',
800 "s1": 'abc',
801 "s2": 'def'})
802
Skip Montanarob4a04172003-03-20 23:29:12 +0000803 def test_read_with_blanks(self):
804 reader = csv.DictReader(["1,2,abc,4,5,6\r\n","\r\n",
805 "1,2,abc,4,5,6\r\n"],
806 fieldnames="1 2 3 4 5 6".split())
807 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
808 "4": '4', "5": '5', "6": '6'})
809 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
810 "4": '4', "5": '5', "6": '6'})
811
Skip Montanaro3f7a9482003-09-06 19:52:12 +0000812 def test_read_semi_sep(self):
813 reader = csv.DictReader(["1;2;abc;4;5;6\r\n"],
814 fieldnames="1 2 3 4 5 6".split(),
815 delimiter=';')
816 self.assertEqual(reader.next(), {"1": '1', "2": '2', "3": 'abc',
817 "4": '4', "5": '5', "6": '6'})
818
Skip Montanarob4a04172003-03-20 23:29:12 +0000819class TestArrayWrites(unittest.TestCase):
820 def test_int_write(self):
821 import array
822 contents = [(20-i) for i in range(20)]
823 a = array.array('i', contents)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000824
825 fd, name = tempfile.mkstemp()
826 fileobj = os.fdopen(fd, "w+b")
827 try:
828 writer = csv.writer(fileobj, dialect="excel")
829 writer.writerow(a)
830 expected = ",".join([str(i) for i in a])+"\r\n"
831 fileobj.seek(0)
832 self.assertEqual(fileobj.read(), expected)
833 finally:
834 fileobj.close()
835 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000836
837 def test_double_write(self):
838 import array
839 contents = [(20-i)*0.1 for i in range(20)]
840 a = array.array('d', contents)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000841 fd, name = tempfile.mkstemp()
842 fileobj = os.fdopen(fd, "w+b")
843 try:
844 writer = csv.writer(fileobj, dialect="excel")
845 writer.writerow(a)
Raymond Hettingerf5377022011-12-11 22:31:09 -0800846 expected = ",".join([repr(i) for i in a])+"\r\n"
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000847 fileobj.seek(0)
848 self.assertEqual(fileobj.read(), expected)
849 finally:
850 fileobj.close()
851 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000852
853 def test_float_write(self):
854 import array
855 contents = [(20-i)*0.1 for i in range(20)]
856 a = array.array('f', contents)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000857 fd, name = tempfile.mkstemp()
858 fileobj = os.fdopen(fd, "w+b")
859 try:
860 writer = csv.writer(fileobj, dialect="excel")
861 writer.writerow(a)
Raymond Hettingerf5377022011-12-11 22:31:09 -0800862 expected = ",".join([repr(i) for i in a])+"\r\n"
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000863 fileobj.seek(0)
864 self.assertEqual(fileobj.read(), expected)
865 finally:
866 fileobj.close()
867 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000868
869 def test_char_write(self):
870 import array, string
871 a = array.array('c', string.letters)
Skip Montanaro58fc5d02004-06-05 17:03:20 +0000872 fd, name = tempfile.mkstemp()
873 fileobj = os.fdopen(fd, "w+b")
874 try:
875 writer = csv.writer(fileobj, dialect="excel")
876 writer.writerow(a)
877 expected = ",".join(a)+"\r\n"
878 fileobj.seek(0)
879 self.assertEqual(fileobj.read(), expected)
880 finally:
881 fileobj.close()
882 os.unlink(name)
Skip Montanarob4a04172003-03-20 23:29:12 +0000883
884class TestDialectValidity(unittest.TestCase):
885 def test_quoting(self):
886 class mydialect(csv.Dialect):
887 delimiter = ";"
888 escapechar = '\\'
889 doublequote = False
890 skipinitialspace = True
891 lineterminator = '\r\n'
892 quoting = csv.QUOTE_NONE
893 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200894 self.assertEqual(d.quoting, csv.QUOTE_NONE)
Skip Montanarob4a04172003-03-20 23:29:12 +0000895
896 mydialect.quoting = None
897 self.assertRaises(csv.Error, mydialect)
898
Skip Montanarob4a04172003-03-20 23:29:12 +0000899 mydialect.doublequote = True
900 mydialect.quoting = csv.QUOTE_ALL
901 mydialect.quotechar = '"'
902 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200903 self.assertEqual(d.quoting, csv.QUOTE_ALL)
904 self.assertEqual(d.quotechar, '"')
905 self.assertTrue(d.doublequote)
Skip Montanarob4a04172003-03-20 23:29:12 +0000906
907 mydialect.quotechar = "''"
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200908 with self.assertRaises(csv.Error) as cm:
909 mydialect()
910 self.assertEqual(str(cm.exception),
911 '"quotechar" must be an 1-character string')
Skip Montanarob4a04172003-03-20 23:29:12 +0000912
913 mydialect.quotechar = 4
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200914 with self.assertRaises(csv.Error) as cm:
915 mydialect()
916 self.assertEqual(str(cm.exception),
917 '"quotechar" must be string, not int')
Skip Montanarob4a04172003-03-20 23:29:12 +0000918
919 def test_delimiter(self):
920 class mydialect(csv.Dialect):
921 delimiter = ";"
922 escapechar = '\\'
923 doublequote = False
924 skipinitialspace = True
925 lineterminator = '\r\n'
926 quoting = csv.QUOTE_NONE
927 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200928 self.assertEqual(d.delimiter, ";")
Skip Montanarob4a04172003-03-20 23:29:12 +0000929
930 mydialect.delimiter = ":::"
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200931 with self.assertRaises(csv.Error) as cm:
932 mydialect()
933 self.assertEqual(str(cm.exception),
934 '"delimiter" must be an 1-character string')
935
936 mydialect.delimiter = ""
937 with self.assertRaises(csv.Error) as cm:
938 mydialect()
939 self.assertEqual(str(cm.exception),
940 '"delimiter" must be an 1-character string')
941
942 mydialect.delimiter = u","
943 with self.assertRaises(csv.Error) as cm:
944 mydialect()
945 self.assertEqual(str(cm.exception),
946 '"delimiter" must be string, not unicode')
Skip Montanarob4a04172003-03-20 23:29:12 +0000947
948 mydialect.delimiter = 4
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200949 with self.assertRaises(csv.Error) as cm:
950 mydialect()
951 self.assertEqual(str(cm.exception),
952 '"delimiter" must be string, not int')
Skip Montanarob4a04172003-03-20 23:29:12 +0000953
954 def test_lineterminator(self):
955 class mydialect(csv.Dialect):
956 delimiter = ";"
957 escapechar = '\\'
958 doublequote = False
959 skipinitialspace = True
960 lineterminator = '\r\n'
961 quoting = csv.QUOTE_NONE
962 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200963 self.assertEqual(d.lineterminator, '\r\n')
Skip Montanarob4a04172003-03-20 23:29:12 +0000964
965 mydialect.lineterminator = ":::"
966 d = mydialect()
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200967 self.assertEqual(d.lineterminator, ":::")
Skip Montanarob4a04172003-03-20 23:29:12 +0000968
969 mydialect.lineterminator = 4
Serhiy Storchaka0c221be2013-12-19 16:26:56 +0200970 with self.assertRaises(csv.Error) as cm:
971 mydialect()
972 self.assertEqual(str(cm.exception),
973 '"lineterminator" must be a string')
Skip Montanarob4a04172003-03-20 23:29:12 +0000974
975
class TestSniffer(unittest.TestCase):
    """Exercise csv.Sniffer.sniff() and csv.Sniffer.has_header() on fixed samples."""

    sample1 = """\
Harry's, Arlington Heights, IL, 2/1/03, Kimi Hayes
Shark City, Glendale Heights, IL, 12/28/02, Prezence
Tommy's Place, Blue Island, IL, 12/28/02, Blue Sunday/White Crow
Stonecutters Seafood and Chop House, Lemont, IL, 12/19/02, Week Back
"""
    sample2 = """\
'Harry''s':'Arlington Heights':'IL':'2/1/03':'Kimi Hayes'
'Shark City':'Glendale Heights':'IL':'12/28/02':'Prezence'
'Tommy''s Place':'Blue Island':'IL':'12/28/02':'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House':'Lemont':'IL':'12/19/02':'Week Back'
"""
    header1 = '''\
"venue","city","state","date","performers"
'''
    sample3 = '''\
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
05/05/03?05/05/03?05/05/03?05/05/03?05/05/03?05/05/03
'''

    sample4 = '''\
2147483648;43.0e12;17;abc;def
147483648;43.0e2;17;abc;def
47483648;43.0;170;abc;def
'''

    sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
    sample6 = "a|b|c\r\nd|e|f\r\n"
    sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"

    # Issue 18155: Use a delimiter that is a special char to regex:

    header2 = '''\
"venue"+"city"+"state"+"date"+"performers"
'''
    sample8 = """\
Harry's+ Arlington Heights+ IL+ 2/1/03+ Kimi Hayes
Shark City+ Glendale Heights+ IL+ 12/28/02+ Prezence
Tommy's Place+ Blue Island+ IL+ 12/28/02+ Blue Sunday/White Crow
Stonecutters Seafood and Chop House+ Lemont+ IL+ 12/19/02+ Week Back
"""
    sample9 = """\
'Harry''s'+ Arlington Heights'+ 'IL'+ '2/1/03'+ 'Kimi Hayes'
'Shark City'+ Glendale Heights'+' IL'+ '12/28/02'+ 'Prezence'
'Tommy''s Place'+ Blue Island'+ 'IL'+ '12/28/02'+ 'Blue Sunday/White Crow'
'Stonecutters ''Seafood'' and Chop House'+ 'Lemont'+ 'IL'+ '12/19/02'+ 'Week Back'
"""

    def test_has_header(self):
        # Data alone has no header; data prefixed with header1 does.
        detect = csv.Sniffer().has_header
        self.assertEqual(detect(self.sample1), False)
        self.assertEqual(detect(self.header1 + self.sample1), True)

    def test_has_header_regex_special_delimiter(self):
        # Same check with '+' (a regex metacharacter) as the delimiter.
        detect = csv.Sniffer().has_header
        self.assertEqual(detect(self.sample8), False)
        self.assertEqual(detect(self.header2 + self.sample8), True)

    def test_sniff(self):
        sniffer = csv.Sniffer()
        # (sample, delimiter, quotechar, skipinitialspace)
        expectations = [
            (self.sample1, ",", '"', True),
            (self.sample2, ":", "'", False),
        ]
        for text, delimiter, quotechar, skipinitialspace in expectations:
            dialect = sniffer.sniff(text)
            self.assertEqual(dialect.delimiter, delimiter)
            self.assertEqual(dialect.quotechar, quotechar)
            self.assertEqual(dialect.skipinitialspace, skipinitialspace)

    def test_delimiters(self):
        sniffer = csv.Sniffer()
        # given that all three lines in sample3 are equal,
        # I think that any character could have been 'guessed' as the
        # delimiter, depending on dictionary order
        self.assertIn(sniffer.sniff(self.sample3).delimiter, self.sample3)
        # Restricting the candidate delimiters pins the answer.
        for candidates, delimiter in [("?,", "?"), ("/,", "/")]:
            dialect = sniffer.sniff(self.sample3, delimiters=candidates)
            self.assertEqual(dialect.delimiter, delimiter)
        # (sample, delimiter, quotechar or None when it is not checked)
        expectations = [
            (self.sample4, ";", None),
            (self.sample5, "\t", None),
            (self.sample6, "|", None),
            (self.sample7, "|", "'"),
            (self.sample8, '+', None),
            (self.sample9, '+', "'"),
        ]
        for text, delimiter, quotechar in expectations:
            dialect = sniffer.sniff(text)
            self.assertEqual(dialect.delimiter, delimiter)
            if quotechar is not None:
                self.assertEqual(dialect.quotechar, quotechar)

    def test_doublequote(self):
        sniffer = csv.Sniffer()
        # (sample, whether doublequote is expected to be detected)
        expectations = [
            (self.header1, False),
            (self.header2, False),
            (self.sample2, True),
            (self.sample8, False),
            (self.sample9, True),
        ]
        for text, doubled in expectations:
            dialect = sniffer.sniff(text)
            if doubled:
                self.assertTrue(dialect.doublequote)
            else:
                self.assertFalse(dialect.doublequote)
Skip Montanarob4fd4d32009-09-28 02:12:27 +00001088
class NUL:
    """File-like sink: write() and writelines() accept anything and discard it."""
    def write(s, *args):
        # `s` is the instance; whatever is written is ignored.
        return None
    # writelines shares the very same function object as write.
    writelines = write
Skip Montanarob4a04172003-03-20 23:29:12 +00001093
@unittest.skipUnless(hasattr(sys, "gettotalrefcount"),
                     'requires sys.gettotalrefcount()')
class TestLeaks(unittest.TestCase):
    """Reference-count checks for csv readers and writers.

    Every test repeats the same work 20 times, sampling
    sys.gettotalrefcount() at the start of each round (after a gc
    pass), and asserts that the growth between the last two samples
    stays below a small bound.
    """

    def test_create_read(self):
        growth = 0
        previous = sys.gettotalrefcount()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            # Three readers are created and dropped per round.
            csv.reader(["a,b,c\r\n"])
            csv.reader(["a,b,c\r\n"])
            csv.reader(["a,b,c\r\n"])
            growth = current-previous
            previous = current
        # if csv.reader() leaks, last delta should be 3 or more
        self.assertEqual(growth < 3, True)

    def test_create_write(self):
        growth = 0
        previous = sys.gettotalrefcount()
        sink = NUL()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            # Three writers are created and dropped per round.
            csv.writer(sink)
            csv.writer(sink)
            csv.writer(sink)
            growth = current-previous
            previous = current
        # if csv.writer() leaks, last delta should be 3 or more
        self.assertEqual(growth < 3, True)

    def test_read(self):
        growth = 0
        lines = ["a,b,c\r\n"]*5
        previous = sys.gettotalrefcount()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            # Consume all five rows; the parsed rows are thrown away.
            rdr = csv.reader(lines)
            for row in rdr:
                pass
            growth = current-previous
            previous = current
        # if reader leaks during read, delta should be 5 or more
        self.assertEqual(growth < 5, True)

    def test_write(self):
        growth = 0
        records = [[1,2,3]]*5
        sink = NUL()
        previous = sys.gettotalrefcount()
        for _ in xrange(20):
            gc.collect()
            self.assertEqual(gc.garbage, [])
            current = sys.gettotalrefcount()
            # Write five rows to the discarding sink.
            writer = csv.writer(sink)
            for record in records:
                writer.writerow(record)
            growth = current-previous
            previous = current
        # if writer leaks during write, last delta should be 5 or more
        self.assertEqual(growth < 5, True)
Skip Montanarob4a04172003-03-20 23:29:12 +00001160
Skip Montanaro1a566652003-05-06 15:56:05 +00001161# commented out for now - csv module doesn't yet support Unicode
Skip Montanaro58fc5d02004-06-05 17:03:20 +00001162## class TestUnicode(unittest.TestCase):
1163## def test_unicode_read(self):
1164## import codecs
1165## f = codecs.EncodedFile(StringIO("Martin von Löwis,"
1166## "Marc André Lemburg,"
1167## "Guido van Rossum,"
1168## "François Pinard\r\n"),
1169## data_encoding='iso-8859-1')
1170## reader = csv.reader(f)
1171## self.assertEqual(list(reader), [[u"Martin von Löwis",
1172## u"Marc André Lemburg",
1173## u"Guido van Rossum",
##                                          u"François Pinard"]])
Skip Montanaro1a566652003-05-06 15:56:05 +00001175
def test_main():
    # Gather every module-level object whose name starts with 'Test' and
    # pass them all to test_support.run_unittest().
    this_module = sys.modules[__name__]
    test_classes = [getattr(this_module, attr)
                    for attr in dir(this_module)
                    if attr.startswith('Test')]
    test_support.run_unittest(*test_classes)

if __name__ == '__main__':
    test_main()