blob: dc85077f3ec663642d315a1ff1311d71e20f0169 [file] [log] [blame]
"""
csv.py - read/write/investigate CSV files
"""
5
6import re
7from _csv import Error, __version__, writer, reader, register_dialect, \
8 unregister_dialect, get_dialect, list_dialects, \
Andrew McNamara31d88962005-01-12 03:45:10 +00009 field_size_limit, \
Skip Montanaro04ae7052003-04-24 20:21:31 +000010 QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, \
11 __doc__
Andrew McNamara7130ff52005-01-11 02:22:47 +000012from _csv import Dialect as _Dialect
Skip Montanaro04ae7052003-04-24 20:21:31 +000013
Guido van Rossum68937b42007-05-18 00:51:22 +000014from io import StringIO
Skip Montanaro1448d472003-04-25 14:47:16 +000015
# Public names exported by "from csv import *".
__all__ = [
    "QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
    "Error", "Dialect", "__doc__", "excel", "excel_tab",
    "field_size_limit", "reader", "writer",
    "register_dialect", "get_dialect", "list_dialects", "Sniffer",
    "unregister_dialect", "__version__", "DictReader", "DictWriter",
    "unix_dialect",
]
Skip Montanaro04ae7052003-04-24 20:21:31 +000022
class Dialect:
    """Base class describing a CSV dialect.

    Not meant to be used directly: derive a subclass (csv.excel is one
    example) and override the formatting attributes.  The recognised
    attributes are delimiter, quotechar, escapechar, doublequote,
    skipinitialspace, lineterminator and quoting.

    """
    _name = ""
    _valid = False
    # Formatting parameters; None until a subclass supplies real values.
    quoting = None
    lineterminator = None
    skipinitialspace = None
    doublequote = None
    escapechar = None
    quotechar = None
    delimiter = None

    def __init__(self):
        # Only subclasses are flagged as valid; validation runs either way,
        # so instantiating the bare base class fails in _validate().
        if self.__class__ != Dialect:
            self._valid = True
        self._validate()

    def _validate(self):
        """Check this dialect with _csv.Dialect.

        Raises Error (carrying the original message) when the C-level
        check rejects the attributes with a TypeError.
        """
        try:
            _Dialect(self)
        except TypeError as exc:
            # TypeError is converted to Error for compatibility with py2.3.
            raise Error(str(exc))
Skip Montanaro04ae7052003-04-24 20:21:31 +000053
class excel(Dialect):
    """Dialect for the comma-separated files that Excel usually writes."""
    quoting = QUOTE_MINIMAL
    quotechar = '"'
    doublequote = True
    delimiter = ','
    skipinitialspace = False
    lineterminator = '\r\n'


# Make the dialect available by name.
register_dialect("excel", excel)
63
class excel_tab(excel):
    """Dialect for the TAB-delimited files that Excel usually writes.

    Identical to the excel dialect except for the delimiter.
    """
    delimiter = '\t'


# Make the dialect available by name.
register_dialect("excel-tab", excel_tab)
68
class unix_dialect(Dialect):
    """Dialect for the CSV files usually produced on Unix systems.

    Every field is quoted and lines end with a bare newline.
    """
    quoting = QUOTE_ALL
    quotechar = '"'
    doublequote = True
    delimiter = ','
    skipinitialspace = False
    lineterminator = '\n'


# Make the dialect available by name.
register_dialect("unix", unix_dialect)
78
Skip Montanaro04ae7052003-04-24 20:21:31 +000079
class DictReader:
    """Iterate over the rows of a CSV file as dicts keyed by field name.

    Parameters:
        f          -- passed straight to reader().
        fieldnames -- keys for the row dicts.  When None, the first row
                      read from the file is used instead.
        restkey    -- key under which the surplus values of an over-long
                      row are stored (as a list).
        restval    -- value filled in for the missing fields of a short row.
        dialect, *args, **kwds -- forwarded to reader().

    Rows that come back from the reader as an empty list are skipped.
    """

    def __init__(self, f, fieldnames=None, restkey=None, restval=None,
                 dialect="excel", *args, **kwds):
        # A one-shot iterator would be used up by zip() on the first row
        # and supports neither len() nor slicing, both of which __next__
        # relies on; turn it into a list once, up front.
        if fieldnames is not None and iter(fieldnames) is fieldnames:
            fieldnames = list(fieldnames)
        self._fieldnames = fieldnames   # list of keys for the dict
        self.restkey = restkey          # key to catch long rows
        self.restval = restval          # default value for short rows
        self.reader = reader(f, dialect, *args, **kwds)
        self.dialect = dialect
        self.line_num = 0

    def __iter__(self):
        return self

    @property
    def fieldnames(self):
        """Keys used for the row dicts.

        If no field names are known yet, the next row of the underlying
        reader is consumed and used; on an exhausted reader the value stays
        None.  Every access also copies the reader's line_num to this
        object.
        """
        if self._fieldnames is None:
            try:
                self._fieldnames = next(self.reader)
            except StopIteration:
                pass
        self.line_num = self.reader.line_num
        return self._fieldnames

    @fieldnames.setter
    def fieldnames(self, value):
        self._fieldnames = value

    def __next__(self):
        """Return the next non-empty row as a dict; StopIteration at EOF."""
        if self.line_num == 0:
            # Used only for its side effect: reads the header row if needed.
            self.fieldnames
        row = next(self.reader)
        self.line_num = self.reader.line_num

        # unlike the basic reader, we prefer not to return blanks,
        # because we will typically wind up with a dict full of None
        # values
        while row == []:
            row = next(self.reader)

        # Reading the property here also refreshes line_num after any
        # blank rows were skipped above.
        fieldnames = self.fieldnames
        d = dict(zip(fieldnames, row))
        lf = len(fieldnames)
        lr = len(row)
        if lf < lr:
            # Long row: keep the surplus values together under restkey.
            d[self.restkey] = row[lf:]
        elif lf > lr:
            # Short row: pad the missing fields with restval.
            for key in fieldnames[lr:]:
                d[key] = self.restval
        return d
128
129
class DictWriter:
    """Write dicts to a CSV file, ordering values by a list of field names.

    Parameters:
        f            -- passed straight to writer().
        fieldnames   -- keys, in output column order.
        restval      -- value written for keys missing from a row dict.
        extrasaction -- "raise" (default) to raise ValueError when a row
                        dict has keys not in fieldnames, "ignore" to drop
                        them silently.  Matched case-insensitively.
        dialect, *args, **kwds -- forwarded to writer().

    Raises ValueError if extrasaction is neither "raise" nor "ignore".
    """

    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
                 dialect="excel", *args, **kwds):
        # A one-shot iterator would be exhausted after the first row (or
        # by writeheader()), leaving later rows empty; make it a list.
        if fieldnames is not None and iter(fieldnames) is fieldnames:
            fieldnames = list(fieldnames)
        self.fieldnames = fieldnames    # list of keys for the dict
        self.restval = restval          # for writing short dicts
        # Store the normalised spelling: _dict_to_list() compares against
        # the lower-case literal, so keeping e.g. "RAISE" as given would
        # pass validation here yet silently behave like "ignore".
        action = extrasaction.lower()
        if action not in ("raise", "ignore"):
            raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
                             % extrasaction)
        self.extrasaction = action
        self.writer = writer(f, dialect, *args, **kwds)

    def writeheader(self):
        """Write a row containing the field names; return writerow()'s result."""
        header = dict(zip(self.fieldnames, self.fieldnames))
        return self.writerow(header)

    def _dict_to_list(self, rowdict):
        """Return an iterable of rowdict's values in fieldnames order.

        Missing keys yield restval.  With extrasaction "raise", keys that
        are not in fieldnames raise ValueError.
        """
        if self.extrasaction == "raise":
            wrong_fields = rowdict.keys() - self.fieldnames
            if wrong_fields:
                raise ValueError("dict contains fields not in fieldnames: "
                                 + ", ".join([repr(x) for x in wrong_fields]))
        return (rowdict.get(key, self.restval) for key in self.fieldnames)

    def writerow(self, rowdict):
        """Write one dict as a row; return the underlying writerow() result."""
        return self.writer.writerow(self._dict_to_list(rowdict))

    def writerows(self, rowdicts):
        """Write each dict in rowdicts as a row."""
        return self.writer.writerows(map(self._dict_to_list, rowdicts))
Skip Montanaro04ae7052003-04-24 20:21:31 +0000158
# Builds that exclude complex() would break the type checks done by
# Sniffer; when the name is missing, fall back to float.
try:
    complex
except NameError:
    complex = float
Skip Montanaro04ae7052003-04-24 20:21:31 +0000164
class Sniffer:
    '''
    "Sniffs" the format of a CSV file (i.e. delimiter, quotechar).

    sniff() returns a Dialect subclass describing a sample of the file;
    has_header() guesses whether the sample starts with a header row.
    '''

    def __init__(self):
        # Tie-break order used by _guess_delimiter() when more than one
        # character is a plausible delimiter.
        self.preferred = [',', '\t', ';', ' ', ':']

    def sniff(self, sample, delimiters=None):
        """
        Return a Dialect subclass corresponding to the sample.

        If delimiters is given, only characters contained in it are
        accepted as the delimiter.  Raises Error when no delimiter can be
        determined.
        """
        quotechar, doublequote, delimiter, skipinitialspace = \
            self._guess_quote_and_delimiter(sample, delimiters)
        if not delimiter:
            # No quoted fields to go by; fall back to character statistics.
            delimiter, skipinitialspace = self._guess_delimiter(sample,
                                                                delimiters)

        if not delimiter:
            raise Error("Could not determine delimiter")

        # escapechar is not set here, so the Dialect default applies.
        class dialect(Dialect):
            _name = "sniffed"
            lineterminator = '\r\n'
            quoting = QUOTE_MINIMAL

        dialect.doublequote = doublequote
        dialect.delimiter = delimiter
        # _csv.reader won't accept a quotechar of ''
        dialect.quotechar = quotechar or '"'
        dialect.skipinitialspace = skipinitialspace

        return dialect

    def _guess_quote_and_delimiter(self, data, delimiters):
        """
        Looks for text enclosed between two identical quotes
        (the probable quotechar) which are preceded and followed
        by the same character (the probable delimiter).
        For example:
                         ,'some text',
        The quote with the most wins, same with the delimiter.
        If there is no quotechar the delimiter can't be determined
        this way.

        Returns (quotechar, doublequote, delimiter, skipinitialspace);
        when nothing quoted is found this is ('', False, None, 0).
        """
        matches = []
        # Patterns are tried from most to least specific; the first one
        # that matches anything is used.
        for restr in (r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?P=delim)', # ,".*?",
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?P<delim>[^\w\n"\'])(?P<space> ?)',   #  ".*?",
                      r'(?P<delim>[^\w\n"\'])(?P<space> ?)(?P<quote>["\']).*?(?P=quote)(?:$|\n)',   # ,".*?"
                      r'(?:^|\n)(?P<quote>["\']).*?(?P=quote)(?:$|\n)'):                            #  ".*?" (no delim, no space)
            regexp = re.compile(restr, re.DOTALL | re.MULTILINE)
            matches = regexp.findall(data)
            if matches:
                break

        if not matches:
            # (quotechar, doublequote, delimiter, skipinitialspace)
            return ('', False, None, 0)

        quotes = {}
        delims = {}
        spaces = 0
        groupindex = regexp.groupindex
        quote_pos = groupindex['quote'] - 1
        # The last pattern defines neither a delim nor a space group.
        delim_group = groupindex.get('delim')
        space_group = groupindex.get('space')
        for m in matches:
            # With the single-group pattern findall() yields plain strings,
            # so m[0] is then simply the quote character itself.
            key = m[quote_pos]
            if key:
                quotes[key] = quotes.get(key, 0) + 1
            if delim_group is None:
                continue
            key = m[delim_group - 1]
            if key and (delimiters is None or key in delimiters):
                delims[key] = delims.get(key, 0) + 1
            if space_group is None:
                continue
            if m[space_group - 1]:
                spaces += 1

        quotechar = max(quotes, key=quotes.get)

        if delims:
            delim = max(delims, key=delims.get)
            # Only when every occurrence of the winning delimiter was
            # followed by a space.
            skipinitialspace = delims[delim] == spaces
            if delim == '\n': # most likely a file with a single column
                delim = ''
        else:
            # there is *no* delimiter, it's a single column of quoted data
            delim = ''
            skipinitialspace = 0

        # if we see an extra quote between delimiters, we've got a
        # double quoted format
        dq_regexp = re.compile(
            r"((%(delim)s)|^)\W*%(quote)s[^%(delim)s\n]*%(quote)s[^%(delim)s\n]*%(quote)s\W*((%(delim)s)|$)" % \
            {'delim':re.escape(delim), 'quote':quotechar}, re.MULTILINE)

        doublequote = bool(dq_regexp.search(data))

        return (quotechar, doublequote, delim, skipinitialspace)

    @staticmethod
    def _always_followed_by_space(line, delim):
        """True if every delim in line is immediately followed by a space."""
        return line.count(delim) == line.count("%c " % delim)

    def _guess_delimiter(self, data, delimiters):
        """
        The delimiter /should/ occur the same number of times on
        each row. However, due to malformed data, it may not. We don't want
        an all or nothing approach, so we allow for small variations in this
        number.
          1) build a table of the frequency of each character on every line.
          2) build a table of frequencies of this frequency (meta-frequency?),
             e.g.  'x occurred 5 times in 10 rows, 6 times in 1000 rows,
             7 times in 2 rows'
          3) use the mode of the meta-frequency to determine the /expected/
             frequency for that character
          4) find out how often the character actually meets that goal
          5) the character that best meets its goal is the delimiter
        For performance reasons, the data is evaluated in chunks, so it can
        try and evaluate the smallest portion of the data possible, evaluating
        additional chunks as necessary.

        Returns (delimiter, skipinitialspace), or ('', 0) when no
        candidate is found.
        """
        data = list(filter(None, data.split('\n')))

        # Candidate characters: code points 0..126.
        candidates = [chr(c) for c in range(127)]

        # build frequency tables
        chunkLength = min(10, len(data))
        iteration = 0
        charFrequency = {}
        modes = {}
        delims = {}
        start, end = 0, chunkLength
        while start < len(data):
            iteration += 1
            for line in data[start:end]:
                for char in candidates:
                    metaFrequency = charFrequency.setdefault(char, {})
                    # must count even if frequency is 0
                    freq = line.count(char)
                    # value is the mode
                    metaFrequency[freq] = metaFrequency.get(freq, 0) + 1

            for char, metaFrequency in charFrequency.items():
                items = list(metaFrequency.items())
                if len(items) == 1 and items[0][0] == 0:
                    # character never occurred at all
                    continue
                # get the mode of the frequencies
                if len(items) > 1:
                    mode = max(items, key=lambda x: x[1])
                    # adjust the mode - subtract the sum of all
                    # other frequencies
                    items.remove(mode)
                    modes[char] = (mode[0],
                                   mode[1] - sum(item[1] for item in items))
                else:
                    modes[char] = items[0]

            # build a list of possible delimiters
            total = float(min(chunkLength * iteration, len(data)))
            # (rows of consistent data) / (number of rows) = 100%
            consistency = 1.0
            # minimum consistency threshold
            threshold = 0.9
            while len(delims) == 0 and consistency >= threshold:
                for k, v in modes.items():
                    if v[0] > 0 and v[1] > 0:
                        if ((v[1]/total) >= consistency and
                            (delimiters is None or k in delimiters)):
                            delims[k] = v
                consistency -= 0.01

            if len(delims) == 1:
                delim = list(delims.keys())[0]
                return (delim,
                        self._always_followed_by_space(data[0], delim))

            # analyze another chunkLength lines
            start = end
            end += chunkLength

        if not delims:
            return ('', 0)

        # if there's more than one, fall back to a 'preferred' list
        if len(delims) > 1:
            for d in self.preferred:
                if d in delims:
                    return (d, self._always_followed_by_space(data[0], d))

        # nothing else indicates a preference, pick the character that
        # dominates(?)
        delim = max((v, k) for (k, v) in delims.items())[1]
        return (delim, self._always_followed_by_space(data[0], delim))

    def has_header(self, sample):
        """Guess whether the first row of sample is a header row.

        Creates a dictionary of types of data in each column. If any
        column is of a single type (say, numbers), *except* for the first
        row, then the first row is presumed to be labels. If the type
        can't be determined, it is assumed to be a string in which case
        the length of the string is the determining factor: if all of the
        rows except for the first are the same length, it's a header.
        Finally, a 'vote' is taken at the end for each column, adding or
        subtracting from the likelihood of the first row being a header.

        The dialect is obtained from sniff(), so Error is raised when no
        delimiter can be determined.
        """
        rdr = reader(StringIO(sample), self.sniff(sample))

        header = next(rdr) # assume first row is header

        columns = len(header)
        columnTypes = dict.fromkeys(range(columns))

        checked = 0
        for row in rdr:
            # arbitrary number of rows to check, to keep it sane
            if checked > 20:
                break
            checked += 1

            if len(row) != columns:
                continue # skip rows that have irregular number of columns

            # Iterate over a copy: columns may be deleted inside the loop.
            for col in list(columnTypes):
                # One numeric probe for every notation: telling int, float
                # and complex apart would make a numeric column that mixes
                # e.g. "1" and "2.5" look inconsistent and drop it from
                # the vote.
                thisType = complex
                try:
                    thisType(row[col])
                except (ValueError, OverflowError):
                    # fallback to length of string
                    thisType = len(row[col])

                if thisType != columnTypes[col]:
                    if columnTypes[col] is None: # add new column type
                        columnTypes[col] = thisType
                    else:
                        # type is inconsistent, remove column from
                        # consideration
                        del columnTypes[col]

        # finally, compare results against first row and "vote"
        # on whether it's a header
        hasHeader = 0
        for col, colType in columnTypes.items():
            if isinstance(colType, int): # it's a length
                if len(header[col]) != colType:
                    hasHeader += 1
                else:
                    hasHeader -= 1
            else: # attempt typecast
                try:
                    colType(header[col])
                except (ValueError, TypeError):
                    # TypeError covers a column that never received a type
                    # (colType is still None).
                    hasHeader += 1
                else:
                    hasHeader -= 1

        return hasHeader > 0