Fix a delimiter detection problem in csv.Sniffer. Sniffing "a|b|c\r\n" was
returning 'a' as the delimiter. It now returns '|', but not because I
understand any better what the code is supposed to do. Would someone who
understands the idea behind _guess_delimiter() (see its docstring) check
whether my fallback choice is actually better than before, or whether it's
just serendipity that I picked the proper delimiter?
diff --git a/Lib/test/test_csv.py b/Lib/test/test_csv.py
index 0ad77ef..8511a5a 100644
--- a/Lib/test/test_csv.py
+++ b/Lib/test/test_csv.py
@@ -852,6 +852,8 @@
'''
sample5 = "aaa\tbbb\r\nAAA\t\r\nBBB\t\r\n"
+ sample6 = "a|b|c\r\nd|e|f\r\n"
+ sample7 = "'a'|'b'|'c'\r\n'd'|e|f\r\n"
def test_has_header(self):
sniffer = csv.Sniffer()
@@ -882,6 +884,11 @@
self.assertEqual(dialect.delimiter, ";")
dialect = sniffer.sniff(self.sample5)
self.assertEqual(dialect.delimiter, "\t")
+ dialect = sniffer.sniff(self.sample6)
+ self.assertEqual(dialect.delimiter, "|")
+ dialect = sniffer.sniff(self.sample7)
+ self.assertEqual(dialect.delimiter, "|")
+ self.assertEqual(dialect.quotechar, "'")
if not hasattr(sys, "gettotalrefcount"):
if test_support.verbose: print "*** skipping leakage tests ***"