Latin-1 source code was not being properly decoded when passed through compile(). This was due to leftover special-casing from before UTF-8 became the default source encoding. Closes issue #3574. Thanks to Victor Stinner for help with the patch.

commit: da780432378e6298463889557ab43e0c156758cd [log] [tgz]
author: Brett Cannon <bcannon@gmail.com> Fri Oct 17 03:38:50 2008 +0000
committer: Brett Cannon <bcannon@gmail.com> Fri Oct 17 03:38:50 2008 +0000
tree: dc622a9b62874851f90abc45524d3d2653cab9ba
parent: 9e9dcd6d4225faa6a8b19120f009e0253d16ab92 [diff] [blame]
diff --git a/Parser/tokenizer.h b/Parser/tokenizer.h
index c45dea1..df9cbc7 100644
--- a/Parser/tokenizer.h
+++ b/Parser/tokenizer.h

@@ -49,14 +49,14 @@
 	enum decoding_state decoding_state;
 	int decoding_erred;	/* whether erred in decoding  */
 	int read_coding_spec;	/* whether 'coding:...' has been read  */
-	char *encoding;
+	char *encoding;         /* Source encoding. */
 	int cont_line;          /* whether we are in a continuation line. */
 	const char* line_start;	/* pointer to start of current line */
 #ifndef PGEN
 	PyObject *decoding_readline; /* codecs.open(...).readline */
 	PyObject *decoding_buffer;
 #endif
-	const char* enc;
+	const char* enc;        /* Encoding for the current str. */
 	const char* str;
 };
commit	da780432378e6298463889557ab43e0c156758cd	[log] [tgz]
author	Brett Cannon <bcannon@gmail.com>	Fri Oct 17 03:38:50 2008 +0000
committer	Brett Cannon <bcannon@gmail.com>	Fri Oct 17 03:38:50 2008 +0000
tree	dc622a9b62874851f90abc45524d3d2653cab9ba
parent	9e9dcd6d4225faa6a8b19120f009e0253d16ab92 [diff] [blame]