Merged revisions 72491-72493 via svnmerge from
svn+ssh://pythondev@svn.python.org/sandbox/trunk/2to3/lib2to3

........
  r72491 | benjamin.peterson | 2009-05-08 19:33:27 -0500 (Fri, 08 May 2009) | 7 lines

  make 2to3 use unicode internally on 2.x

  This started out as a fix for #2660, but became this large refactoring
  when I realized the dire state this was in. 2to3 now uses
  tokenize.detect_encoding to decode the files correctly into unicode.
........
  r72492 | benjamin.peterson | 2009-05-08 19:35:38 -0500 (Fri, 08 May 2009) | 1 line

  remove compat code
........
  r72493 | benjamin.peterson | 2009-05-08 19:54:15 -0500 (Fri, 08 May 2009) | 1 line

  add a test for \r\n newlines
........
diff --git a/Lib/lib2to3/pgen2/driver.py b/Lib/lib2to3/pgen2/driver.py
index 3cff0ac..6b3825e 100644
--- a/Lib/lib2to3/pgen2/driver.py
+++ b/Lib/lib2to3/pgen2/driver.py
@@ -16,6 +16,7 @@
 __all__ = ["Driver", "load_grammar"]
 
 # Python imports
+import codecs
 import os
 import logging
 import sys
@@ -41,7 +42,7 @@
         lineno = 1
         column = 0
         type = value = start = end = line_text = None
-        prefix = ""
+        prefix = u""
         for quintuple in tokens:
             type, value, start, end, line_text = quintuple
             if start != (lineno, column):
@@ -90,9 +91,9 @@
         """Parse a stream and return the syntax tree."""
         return self.parse_stream_raw(stream, debug)
 
-    def parse_file(self, filename, debug=False):
+    def parse_file(self, filename, encoding=None, debug=False):
         """Parse a file and return the syntax tree."""
-        stream = open(filename)
+        stream = codecs.open(filename, "r", encoding)
         try:
             return self.parse_stream(stream, debug)
         finally: