SF patch [ 597919 ] compiler package and SET_LINENO

A variety of changes from Michael Hudson to get the compiler working
with 2.3.  The primary change is the handling of SET_LINENO:

# The set_lineno() function and the explicit emit() calls for
# SET_LINENO below are only used to generate the line number table.
# As of Python 2.3, the interpreter does not have a SET_LINENO
# instruction.  pyassem treats SET_LINENO opcodes as a special case.

A few other small changes:
 - Remove unused code from pycodegen and pyassem.
 - Fix error handling in parsermodule.  When PyParser_SimpleParseString()
   fails, it sets an exception with detailed info.  The parsermodule
   was clobbering that exception and replacing it with a generic
   "could not parse string" exception.  Keep the original exception.
diff --git a/Lib/compiler/pyassem.py b/Lib/compiler/pyassem.py
index 10a8dbd..0547eeb 100644
--- a/Lib/compiler/pyassem.py
+++ b/Lib/compiler/pyassem.py
@@ -6,15 +6,8 @@
 import types
 
 from compiler import misc
-from compiler.consts import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, \
-     CO_VARKEYWORDS
-
-def xxx_sort(l):
-    l = l[:]
-    def sorter(a, b):
-        return cmp(a.bid, b.bid)
-    l.sort(sorter)
-    return l
+from compiler.consts \
+     import CO_OPTIMIZED, CO_NEWLOCALS, CO_VARARGS, CO_VARKEYWORDS
 
 class FlowGraph:
     def __init__(self):
@@ -77,7 +70,7 @@
     def emit(self, *inst):
         if self._debug:
             print "\t", inst
-        if inst[0] == 'RETURN_VALUE':
+        if inst[0] in ['RETURN_VALUE', 'YIELD_VALUE']:
             self.current.addOutEdge(self.exit)
         if len(inst) == 2 and isinstance(inst[1], Block):
             self.current.addOutEdge(inst[1])
@@ -266,7 +259,7 @@
         self.next.append(block)
         assert len(self.next) == 1, map(str, self.next)
 
-    _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS',
+    _uncond_transfer = ('RETURN_VALUE', 'RAISE_VARARGS', 'YIELD_VALUE',
                         'JUMP_ABSOLUTE', 'JUMP_FORWARD', 'CONTINUE_LOOP')
 
     def pruneNext(self):
@@ -443,7 +436,7 @@
                 insts.append(inst)
                 if len(inst) == 1:
                     pc = pc + 1
-                else:
+                elif inst[0] != "SET_LINENO":
                     # arg takes 2 bytes
                     pc = pc + 3
             end[b] = pc
@@ -452,7 +445,7 @@
             inst = insts[i]
             if len(inst) == 1:
                 pc = pc + 1
-            else:
+            elif inst[0] != "SET_LINENO":
                 pc = pc + 3
             opname = inst[0]
             if self.hasjrel.has_elt(opname):
@@ -580,6 +573,7 @@
                 oparg = t[1]
                 if opname == "SET_LINENO":
                     lnotab.nextLine(oparg)
+                    continue
                 hi, lo = twobyte(oparg)
                 try:
                     lnotab.addCode(self.opnum[opname], lo, hi)
@@ -697,7 +691,7 @@
             # after the loading of "b".  This works with the C Python
             # compiler because it only generates a SET_LINENO instruction
             # for the assignment.
-            if line > 0:
+            if line >= 0:
                 push = self.lnotab.append
                 while addr > 255:
                     push(255); push(0)
@@ -768,6 +762,7 @@
         # PRINT_EXPR?
         'PRINT_ITEM': -1,
         'RETURN_VALUE': -1,
+        'YIELD_VALUE': -1,
         'EXEC_STMT': -3,
         'BUILD_CLASS': -2,
         'STORE_NAME': -1,
diff --git a/Lib/compiler/pycodegen.py b/Lib/compiler/pycodegen.py
index ac978c0..a3518d2 100644
--- a/Lib/compiler/pycodegen.py
+++ b/Lib/compiler/pycodegen.py
@@ -13,6 +13,7 @@
      CO_NESTED, CO_GENERATOR, CO_GENERATOR_ALLOWED, CO_FUTURE_DIVISION
 from compiler.pyassem import TupleArg
 
+# XXX The version-specific code can go, since this code only works with 2.x.
 # Do we have Python 1.x or Python 2.x?
 try:
     VERSION = sys.version_info[0]
@@ -32,22 +33,14 @@
 TRY_FINALLY = 3
 END_FINALLY = 4
 
-# XXX this doesn't seem to be used
-class BlockStack(misc.Stack):
-    __super_init = misc.Stack.__init__
-
-    def __init__(self):
-        self.__super_init(self)
-        self.loop = None
-
 def compileFile(filename, display=0):
-    f = open(filename)
+    f = open(filename, 'U')
     buf = f.read()
     f.close()
     mod = Module(buf, filename)
     try:
         mod.compile(display)
-    except SyntaxError, err:
+    except SyntaxError:
         raise
     else:
         f = open(filename + "c", "wb")
@@ -134,7 +127,7 @@
         # to indicate the type of the value.  simplest way to get the
         # same effect is to call marshal and then skip the code.
         mtime = os.path.getmtime(self.filename)
-        mtime = struct.pack('i', mtime)
+        mtime = struct.pack('<i', mtime)
         return self.MAGIC + mtime
 
 class LocalNameFinder:
@@ -310,9 +303,17 @@
         else:
             self.emit(prefix + '_NAME', name)
 
-    def set_lineno(self, node, force=0):
-        """Emit SET_LINENO if node has lineno attribute and it is
-        different than the last lineno emitted.
+    # The set_lineno() function and the explicit emit() calls for
+    # SET_LINENO below are only used to generate the line number table.
+    # As of Python 2.3, the interpreter does not have a SET_LINENO
+    # instruction.  pyassem treats SET_LINENO opcodes as a special case.
+
+    def set_lineno(self, node, force=False):
+        """Emit SET_LINENO if necessary.
+
+        The instruction is considered necessary if the node has a
+        lineno attribute and it is different than the last lineno
+        emitted.
 
         Returns true if SET_LINENO was emitted.
 
@@ -326,8 +327,8 @@
                                    or force):
             self.emit('SET_LINENO', lineno)
             self.last_lineno = lineno
-            return 1
-        return 0
+            return True
+        return False
 
     # The first few visitor methods handle nodes that generator new
     # code objects.  They use class attributes to determine what
@@ -387,9 +388,6 @@
     def visitClass(self, node):
         gen = self.ClassGen(node, self.scopes,
                             self.get_module())
-        if node.doc:
-            self.emit('LOAD_CONST', node.doc)
-            self.storeName('__doc__')
         walk(node.code, gen)
         gen.finish()
         self.set_lineno(node)
@@ -447,7 +445,7 @@
         self.nextBlock(loop)
         self.setups.push((LOOP, loop))
 
-        self.set_lineno(node, force=1)
+        self.set_lineno(node, force=True)
         self.visit(node.test)
         self.emit('JUMP_IF_FALSE', else_ or after)
 
@@ -617,7 +615,7 @@
         return start, anchor
 
     def visitListCompIf(self, node, branch):
-        self.set_lineno(node, force=1)
+        self.set_lineno(node, force=True)
         self.visit(node.test)
         self.emit('JUMP_IF_FALSE', branch)
         self.newBlock()
@@ -975,7 +973,7 @@
     def visitYield(self, node):
         self.set_lineno(node)
         self.visit(node.value)
-        self.emit('YIELD_STMT')
+        self.emit('YIELD_VALUE')
 
     # slice and subscript stuff
 
@@ -1266,9 +1264,8 @@
         self.__super_init(func, scopes, isLambda, class_name, mod)
         self.graph.setFreeVars(self.scope.get_free_vars())
         self.graph.setCellVars(self.scope.get_cell_vars())
-        if self.graph.checkFlag(CO_GENERATOR_ALLOWED):
-            if self.scope.generator is not None:
-                self.graph.setFlag(CO_GENERATOR)
+        if self.scope.generator is not None:
+            self.graph.setFlag(CO_GENERATOR)
 
 class AbstractClassCode:
 
@@ -1304,6 +1301,12 @@
         self.__super_init(klass, scopes, module)
         self.graph.setFreeVars(self.scope.get_free_vars())
         self.graph.setCellVars(self.scope.get_cell_vars())
+        self.set_lineno(klass)
+        self.emit("LOAD_GLOBAL", "__name__")
+        self.storeName("__module__")
+        if klass.doc:
+            self.emit("LOAD_CONST", klass.doc)
+            self.storeName('__doc__')
 
 def generateArgList(arglist):
     """Generate an arg list marking TupleArgs"""
@@ -1379,7 +1382,5 @@
     return wrapper[node.__class__](node)
 
 if __name__ == "__main__":
-    import sys
-
     for file in sys.argv[1:]:
         compileFile(file)
diff --git a/Lib/compiler/symbols.py b/Lib/compiler/symbols.py
index cd7bceb..9f47fa3 100644
--- a/Lib/compiler/symbols.py
+++ b/Lib/compiler/symbols.py
@@ -249,6 +249,9 @@
         scope = ClassScope(node.name, self.module)
         if parent.nested or isinstance(parent, FunctionScope):
             scope.nested = 1
+        if node.doc is not None:
+            scope.add_def('__doc__')
+        scope.add_def('__module__')
         self.scopes[node] = scope
         prev = self.klass
         self.klass = node.name
diff --git a/Lib/compiler/transformer.py b/Lib/compiler/transformer.py
index 382ea41..d1001bd 100644
--- a/Lib/compiler/transformer.py
+++ b/Lib/compiler/transformer.py
@@ -37,7 +37,11 @@
 
 def parseFile(path):
     f = open(path)
-    src = f.read()
+    # XXX The parser API tolerates files without a trailing newline,
+    # but not strings without a trailing newline.  Always add an extra
+    # newline to the file contents, since we're going through the string
+    # version of the API.
+    src = f.read() + "\n"
     f.close()
     return parse(src)
 
@@ -100,6 +104,7 @@
                                token.STRING: self.atom_string,
                                token.NAME: self.atom_name,
                                }
+        self.encoding = None
 
     def transform(self, tree):
         """Transform an AST into a modified parse tree."""
@@ -110,6 +115,7 @@
     def parsesuite(self, text):
         """Return a modified parse tree for the given suite text."""
         # Hack for handling non-native line endings on non-DOS like OSs.
+        # this can go now we have universal newlines?
         text = text.replace('\x0d', '')
         return self.transform(parser.suite(text))
 
@@ -131,6 +137,12 @@
     def compile_node(self, node):
         ### emit a line-number node?
         n = node[0]
+
+        if n == symbol.encoding_decl:
+            self.encoding = node[2]
+            node = node[1]
+            n = node[0]
+        
         if n == symbol.single_input:
             return self.single_input(node[1:])
         if n == symbol.file_input:
@@ -519,6 +531,7 @@
         return self.com_binary(Tuple, nodelist)
 
     testlist_safe = testlist # XXX
+    testlist1 = testlist
     exprlist = testlist
 
     def test(self, nodelist):
@@ -637,11 +650,14 @@
     def factor(self, nodelist):
         elt = nodelist[0]
         t = elt[0]
+        print "source", nodelist[-1]
         node = self.com_node(nodelist[-1])
+        # need to handle (unary op)constant here...
         if t == token.PLUS:
             node = UnaryAdd(node)
             node.lineno = elt[2]
         elif t == token.MINUS:
+            print node
             node = UnarySub(node)
             node.lineno = elt[2]
         elif t == token.TILDE:
@@ -699,11 +715,21 @@
         n.lineno = nodelist[0][2]
         return n
 
+    def decode_literal(self, lit):
+        if self.encoding:
+            # this is particularly fragile & a bit of a
+            # hack... changes in compile.c:parsestr and
+            # tokenizer.c must be reflected here.
+            if self.encoding not in ['utf-8', 'iso-8859-1']:
+                lit = unicode(lit, 'utf-8').encode(self.encoding)
+            return eval("# coding: %s\n%s" % (self.encoding, lit))
+        else:
+            return eval(lit)
+
     def atom_string(self, nodelist):
-        ### need to verify this matches compile.c
         k = ''
         for node in nodelist:
-            k = k + eval(node[1])
+            k += self.decode_literal(node[1])
         n = Const(k)
         n.lineno = nodelist[0][2]
         return n