Improved the bytecode optimizer.
* Can now test whether a span of code forms a single basic block.
* Optimize inverted comparisons.
* Optimize UNARY_NOT followed by a conditional jump.
* Added a new opcode, NOP, to keep code size constant.
* Applied NOP to previous transformations where appropriate.

Note: NOP would not be necessary if additional passes were added to
re-target jump addresses and update the co_lnotab line-number mapping.
That would yield slightly faster and cleaner bytecode, at the expense
of the optimizer's simplicity and of its independence from the
line-numbering structure.  A short demonstration of the new rewrites
follows.
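
The demonstration: on a build with this patch applied, the stdlib dis
module makes the comparison rewrites visible; an unpatched interpreter
shows COMPARE_OP (in) followed by UNARY_NOT instead.  (Python 2.x
syntax, matching this era of the tree.)

    import dis

    def f(x, y):
        # COMPARE_OP (in) + UNARY_NOT  -->  COMPARE_OP (not in) + NOP
        return not x in y

    def g(x):
        # UNARY_NOT + JUMP_IF_FALSE  -->  NOP + JUMP_IF_TRUE
        if not x:
            return 1
        return 2

    dis.dis(f)
    dis.dis(g)
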
diff --git a/Python/ceval.c b/Python/ceval.c
index 3ea1bdc..7f8f654 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c
@@ -873,6 +873,9 @@
 
 		/* case STOP_CODE: this is an error! */
 
+		case NOP:
+			goto fast_next_opcode;
+
 		case LOAD_FAST:
 			x = GETLOCAL(oparg);
 			if (x != NULL) {
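
The handler itself is trivial: NOP costs one trip through the dispatch
switch and does nothing else.  The reason it exists at all is that
jump arguments and co_lnotab entries are byte offsets into the code
string, so a transformation that shrinks an instruction must pad with
NOPs rather than close the gap.  A toy model of the dispatch behaviour
(opcode numbers as in opcode.py, but with a simplified one-byte
argument encoding; real arguments are two bytes):

    NOP, LOAD_CONST, RETURN_VALUE = 9, 100, 83

    def run(code, consts):
        stack, pc = [], 0
        while pc < len(code):
            op = code[pc]
            pc += 1
            if op == NOP:        # mirrors "goto fast_next_opcode"
                continue
            elif op == LOAD_CONST:
                stack.append(consts[code[pc]])
                pc += 1
            elif op == RETURN_VALUE:
                return stack.pop()

    print(run([NOP, LOAD_CONST, 0, NOP, RETURN_VALUE], [42]))  # -> 42
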
diff --git a/Python/compile.c b/Python/compile.c
index 57f0edb..4afd0eb 100644
--- a/Python/compile.c
+++ b/Python/compile.c
@@ -328,6 +328,43 @@
 #define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
 #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
 #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
+#define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
+#define ISBASICBLOCK(blocks, start, bytes) (blocks[start]==blocks[start+bytes-1])
+
+static unsigned int *
+markblocks(unsigned char *code, int len)
+{
+	unsigned int *blocks = PyMem_Malloc(len*sizeof(int));
+	int i,j, opcode, oldblock, newblock, blockcnt = 0;
+
+	if (blocks == NULL)
+		return NULL;
+	memset(blocks, 0, len*sizeof(int));
+	for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
+		opcode = code[i];
+		switch (opcode) {
+			case FOR_ITER:
+			case JUMP_FORWARD:
+			case JUMP_IF_FALSE:
+			case JUMP_IF_TRUE:
+			case JUMP_ABSOLUTE:
+			case CONTINUE_LOOP:
+			case SETUP_LOOP:
+			case SETUP_EXCEPT:
+			case SETUP_FINALLY:
+				j = GETJUMPTGT(code, i);
+				oldblock = blocks[j];
+				newblock = ++blockcnt;
+				for (; j<len ; j++) {
+					if (blocks[j] != (unsigned)oldblock)
+						break;
+					blocks[j] = newblock;
+				}
+				break;
+		}
+	}
+	return blocks;
+}
 
 static PyObject *
 optimize_code(PyObject *code, PyObject* consts)
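
The marking scheme: every offset starts in block 0, and each jump
target opens a fresh block number running from the target to the end
of whatever block it landed in.  Two offsets then share a number
exactly when no jump target falls between them, which is what
ISBASICBLOCK tests.  A sketch of the same idea in Python (assuming the
jump targets have already been collected; the real code finds them by
walking the opcodes, as above):

    def markblocks(targets, length):
        blocks = [0] * length
        blockcnt = 0
        for j in targets:
            oldblock = blocks[j]
            blockcnt += 1
            while j < length and blocks[j] == oldblock:
                blocks[j] = blockcnt
                j += 1
        return blocks

    def isbasicblock(blocks, start, nbytes):
        return blocks[start] == blocks[start + nbytes - 1]

    blocks = markblocks([4], 8)         # one jump landing at offset 4
    print(isbasicblock(blocks, 0, 4))   # True: no target inside 0..3
    print(isbasicblock(blocks, 2, 4))   # False: offset 4 is a target
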
@@ -335,18 +372,24 @@
 	int i, j, codelen;
 	int tgt, tgttgt, opcode;
 	unsigned char *codestr;
+	unsigned int *blocks;
 
 	/* Make a modifiable copy of the code string */
 	if (!PyString_Check(code))
 		goto exitUnchanged;
 	codelen = PyString_Size(code);
 	codestr = PyMem_Malloc(codelen);
-	if (codestr == NULL) 
+	if (codestr == NULL)
 		goto exitUnchanged;
 	codestr = memcpy(codestr, PyString_AS_STRING(code), codelen);
+	blocks = markblocks(codestr, codelen);
+	if (blocks == NULL) {
+		PyMem_Free(codestr);
+		goto exitUnchanged;
+	}
 	assert(PyTuple_Check(consts));
 
-	for (i=0 ; i<codelen-7 ; i += HAS_ARG(codestr[i]) ? 3 : 1) {
+	for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
 		opcode = codestr[i];
 		switch (opcode) {
 
@@ -363,8 +406,8 @@
 			SETARG(codestr, i, 4);
 			break;
 
-		/* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2.
-		   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1.
+		/* Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2 JMP+2 NOP NOP.
+		   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2 JMP+1 NOP.
 		   Note, these opcodes occur together only in assignment
 		   statements.  Accordingly, the unpack opcode is never
 		   a jump target.  */
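
The effect is easy to see on a two-element swap: with the patch, the
BUILD_TUPLE 2 / UNPACK_SEQUENCE 2 pair the compiler emits for the
assignment becomes ROT_TWO followed by a JUMP_FORWARD over two NOPs
(exact offsets vary by build):

    import dis

    def swap(a, b):
        a, b = b, a
        return a, b

    dis.dis(swap)   # expect ROT_TWO, JUMP_FORWARD 2, NOP, NOP
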
@@ -377,8 +420,8 @@
 				codestr[i] = ROT_TWO;
 				codestr[i+1] = JUMP_FORWARD;
 				SETARG(codestr, i+1, 2);
-				codestr[i+4] = DUP_TOP;	 /* Filler codes used as NOPs */
-				codestr[i+5] = POP_TOP;
+				codestr[i+4] = NOP;
+				codestr[i+5] = NOP;
 				continue;
 			} 
 			if (GETARG(codestr, i) == 3 && \
@@ -386,11 +429,41 @@
 				codestr[i] = ROT_THREE;
 				codestr[i+1] = ROT_TWO;
 				codestr[i+2] = JUMP_FORWARD;
-				SETARG(codestr, i+2, 1);	
-				codestr[i+5] = DUP_TOP;
+				SETARG(codestr, i+2, 1);
+				codestr[i+5] = NOP;
 			}
 			break;
 
+		/* Simplify inverted tests.
+		   Must verify that sequence is a basic block because the jump
+		   can itself be a jump target.  Also, must verify that *both*
+		   jump alternatives go to a POP_TOP.  Otherwise, the code will
+		   expect the stack value to have been inverted.  */
+		case UNARY_NOT:
+			if (codestr[i+1] != JUMP_IF_FALSE || \
+			    codestr[i+4] != POP_TOP || \
+			    !ISBASICBLOCK(blocks,i,5))
+				continue;
+			tgt = GETJUMPTGT(codestr, (i+1));
+			if (codestr[tgt] != POP_TOP)
+				continue;
+			codestr[i] = NOP;
+			codestr[i+1] = JUMP_IF_TRUE;
+			break;
+
+		/* not a is b -->  a is not b
+		   not a in b -->  a not in b
+		   not a is not b -->  a is b
+		   not a not in b -->  a in b */
+		case COMPARE_OP:
+			j = GETARG(codestr, i);
+			if (codestr[i+3] != UNARY_NOT || j < 6 || \
+			    j > 9 || !ISBASICBLOCK(blocks,i,4))
+				continue;
+			SETARG(codestr, i, (j^1));
+			codestr[i+3] = NOP;
+			break;
+
 		/* Replace jumps to unconditional jumps */
 		case FOR_ITER:
 		case JUMP_FORWARD:
@@ -402,7 +475,7 @@
 		case SETUP_EXCEPT:
 		case SETUP_FINALLY:
 			tgt = GETJUMPTGT(codestr, i);
-			if (!UNCONDITIONAL_JUMP(codestr[tgt])) 
+			if (!UNCONDITIONAL_JUMP(codestr[tgt]))
 				continue;
 			tgttgt = GETJUMPTGT(codestr, tgt);
 			if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
@@ -422,6 +495,7 @@
 	}
 	code = PyString_FromStringAndSize(codestr, codelen);
 	PyMem_Free(codestr);
+	PyMem_Free(blocks);
 	return code;
 
 exitUnchanged:
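
A worked check of the (j^1) trick in the COMPARE_OP case above: only
indices 6-9 of the comparison table pair an operator with its negation
at an adjacent slot, so flipping the low bit of the oparg swaps them.
That is why the pass skips j < 6 and j > 9: for the ordering operators
the adjacent entry is not a negation (and not (a < b) is not the same
as a >= b under rich comparisons anyway).  Indices below are from the
Python 2 dis.cmp_op tuple:

    cmp_op = ('<', '<=', '==', '!=', '>', '>=',
              'in', 'not in', 'is', 'is not')
    for j in (6, 7, 8, 9):
        print('not (a %s b)  ==>  a %s b' % (cmp_op[j], cmp_op[j ^ 1]))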