http://bugs.python.org/issue4715

This patch by Antoine Pitrou optimizes the bytecode for conditional branches by
merging the following "POP_TOP" instruction into the conditional jump.  For
example, the list comprehension "[x for x in l if not x]" produced the
following bytecode:

  1           0 BUILD_LIST               0
              3 LOAD_FAST                0 (.0)
        >>    6 FOR_ITER                23 (to 32)
              9 STORE_FAST               1 (x)
             12 LOAD_FAST                1 (x)
             15 JUMP_IF_TRUE            10 (to 28)
             18 POP_TOP
             19 LOAD_FAST                1 (x)
             22 LIST_APPEND              2
             25 JUMP_ABSOLUTE            6
        >>   28 POP_TOP
             29 JUMP_ABSOLUTE            6
        >>   32 RETURN_VALUE

but after the patch it produces the following bytecode:

  1           0 BUILD_LIST               0
              3 LOAD_FAST                0 (.0)
        >>    6 FOR_ITER                18 (to 27)
              9 STORE_FAST               1 (x)
             12 LOAD_FAST                1 (x)
             15 POP_JUMP_IF_TRUE         6
             18 LOAD_FAST                1 (x)
             21 LIST_APPEND              2
             24 JUMP_ABSOLUTE            6
        >>   27 RETURN_VALUE

Notice that not only the code is shorter, but the conditional jump
(POP_JUMP_IF_TRUE) jumps right to the start of the loop instead of going through
the JUMP_ABSOLUTE at the end. "continue" statements are helped
similarly.

Furthermore, the old jump opcodes (JUMP_IF_FALSE, JUMP_IF_TRUE) have been
replaced by two new opcodes:
- JUMP_IF_TRUE_OR_POP, which jumps if true and pops otherwise
- JUMP_IF_FALSE_OR_POP, which jumps if false and pops otherwise
diff --git a/Python/peephole.c b/Python/peephole.c
index d31c89b..c413fc8 100644
--- a/Python/peephole.c
+++ b/Python/peephole.c
@@ -13,7 +13,12 @@
 
 #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
 #define UNCONDITIONAL_JUMP(op)	(op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
-#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
+#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
+	|| op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
+#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \
+	|| op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \
+	|| op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)
+#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP)
 #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
 #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
 #define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
@@ -237,8 +242,10 @@
 		switch (opcode) {
 			case FOR_ITER:
 			case JUMP_FORWARD:
-			case JUMP_IF_FALSE:
-			case JUMP_IF_TRUE:
+			case JUMP_IF_FALSE_OR_POP:
+			case JUMP_IF_TRUE_OR_POP:
+			case POP_JUMP_IF_FALSE:
+			case POP_JUMP_IF_TRUE:
 			case JUMP_ABSOLUTE:
 			case CONTINUE_LOOP:
 			case SETUP_LOOP:
@@ -363,29 +370,24 @@
 	assert(PyList_Check(consts));
 
 	for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
+	  reoptimize_current:
 		opcode = codestr[i];
 
 		lastlc = cumlc;
 		cumlc = 0;
 
 		switch (opcode) {
-
-			/* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with 
-			   with	   JUMP_IF_TRUE POP_TOP */
+			/* Replace UNARY_NOT POP_JUMP_IF_FALSE 
+			   with    POP_JUMP_IF_TRUE */
 			case UNARY_NOT:
-				if (codestr[i+1] != JUMP_IF_FALSE  ||
-				    codestr[i+4] != POP_TOP  ||
-				    !ISBASICBLOCK(blocks,i,5))
+				if (codestr[i+1] != POP_JUMP_IF_FALSE
+				    || !ISBASICBLOCK(blocks,i,4))
 					continue;
-				tgt = GETJUMPTGT(codestr, (i+1));
-				if (codestr[tgt] != POP_TOP)
-					continue;
-				j = GETARG(codestr, i+1) + 1;
-				codestr[i] = JUMP_IF_TRUE;
+				j = GETARG(codestr, i+1);
+				codestr[i] = POP_JUMP_IF_TRUE;
 				SETARG(codestr, i, j);
-				codestr[i+3] = POP_TOP;
-				codestr[i+4] = NOP;
-				break;
+				codestr[i+3] = NOP;
+				goto reoptimize_current;
 
 				/* not a is b -->  a is not b
 				   not a in b -->  a not in b
@@ -417,29 +419,19 @@
 				break;
 
 				/* Skip over LOAD_CONST trueconst
-                                   JUMP_IF_FALSE xx  POP_TOP */
+				   POP_JUMP_IF_FALSE xx. This improves
+				   "while 1" performance. */
 			case LOAD_CONST:
 				cumlc = lastlc + 1;
 				j = GETARG(codestr, i);
-				if (codestr[i+3] != JUMP_IF_FALSE  ||
-				    codestr[i+6] != POP_TOP  ||
+				if (codestr[i+3] != POP_JUMP_IF_FALSE  ||
 				    !ISBASICBLOCK(blocks,i,7)  ||
 				    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
 					continue;
-				memset(codestr+i, NOP, 7);
+				memset(codestr+i, NOP, 6);
 				cumlc = 0;
 				break;
 
-				/* Replace POP_TOP JUMP_FORWARD 1 POP_TOP 
-				   with    NOP NOP NOP NOP POP_TOP.           */
-			case POP_TOP:
-				if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
-					GETJUMPTGT(codestr, i+1) == i+5 &&
-					codestr[i+4] == POP_TOP &&
-					ISBASICBLOCK(blocks,i,4))
-						memset(codestr+i, NOP, 4);
-				break;
-
 				/* Try to fold tuples of constants (includes a case for lists
 				   which are only used for "in" and "not in" tests).
 				   Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
@@ -524,27 +516,47 @@
 				   "if a or b:"
 				   "a and b or c"
 				   "(a and b) and c"
-				   x:JUMP_IF_FALSE y   y:JUMP_IF_FALSE z  -->  x:JUMP_IF_FALSE z
-				   x:JUMP_IF_FALSE y   y:JUMP_IF_TRUE z	 -->  x:JUMP_IF_FALSE y+3
+				   x:POP_OR_JUMP y   y:POP_OR_JUMP z  -->  x:POP_OR_JUMP z
+				   x:POP_OR_JUMP y   y:JUMP_OR_POP z  -->  x:POP_JUMP_IF_FALSE y+3
 				   where y+3 is the instruction following the second test.
 				*/
-			case JUMP_IF_FALSE:
-			case JUMP_IF_TRUE:
+			case JUMP_IF_FALSE_OR_POP:
+			case JUMP_IF_TRUE_OR_POP:
 				tgt = GETJUMPTGT(codestr, i);
 				j = codestr[tgt];
-				if (j == JUMP_IF_FALSE	||  j == JUMP_IF_TRUE) {
-					if (j == opcode) {
-						tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
+				if (CONDITIONAL_JUMP(j)) {
+					/* NOTE: all possible jumps here are
+					   absolute! */
+					if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) {
+						/* The second jump will be
+						   taken iff the first is. */
+						tgttgt = GETJUMPTGT(codestr, tgt);
+						/* The current opcode inherits
+						   its target's stack behaviour */
+						codestr[i] = j;
 						SETARG(codestr, i, tgttgt);
+						goto reoptimize_current;
 					} else {
-						tgt -= i;
-						SETARG(codestr, i, tgt);
+						/* The second jump is not taken
+						   if the first is (so jump past
+						   it), and all conditional
+						   jumps pop their argument when
+						   they're not taken (so change
+						   the first jump to pop its
+						   argument when it's taken). */
+						if (JUMPS_ON_TRUE(opcode))
+							codestr[i] = POP_JUMP_IF_TRUE;
+						else
+							codestr[i] = POP_JUMP_IF_FALSE;
+						SETARG(codestr, i, (tgt + 3));
+						goto reoptimize_current;
 					}
-					break;
 				}
-				/* Intentional fallthrough */  
+				/* Intentional fallthrough */
 
 				/* Replace jumps to unconditional jumps */
+			case POP_JUMP_IF_FALSE:
+			case POP_JUMP_IF_TRUE:
 			case FOR_ITER:
 			case JUMP_FORWARD:
 			case JUMP_ABSOLUTE:
@@ -621,14 +633,16 @@
 
 			case JUMP_ABSOLUTE:
 			case CONTINUE_LOOP:
+			case POP_JUMP_IF_FALSE:
+			case POP_JUMP_IF_TRUE:
+			case JUMP_IF_FALSE_OR_POP:
+			case JUMP_IF_TRUE_OR_POP:
 				j = addrmap[GETARG(codestr, i)];
 				SETARG(codestr, i, j);
 				break;
 
 			case FOR_ITER:
 			case JUMP_FORWARD:
-			case JUMP_IF_FALSE:
-			case JUMP_IF_TRUE:
 			case SETUP_LOOP:
 			case SETUP_EXCEPT:
 			case SETUP_FINALLY: