A 2% speed improvement with gcc on little-endian machines. My guess is that the new OPARG() pattern, which replaces NEXTARG(), is detected and optimized into a single 16-bit load, i.e. *(short *).

commit: 1515fc2a013b01819df823a155c6c19e35b9f71a [log] [tgz]
author: Armin Rigo <arigo@tunes.org> Sat Mar 20 20:03:17 2004 +0000
committer: Armin Rigo <arigo@tunes.org> Sat Mar 20 20:03:17 2004 +0000
tree: 08ffe06be703fba3123d9dfdc78c7a6afff8ab72
parent: 09240f65f8becdb72bb72bd44817e11c7192b24f [diff]
diff --git a/Python/ceval.c b/Python/ceval.c
index d3a0053..51df60a 100644
--- a/Python/ceval.c
+++ b/Python/ceval.c

@@ -627,7 +627,8 @@
 
 #define INSTR_OFFSET()	(next_instr - first_instr)
 #define NEXTOP()	(*next_instr++)
-#define NEXTARG()	(next_instr += 2, (next_instr[-1]<<8) + next_instr[-2])
+#define OPARG()		(next_instr[0] + (next_instr[1]<<8))
+#define OPARG_SIZE	2
 #define JUMPTO(x)	(next_instr = first_instr + (x))
 #define JUMPBY(x)	(next_instr += (x))
 
@@ -658,8 +659,7 @@
 #endif
 
 #define PREDICTED(op)		PRED_##op: next_instr++
-#define PREDICTED_WITH_ARG(op)	PRED_##op: oparg = (next_instr[2]<<8) + \
-				next_instr[1]; next_instr += 3
+#define PREDICTED_WITH_ARG(op)	PRED_##op: next_instr++; oparg = OPARG(); next_instr += OPARG_SIZE
 
 /* Stack manipulation macros */
 
@@ -862,8 +862,11 @@
 		/* Extract opcode and argument */
 
 		opcode = NEXTOP();
-		if (HAS_ARG(opcode))
-			oparg = NEXTARG();
+		if (HAS_ARG(opcode)) {
+			oparg = OPARG();
+			next_instr += OPARG_SIZE;
+		}
+
 	  dispatch_opcode:
 #ifdef DYNAMIC_EXECUTION_PROFILE
 #ifdef DXPAIRS
@@ -2249,7 +2252,8 @@
 
 		case EXTENDED_ARG:
 			opcode = NEXTOP();
-			oparg = oparg<<16 | NEXTARG();
+			oparg = oparg<<16 | OPARG();
+			next_instr += OPARG_SIZE;
 			goto dispatch_opcode;
 
 		default:
commit	1515fc2a013b01819df823a155c6c19e35b9f71a	[log] [tgz]
author	Armin Rigo <arigo@tunes.org>	Sat Mar 20 20:03:17 2004 +0000
committer	Armin Rigo <arigo@tunes.org>	Sat Mar 20 20:03:17 2004 +0000
tree	08ffe06be703fba3123d9dfdc78c7a6afff8ab72
parent	09240f65f8becdb72bb72bd44817e11c7192b24f [diff]