/* Peephole optimizations for bytecode compiler. */

#include "Python.h"

#include "Python-ast.h"
#include "node.h"
#include "pyarena.h"
#include "ast.h"
#include "code.h"
#include "compile.h"
#include "symtable.h"
#include "opcode.h"

#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
#define ISBASICBLOCK(blocks, start, bytes) \
        (blocks[start]==blocks[start+bytes-1])
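
/* Illustrative example: in this bytecode format an instruction is one
   opcode byte, followed by a two-byte little-endian argument when
   HAS_ARG(op) is true (hence CODESIZE of 3 or 1).  So the three bytes
   {LOAD_CONST, 0x02, 0x01} encode a single LOAD_CONST whose argument is
   0x0102 == 258; GETARG reads that argument and SETARG writes it back
   in the same byte layout. */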

/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Bails out with no change if one or more of the LOAD_CONSTs is missing.
   Also works for BUILD_LIST when followed by an "in" or "not in" test.
*/
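/* Example: for the expression (1, 2, 3) the compiler emits

       LOAD_CONST 1  LOAD_CONST 2  LOAD_CONST 3  BUILD_TUPLE 3

   (twelve bytes).  After folding, the first nine bytes become NOPs and
   the BUILD_TUPLE slot becomes a LOAD_CONST of the tuple (1, 2, 3),
   which has been appended to the consts list. */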
static int
tuple_of_constants(unsigned char *codestr, int n, PyObject *consts)
{
        PyObject *newconst, *constant;
        Py_ssize_t i, arg, len_consts;

        /* Pre-conditions */
        assert(PyList_CheckExact(consts));
        assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
        assert(GETARG(codestr, (n*3)) == n);
        for (i=0 ; i<n ; i++)
                assert(codestr[i*3] == LOAD_CONST);

        /* Build up new tuple of constants */
        newconst = PyTuple_New(n);
        if (newconst == NULL)
                return 0;
        len_consts = PyList_GET_SIZE(consts);
        for (i=0 ; i<n ; i++) {
                arg = GETARG(codestr, (i*3));
                assert(arg < len_consts);
                constant = PyList_GET_ITEM(consts, arg);
                Py_INCREF(constant);
                PyTuple_SET_ITEM(newconst, i, constant);
        }

        /* Append folded constant onto consts */
        if (PyList_Append(consts, newconst)) {
                Py_DECREF(newconst);
                return 0;
        }
        Py_DECREF(newconst);

        /* Write NOPs over the old LOAD_CONSTs and
           add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
        memset(codestr, NOP, n*3);
        codestr[n*3] = LOAD_CONST;
        SETARG(codestr, (n*3), len_consts);
        return 1;
}

/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with LOAD_CONST binop(c1,c2).
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Abandons the transformation if the folding fails (e.g., 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value (currently 20).  That keeps pyc files
   from becoming large in the presence of code like: (None,)*1000.
*/
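/* Example: "2 ** 10" is compiled as LOAD_CONST 2, LOAD_CONST 10,
   BINARY_POWER (seven bytes).  Folding rewrites this as four NOPs
   followed by LOAD_CONST 1024, with 1024 appended to the consts list. */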
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
{
        PyObject *newconst, *v, *w;
        Py_ssize_t len_consts, size;
        int opcode;

        /* Pre-conditions */
        assert(PyList_CheckExact(consts));
        assert(codestr[0] == LOAD_CONST);
        assert(codestr[3] == LOAD_CONST);

        /* Create new constant */
        v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
        w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
        opcode = codestr[6];
        switch (opcode) {
                case BINARY_POWER:
                        newconst = PyNumber_Power(v, w, Py_None);
                        break;
                case BINARY_MULTIPLY:
                        newconst = PyNumber_Multiply(v, w);
                        break;
                case BINARY_TRUE_DIVIDE:
                        newconst = PyNumber_TrueDivide(v, w);
                        break;
                case BINARY_FLOOR_DIVIDE:
                        newconst = PyNumber_FloorDivide(v, w);
                        break;
                case BINARY_MODULO:
                        newconst = PyNumber_Remainder(v, w);
                        break;
                case BINARY_ADD:
                        newconst = PyNumber_Add(v, w);
                        break;
                case BINARY_SUBTRACT:
                        newconst = PyNumber_Subtract(v, w);
                        break;
                case BINARY_SUBSCR:
                        newconst = PyObject_GetItem(v, w);
                        break;
                case BINARY_LSHIFT:
                        newconst = PyNumber_Lshift(v, w);
                        break;
                case BINARY_RSHIFT:
                        newconst = PyNumber_Rshift(v, w);
                        break;
                case BINARY_AND:
                        newconst = PyNumber_And(v, w);
                        break;
                case BINARY_XOR:
                        newconst = PyNumber_Xor(v, w);
                        break;
                case BINARY_OR:
                        newconst = PyNumber_Or(v, w);
                        break;
                default:
                        /* Called with an unknown opcode */
                        PyErr_Format(PyExc_SystemError,
                                     "unexpected binary operation %d on a constant",
                                     opcode);
                        return 0;
        }
        if (newconst == NULL) {
                PyErr_Clear();
                return 0;
        }
        size = PyObject_Size(newconst);
        if (size == -1)
                PyErr_Clear();
        else if (size > 20) {
                Py_DECREF(newconst);
                return 0;
        }

        /* Append folded constant into consts table */
        len_consts = PyList_GET_SIZE(consts);
        if (PyList_Append(consts, newconst)) {
                Py_DECREF(newconst);
                return 0;
        }
        Py_DECREF(newconst);

        /* Write NOP NOP NOP NOP LOAD_CONST newconst */
        memset(codestr, NOP, 4);
        codestr[4] = LOAD_CONST;
        SETARG(codestr, 4, len_consts);
        return 1;
}
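/* Replace LOAD_CONST c1, UNARY_OP
   with LOAD_CONST unary_op(c1).
   As with the binary case, the consts table must still be in list form
   and the transformation is abandoned when the operation fails. */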
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
{
        PyObject *newconst=NULL, *v;
        Py_ssize_t len_consts;
        int opcode;

        /* Pre-conditions */
        assert(PyList_CheckExact(consts));
        assert(codestr[0] == LOAD_CONST);

        /* Create new constant */
        v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
        opcode = codestr[3];
        switch (opcode) {
                case UNARY_NEGATIVE:
                        /* Preserve the sign of -0.0: zero is false, so it
                           is left unfolded and negated at runtime */
                        if (PyObject_IsTrue(v) == 1)
                                newconst = PyNumber_Negative(v);
                        break;
                case UNARY_INVERT:
                        newconst = PyNumber_Invert(v);
                        break;
                default:
                        /* Called with an unknown opcode */
                        PyErr_Format(PyExc_SystemError,
                                     "unexpected unary operation %d on a constant",
                                     opcode);
                        return 0;
        }
        if (newconst == NULL) {
                PyErr_Clear();
                return 0;
        }

        /* Append folded constant into consts table */
        len_consts = PyList_GET_SIZE(consts);
        if (PyList_Append(consts, newconst)) {
                Py_DECREF(newconst);
                return 0;
        }
        Py_DECREF(newconst);

        /* Write NOP LOAD_CONST newconst */
        codestr[0] = NOP;
        codestr[1] = LOAD_CONST;
        SETARG(codestr, 1, len_consts);
        return 1;
}
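/* Compute a block number for each bytecode offset.  The first pass flags
   every jump target; the second turns the flags into cumulative block
   numbers, so two offsets share a number exactly when no jump target
   falls between them.  This is what ISBASICBLOCK consults. */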
static unsigned int *
markblocks(unsigned char *code, int len)
{
        unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
        int i, j, opcode, blockcnt = 0;

        if (blocks == NULL) {
                PyErr_NoMemory();
                return NULL;
        }
        memset(blocks, 0, len*sizeof(int));

        /* Mark labels in the first pass */
        for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
                opcode = code[i];
                switch (opcode) {
                        case FOR_ITER:
                        case JUMP_FORWARD:
                        case JUMP_IF_FALSE:
                        case JUMP_IF_TRUE:
                        case JUMP_ABSOLUTE:
                        case CONTINUE_LOOP:
                        case SETUP_LOOP:
                        case SETUP_EXCEPT:
                        case SETUP_FINALLY:
                                j = GETJUMPTGT(code, i);
                                blocks[j] = 1;
                                break;
                }
        }
        /* Build block numbers in the second pass */
        for (i=0 ; i<len ; i++) {
                blockcnt += blocks[i];  /* increment blockcnt over labels */
                blocks[i] = blockcnt;
        }
        return blocks;
}

/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code
   containing extended arguments or that has a length over 32,700.  That
   allows us to avoid overflow and sign issues.  Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass.  Line numbering is adjusted accordingly. */
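
/* Example: folding "2 ** 10" leaves NOP NOP NOP NOP LOAD_CONST 1024 in
   place; the cleanup pass at the end then drops the four NOPs, every
   later instruction moves four bytes left, and addrmap[] records that
   shift so jumps across the gap can be retargeted. */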

PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lineno_obj)
{
        Py_ssize_t i, j, codelen;
        int nops, h, adj;
        int tgt, tgttgt, opcode;
        unsigned char *codestr = NULL;
        unsigned char *lineno;
        int *addrmap = NULL;
        int new_line, cum_orig_line, last_line, tabsiz;
        int cumlc=0, lastlc=0;  /* Count runs of consecutive LOAD_CONSTs */
        unsigned int *blocks = NULL;
        char *name;

        /* Bail out if an exception is set */
        if (PyErr_Occurred())
                goto exitUnchanged;

        /* Bypass optimization when the lineno table is too complex */
        assert(PyString_Check(lineno_obj));
        lineno = (unsigned char*)PyString_AS_STRING(lineno_obj);
        tabsiz = PyString_GET_SIZE(lineno_obj);
        if (memchr(lineno, 255, tabsiz) != NULL)
                goto exitUnchanged;

        /* Avoid situations where jump retargeting could overflow */
        assert(PyString_Check(code));
        codelen = PyString_Size(code);
        if (codelen > 32700)
                goto exitUnchanged;

        /* Make a modifiable copy of the code string */
        codestr = (unsigned char *)PyMem_Malloc(codelen);
        if (codestr == NULL)
                goto exitUnchanged;
        codestr = (unsigned char *)memcpy(codestr,
                                          PyString_AS_STRING(code), codelen);

        /* Verify that RETURN_VALUE terminates the codestring.  This allows
           the various transformation patterns to look ahead several
           instructions without additional checks to make sure they are not
           looking beyond the end of the code string.
        */
        if (codestr[codelen-1] != RETURN_VALUE)
                goto exitUnchanged;

        /* Mapping to new jump targets after NOPs are removed */
        addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
        if (addrmap == NULL)
                goto exitUnchanged;

        blocks = markblocks(codestr, codelen);
        if (blocks == NULL)
                goto exitUnchanged;
        assert(PyList_Check(consts));

        for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
                opcode = codestr[i];

                lastlc = cumlc;
                cumlc = 0;

                switch (opcode) {

                        /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP
                           with JUMP_IF_TRUE POP_TOP NOP; bumping the jump
                           argument by one keeps the original target. */
                        case UNARY_NOT:
                                if (codestr[i+1] != JUMP_IF_FALSE  ||
                                    codestr[i+4] != POP_TOP  ||
                                    !ISBASICBLOCK(blocks,i,5))
                                        continue;
                                tgt = GETJUMPTGT(codestr, (i+1));
                                if (codestr[tgt] != POP_TOP)
                                        continue;
                                j = GETARG(codestr, i+1) + 1;
                                codestr[i] = JUMP_IF_TRUE;
                                SETARG(codestr, i, j);
                                codestr[i+3] = POP_TOP;
                                codestr[i+4] = NOP;
                                break;

                        /* not a is b -->  a is not b
                           not a in b -->  a not in b
                           not a is not b -->  a is b
                           not a not in b -->  a in b
                        */
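                        /* The cmp_op argument values 6..9 are "in",
                           "not in", "is" and "is not"; XOR-ing the
                           argument with 1 therefore swaps each test
                           with its negation. */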
                        case COMPARE_OP:
                                j = GETARG(codestr, i);
                                if (j < 6  ||  j > 9  ||
                                    codestr[i+3] != UNARY_NOT  ||
                                    !ISBASICBLOCK(blocks,i,4))
                                        continue;
                                SETARG(codestr, i, (j^1));
                                codestr[i+3] = NOP;
                                break;

                        /* Replace LOAD_GLOBAL/LOAD_NAME None
                           with LOAD_CONST None */
                        case LOAD_NAME:
                        case LOAD_GLOBAL:
                                j = GETARG(codestr, i);
                                name = PyString_AsString(PyTuple_GET_ITEM(names, j));
                                if (name == NULL  ||  strcmp(name, "None") != 0)
                                        continue;
                                for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) {
                                        if (PyList_GET_ITEM(consts, j) == Py_None) {
                                                codestr[i] = LOAD_CONST;
                                                SETARG(codestr, i, j);
                                                cumlc = lastlc + 1;
                                                break;
                                        }
                                }
                                break;

                        /* Skip over LOAD_CONST trueconst
                           JUMP_IF_FALSE xx POP_TOP */
                        case LOAD_CONST:
                                cumlc = lastlc + 1;
                                j = GETARG(codestr, i);
                                if (codestr[i+3] != JUMP_IF_FALSE  ||
                                    codestr[i+6] != POP_TOP  ||
                                    !ISBASICBLOCK(blocks,i,7)  ||
                                    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                                        continue;
                                memset(codestr+i, NOP, 7);
                                cumlc = 0;
                                break;

                        /* Try to fold tuples of constants (includes a case for
                           lists which are only used for "in" and "not in" tests).
                           Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
                           Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
                           Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
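                        /* Example: "a, b = b, a" normally compiles to
                           BUILD_TUPLE 2 followed by UNPACK_SEQUENCE 2;
                           the pair is replaced by a single ROT_TWO, with
                           the remaining five bytes NOP-filled. */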
                        case BUILD_TUPLE:
                        case BUILD_LIST:
                                j = GETARG(codestr, i);
                                h = i - 3 * j;
                                if (h >= 0  &&
                                    j <= lastlc  &&
                                    ((opcode == BUILD_TUPLE &&
                                      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
                                     (opcode == BUILD_LIST &&
                                      codestr[i+3]==COMPARE_OP &&
                                      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
                                      (GETARG(codestr,i+3)==6 ||
                                       GETARG(codestr,i+3)==7))) &&
                                    tuple_of_constants(&codestr[h], j, consts)) {
                                        assert(codestr[i] == LOAD_CONST);
                                        cumlc = 1;
                                        break;
                                }
                                if (codestr[i+3] != UNPACK_SEQUENCE  ||
                                    !ISBASICBLOCK(blocks,i,6) ||
                                    j != GETARG(codestr, i+3))
                                        continue;
                                if (j == 1) {
                                        memset(codestr+i, NOP, 6);
                                } else if (j == 2) {
                                        codestr[i] = ROT_TWO;
                                        memset(codestr+i+1, NOP, 5);
                                } else if (j == 3) {
                                        codestr[i] = ROT_THREE;
                                        codestr[i+1] = ROT_TWO;
                                        memset(codestr+i+2, NOP, 4);
                                }
                                break;

                        /* Fold binary ops on constants.
                           LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
                        case BINARY_POWER:
                        case BINARY_MULTIPLY:
                        case BINARY_TRUE_DIVIDE:
                        case BINARY_FLOOR_DIVIDE:
                        case BINARY_MODULO:
                        case BINARY_ADD:
                        case BINARY_SUBTRACT:
                        case BINARY_SUBSCR:
                        case BINARY_LSHIFT:
                        case BINARY_RSHIFT:
                        case BINARY_AND:
                        case BINARY_XOR:
                        case BINARY_OR:
                                if (lastlc >= 2  &&
                                    ISBASICBLOCK(blocks, i-6, 7)  &&
                                    fold_binops_on_constants(&codestr[i-6], consts)) {
                                        i -= 2;
                                        assert(codestr[i] == LOAD_CONST);
                                        cumlc = 1;
                                }
                                break;

                        /* Fold unary ops on constants.
                           LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c1) */
                        case UNARY_NEGATIVE:
                        case UNARY_INVERT:
                                if (lastlc >= 1  &&
                                    ISBASICBLOCK(blocks, i-3, 4)  &&
                                    fold_unaryops_on_constants(&codestr[i-3], consts)) {
                                        i -= 2;
                                        assert(codestr[i] == LOAD_CONST);
                                        cumlc = 1;
                                }
                                break;

                        /* Simplify conditional jump to conditional jump where the
                           result of the first test implies the success of a similar
                           test or the failure of the opposite test.
                           Arises in code like:
                           "if a and b:"
                           "if a or b:"
                           "a and b or c"
                           "(a and b) and c"
                           x:JUMP_IF_FALSE y   y:JUMP_IF_FALSE z  -->  x:JUMP_IF_FALSE z
                           x:JUMP_IF_FALSE y   y:JUMP_IF_TRUE z   -->  x:JUMP_IF_FALSE y+3
                           where y+3 is the instruction following the second test.
                        */
                        case JUMP_IF_FALSE:
                        case JUMP_IF_TRUE:
                                tgt = GETJUMPTGT(codestr, i);
                                j = codestr[tgt];
                                if (j == JUMP_IF_FALSE  ||  j == JUMP_IF_TRUE) {
                                        if (j == opcode) {
                                                tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
                                                SETARG(codestr, i, tgttgt);
                                        } else {
                                                tgt -= i;
                                                SETARG(codestr, i, tgt);
                                        }
                                        break;
                                }
                                /* Intentional fallthrough */

                        /* Replace jumps to unconditional jumps */
                        case FOR_ITER:
                        case JUMP_FORWARD:
                        case JUMP_ABSOLUTE:
                        case CONTINUE_LOOP:
                        case SETUP_LOOP:
                        case SETUP_EXCEPT:
                        case SETUP_FINALLY:
                                tgt = GETJUMPTGT(codestr, i);
                                /* Replace a JUMP_* to a RETURN with just the RETURN */
                                if (UNCONDITIONAL_JUMP(opcode) &&
                                    codestr[tgt] == RETURN_VALUE) {
                                        codestr[i] = RETURN_VALUE;
                                        memset(codestr+i+1, NOP, 2);
                                        continue;
                                }
                                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                                        continue;
                                tgttgt = GETJUMPTGT(codestr, tgt);
                                if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
                                        opcode = JUMP_ABSOLUTE;
                                if (!ABSOLUTE_JUMP(opcode))
                                        tgttgt -= i + 3;    /* Calc relative jump addr */
                                if (tgttgt < 0)             /* No backward relative jumps */
                                        continue;
                                codestr[i] = opcode;
                                SETARG(codestr, i, tgttgt);
                                break;

                        case EXTENDED_ARG:
                                goto exitUnchanged;

                        /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
                        /* Remove unreachable JUMPs after RETURN */
                        case RETURN_VALUE:
                                if (i+4 >= codelen)
                                        continue;
                                if (codestr[i+4] == RETURN_VALUE &&
                                    ISBASICBLOCK(blocks,i,5))
                                        memset(codestr+i+1, NOP, 4);
                                else if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
                                         ISBASICBLOCK(blocks,i,4))
                                        memset(codestr+i+1, NOP, 3);
                                break;
                }
        }

        /* Fixup linenotab */
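        /* co_lnotab is a string of (address increment, line increment)
           byte pairs; addrmap[] translates each old bytecode offset to
           its offset once the NOPs below are squeezed out. */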
        for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
                addrmap[i] = i - nops;
                if (codestr[i] == NOP)
                        nops++;
        }
        cum_orig_line = 0;
        last_line = 0;
        for (i=0 ; i < tabsiz ; i+=2) {
                cum_orig_line += lineno[i];
                new_line = addrmap[cum_orig_line];
                assert(new_line - last_line < 255);
                lineno[i] = (unsigned char)(new_line - last_line);
                last_line = new_line;
        }

        /* Remove NOPs and fixup jump targets */
        for (i=0, h=0 ; i<codelen ; ) {
                opcode = codestr[i];
                switch (opcode) {
                        case NOP:
                                i++;
                                continue;

                        case JUMP_ABSOLUTE:
                        case CONTINUE_LOOP:
                                j = addrmap[GETARG(codestr, i)];
                                SETARG(codestr, i, j);
                                break;

                        case FOR_ITER:
                        case JUMP_FORWARD:
                        case JUMP_IF_FALSE:
                        case JUMP_IF_TRUE:
                        case SETUP_LOOP:
                        case SETUP_EXCEPT:
                        case SETUP_FINALLY:
                                j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
                                SETARG(codestr, i, j);
                                break;
                }
                adj = CODESIZE(opcode);
                while (adj--)
                        codestr[h++] = codestr[i++];
        }
        assert(h + nops == codelen);

        code = PyString_FromStringAndSize((char *)codestr, h);
        PyMem_Free(addrmap);
        PyMem_Free(codestr);
        PyMem_Free(blocks);
        return code;

 exitUnchanged:
        if (blocks != NULL)
                PyMem_Free(blocks);
        if (addrmap != NULL)
                PyMem_Free(addrmap);
        if (codestr != NULL)
                PyMem_Free(codestr);
        Py_INCREF(code);
        return code;
}