blob: f2e0c0b0886e50f790138ff863790e046ba06ead [file] [log] [blame]
/* Peephole optimizations for bytecode compiler. */
2
3#include "Python.h"
4
5#include "Python-ast.h"
6#include "node.h"
7#include "pyarena.h"
8#include "ast.h"
9#include "code.h"
10#include "compile.h"
11#include "symtable.h"
12#include "opcode.h"
13
/* Decode the 16-bit little-endian argument of the opcode at arr[i]. */
#define GETARG(arr, i) ((int)(((arr)[(i)+2]<<8) + (arr)[(i)+1]))
/* Jumps that are always taken (their fall-through is dead). */
#define UNCONDITIONAL_JUMP(op)	((op)==JUMP_ABSOLUTE || (op)==JUMP_FORWARD)
/* Jumps whose argument is an absolute bytecode offset, not a relative one. */
#define ABSOLUTE_JUMP(op) ((op)==JUMP_ABSOLUTE || (op)==CONTINUE_LOOP)
/* Absolute target of the jump instruction at arr[i]; relative jumps are
   taken from the end of the 3-byte instruction (i+3). */
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP((arr)[i]) ? 0 : (i)+3))
/* Store a 16-bit argument at arr[i+1..i+2].  Wrapped in do/while(0) so the
   two assignments behave as a single statement inside unbraced if/else
   bodies; arguments parenthesized against operator-precedence surprises. */
#define SETARG(arr, i, val) do {				\
		(arr)[(i)+2] = (unsigned char)((val) >> 8);	\
		(arr)[(i)+1] = (unsigned char)((val) & 255);	\
	} while (0)
/* Instruction size in bytes: opcode plus optional 2-byte argument. */
#define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
/* True iff the byte range [start, start+bytes) lies in one basic block. */
#define ISBASICBLOCK(blocks, start, bytes) \
	(blocks[start]==blocks[start+bytes-1])
22
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
24 with LOAD_CONST (c1, c2, ... cn).
25 The consts table must still be in list form so that the
26 new constant (c1, c2, ... cn) can be appended.
27 Called with codestr pointing to the first LOAD_CONST.
28 Bails out with no change if one or more of the LOAD_CONSTs is missing.
29 Also works for BUILD_LIST when followed by an "in" or "not in" test.
30*/
31static int
32tuple_of_constants(unsigned char *codestr, int n, PyObject *consts)
33{
34 PyObject *newconst, *constant;
35 Py_ssize_t i, arg, len_consts;
36
37 /* Pre-conditions */
38 assert(PyList_CheckExact(consts));
39 assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
40 assert(GETARG(codestr, (n*3)) == n);
41 for (i=0 ; i<n ; i++)
42 assert(codestr[i*3] == LOAD_CONST);
43
44 /* Buildup new tuple of constants */
45 newconst = PyTuple_New(n);
46 if (newconst == NULL)
47 return 0;
48 len_consts = PyList_GET_SIZE(consts);
49 for (i=0 ; i<n ; i++) {
50 arg = GETARG(codestr, (i*3));
51 assert(arg < len_consts);
52 constant = PyList_GET_ITEM(consts, arg);
53 Py_INCREF(constant);
54 PyTuple_SET_ITEM(newconst, i, constant);
55 }
56
57 /* Append folded constant onto consts */
58 if (PyList_Append(consts, newconst)) {
59 Py_DECREF(newconst);
60 return 0;
61 }
62 Py_DECREF(newconst);
63
64 /* Write NOPs over old LOAD_CONSTS and
65 add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
66 memset(codestr, NOP, n*3);
67 codestr[n*3] = LOAD_CONST;
68 SETARG(codestr, (n*3), len_consts);
69 return 1;
70}
71
/* Replace LOAD_CONST c1, LOAD_CONST c2 BINOP
73 with LOAD_CONST binop(c1,c2)
74 The consts table must still be in list form so that the
75 new constant can be appended.
76 Called with codestr pointing to the first LOAD_CONST.
77 Abandons the transformation if the folding fails (i.e. 1+'a').
78 If the new constant is a sequence, only folds when the size
79 is below a threshold value. That keeps pyc files from
80 becoming large in the presence of code like: (None,)*1000.
81*/
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
{
	PyObject *newconst, *v, *w;
	Py_ssize_t len_consts, size;
	int opcode;

	/* Pre-conditions: codestr points at LOAD_CONST LOAD_CONST BINOP. */
	assert(PyList_CheckExact(consts));
	assert(codestr[0] == LOAD_CONST);
	assert(codestr[3] == LOAD_CONST);

	/* Create new constant: v and w are borrowed references into the
	   consts list; the opcode at offset 6 selects the operation. */
	v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
	w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
	opcode = codestr[6];
	switch (opcode) {
		case BINARY_POWER:
			newconst = PyNumber_Power(v, w, Py_None);
			break;
		case BINARY_MULTIPLY:
			newconst = PyNumber_Multiply(v, w);
			break;
		case BINARY_TRUE_DIVIDE:
			newconst = PyNumber_TrueDivide(v, w);
			break;
		case BINARY_FLOOR_DIVIDE:
			newconst = PyNumber_FloorDivide(v, w);
			break;
		case BINARY_MODULO:
			newconst = PyNumber_Remainder(v, w);
			break;
		case BINARY_ADD:
			newconst = PyNumber_Add(v, w);
			break;
		case BINARY_SUBTRACT:
			newconst = PyNumber_Subtract(v, w);
			break;
		case BINARY_SUBSCR:
			newconst = PyObject_GetItem(v, w);
			break;
		case BINARY_LSHIFT:
			newconst = PyNumber_Lshift(v, w);
			break;
		case BINARY_RSHIFT:
			newconst = PyNumber_Rshift(v, w);
			break;
		case BINARY_AND:
			newconst = PyNumber_And(v, w);
			break;
		case BINARY_XOR:
			newconst = PyNumber_Xor(v, w);
			break;
		case BINARY_OR:
			newconst = PyNumber_Or(v, w);
			break;
		default:
			/* Called with an unknown opcode */
			PyErr_Format(PyExc_SystemError,
				     "unexpected binary operation %d on a constant",
				     opcode);
			return 0;
	}
	/* NULL means the fold raised (e.g. 1+'a'); swallow the error and
	   abandon the transformation, leaving the bytecode unchanged. */
	if (newconst == NULL) {
		PyErr_Clear();
		return 0;
	}
	/* Cap the size of folded sequences so code like (None,)*1000 does
	   not bloat the consts table / pyc file. */
	size = PyObject_Size(newconst);
	if (size == -1)
		PyErr_Clear();	/* unsized result: no cap applies */
	else if (size > 20) {
		Py_DECREF(newconst);
		return 0;
	}

	/* Append folded constant into consts table; len_consts (size
	   before the append) becomes the new constant's index. */
	len_consts = PyList_GET_SIZE(consts);
	if (PyList_Append(consts, newconst)) {
		Py_DECREF(newconst);
		return 0;
	}
	Py_DECREF(newconst);

	/* Write NOP NOP NOP NOP LOAD_CONST newconst */
	memset(codestr, NOP, 4);
	codestr[4] = LOAD_CONST;
	SETARG(codestr, 4, len_consts);
	return 1;
}
171
172static int
173fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
174{
175 PyObject *newconst=NULL, *v;
176 Py_ssize_t len_consts;
177 int opcode;
178
179 /* Pre-conditions */
180 assert(PyList_CheckExact(consts));
181 assert(codestr[0] == LOAD_CONST);
182
183 /* Create new constant */
184 v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
185 opcode = codestr[3];
186 switch (opcode) {
187 case UNARY_NEGATIVE:
188 /* Preserve the sign of -0.0 */
189 if (PyObject_IsTrue(v) == 1)
190 newconst = PyNumber_Negative(v);
191 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000192 case UNARY_INVERT:
193 newconst = PyNumber_Invert(v);
194 break;
195 default:
196 /* Called with an unknown opcode */
197 PyErr_Format(PyExc_SystemError,
198 "unexpected unary operation %d on a constant",
199 opcode);
200 return 0;
201 }
202 if (newconst == NULL) {
203 PyErr_Clear();
204 return 0;
205 }
206
207 /* Append folded constant into consts table */
208 len_consts = PyList_GET_SIZE(consts);
209 if (PyList_Append(consts, newconst)) {
210 Py_DECREF(newconst);
211 return 0;
212 }
213 Py_DECREF(newconst);
214
215 /* Write NOP LOAD_CONST newconst */
216 codestr[0] = NOP;
217 codestr[1] = LOAD_CONST;
218 SETARG(codestr, 1, len_consts);
219 return 1;
220}
221
222static unsigned int *
223markblocks(unsigned char *code, int len)
224{
225 unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
226 int i,j, opcode, blockcnt = 0;
227
228 if (blocks == NULL) {
229 PyErr_NoMemory();
230 return NULL;
231 }
232 memset(blocks, 0, len*sizeof(int));
233
234 /* Mark labels in the first pass */
235 for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
236 opcode = code[i];
237 switch (opcode) {
238 case FOR_ITER:
239 case JUMP_FORWARD:
240 case JUMP_IF_FALSE:
241 case JUMP_IF_TRUE:
242 case JUMP_ABSOLUTE:
243 case CONTINUE_LOOP:
244 case SETUP_LOOP:
245 case SETUP_EXCEPT:
246 case SETUP_FINALLY:
247 j = GETJUMPTGT(code, i);
248 blocks[j] = 1;
249 break;
250 }
251 }
252 /* Build block numbers in the second pass */
253 for (i=0 ; i<len ; i++) {
254 blockcnt += blocks[i]; /* increment blockcnt over labels */
255 blocks[i] = blockcnt;
256 }
257 return blocks;
258}
259
/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code that
   has a length over 32,700, and does not calculate extended arguments.
   That allows us to avoid overflow and sign issues. Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255. EXTENDED_ARG can
   appear before MAKE_FUNCTION; in this case both opcodes are skipped.
   EXTENDED_ARG preceding any other opcode causes the optimizer to bail.

   Optimizations are restricted to simple transformations occurring within a
   single basic block. All transformations keep the code size the same or
   smaller. For those that reduce size, the gaps are initially filled with
   NOPs. Later those NOPs are removed and the jump addresses retargeted in
   a single pass. Line numbering is adjusted accordingly. */
275 a single pass. Line numbering is adjusted accordingly. */
276
PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lineno_obj)
{
	Py_ssize_t i, j, codelen;
	int nops, h, adj;
	int tgt, tgttgt, opcode;
	unsigned char *codestr = NULL;
	unsigned char *lineno;
	int *addrmap = NULL;		/* old offset -> offset after NOP removal */
	int new_line, cum_orig_line, last_line, tabsiz;
	int cumlc=0, lastlc=0;	/* Count runs of consecutive LOAD_CONSTs */
	unsigned int *blocks = NULL;	/* basic-block map from markblocks() */
	char *name;

	/* Bail out if an exception is set */
	if (PyErr_Occurred())
		goto exitUnchanged;

	/* Bypass optimization when the lineno table is too complex: a 255
	   byte signals a multi-entry gap encoding this pass can't adjust. */
	assert(PyString_Check(lineno_obj));
	lineno = (unsigned char*)PyString_AS_STRING(lineno_obj);
	tabsiz = PyString_GET_SIZE(lineno_obj);
	if (memchr(lineno, 255, tabsiz) != NULL)
		goto exitUnchanged;

	/* Avoid situations where jump retargeting could overflow the
	   16-bit instruction argument */
	assert(PyString_Check(code));
	codelen = PyString_Size(code);
	if (codelen > 32700)
		goto exitUnchanged;

	/* Make a modifiable copy of the code string */
	codestr = (unsigned char *)PyMem_Malloc(codelen);
	if (codestr == NULL)
		goto exitUnchanged;
	codestr = (unsigned char *)memcpy(codestr,
					  PyString_AS_STRING(code), codelen);

	/* Verify that RETURN_VALUE terminates the codestring. This allows
	   the various transformation patterns to look ahead several
	   instructions without additional checks to make sure they are not
	   looking beyond the end of the code string.
	*/
	if (codestr[codelen-1] != RETURN_VALUE)
		goto exitUnchanged;

	/* Mapping to new jump targets after NOPs are removed */
	addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
	if (addrmap == NULL)
		goto exitUnchanged;

	blocks = markblocks(codestr, codelen);
	if (blocks == NULL)
		goto exitUnchanged;
	assert(PyList_Check(consts));

	/* Main transformation pass: one case per pattern, all confined to
	   a single basic block (checked via ISBASICBLOCK). */
	for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
		opcode = codestr[i];

		/* Track how many consecutive LOAD_CONSTs precede i. */
		lastlc = cumlc;
		cumlc = 0;

		switch (opcode) {

			/* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with
			   with JUMP_IF_TRUE POP_TOP */
			case UNARY_NOT:
				if (codestr[i+1] != JUMP_IF_FALSE ||
				    codestr[i+4] != POP_TOP ||
				    !ISBASICBLOCK(blocks,i,5))
					continue;
				tgt = GETJUMPTGT(codestr, (i+1));
				if (codestr[tgt] != POP_TOP)
					continue;
				/* +1 skips the POP_TOP at the old target */
				j = GETARG(codestr, i+1) + 1;
				codestr[i] = JUMP_IF_TRUE;
				SETARG(codestr, i, j);
				codestr[i+3] = POP_TOP;
				codestr[i+4] = NOP;
				break;

			/* not a is b -->  a is not b
			   not a in b -->  a not in b
			   not a is not b -->  a is b
			   not a not in b -->  a in b
			*/
			case COMPARE_OP:
				/* args 6..9 are the in/is comparisons;
				   XOR with 1 selects the negated form */
				j = GETARG(codestr, i);
				if (j < 6 || j > 9 ||
				    codestr[i+3] != UNARY_NOT ||
				    !ISBASICBLOCK(blocks,i,4))
					continue;
				SETARG(codestr, i, (j^1));
				codestr[i+3] = NOP;
				break;

			/* Replace LOAD_GLOBAL/LOAD_NAME None
			   with LOAD_CONST None */
			case LOAD_NAME:
			case LOAD_GLOBAL:
				j = GETARG(codestr, i);
				name = PyString_AsString(PyTuple_GET_ITEM(names, j));
				if (name == NULL || strcmp(name, "None") != 0)
					continue;
				/* Reuse an existing None in consts if any;
				   otherwise leave the instruction alone. */
				for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) {
					if (PyList_GET_ITEM(consts, j) == Py_None) {
						codestr[i] = LOAD_CONST;
						SETARG(codestr, i, j);
						cumlc = lastlc + 1;
						break;
					}
				}
				break;

			/* Skip over LOAD_CONST trueconst
			   JUMP_IF_FALSE xx POP_TOP */
			case LOAD_CONST:
				cumlc = lastlc + 1;
				j = GETARG(codestr, i);
				if (codestr[i+3] != JUMP_IF_FALSE ||
				    codestr[i+6] != POP_TOP ||
				    !ISBASICBLOCK(blocks,i,7) ||
				    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
					continue;
				/* The test can never fire: drop all 7 bytes */
				memset(codestr+i, NOP, 7);
				cumlc = 0;
				break;

			/* Try to fold tuples of constants (includes a case for lists
			   which are only used for "in" and "not in" tests).
			   Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
			   Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
			   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
			case BUILD_TUPLE:
			case BUILD_LIST:
				j = GETARG(codestr, i);
				/* h = offset of the first of the j LOAD_CONSTs */
				h = i - 3 * j;
				if (h >= 0 &&
				    j <= lastlc &&
				    ((opcode == BUILD_TUPLE &&
				      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
				     (opcode == BUILD_LIST &&
				      codestr[i+3]==COMPARE_OP &&
				      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
				      /* COMPARE_OP args 6/7: the "in" and
					 "not in" tests (cf. table above) */
				      (GETARG(codestr,i+3)==6 ||
				       GETARG(codestr,i+3)==7))) &&
				    tuple_of_constants(&codestr[h], j, consts)) {
					assert(codestr[i] == LOAD_CONST);
					cumlc = 1;
					break;
				}
				if (codestr[i+3] != UNPACK_SEQUENCE ||
				    !ISBASICBLOCK(blocks,i,6) ||
				    j != GETARG(codestr, i+3))
					continue;
				if (j == 1) {
					memset(codestr+i, NOP, 6);
				} else if (j == 2) {
					codestr[i] = ROT_TWO;
					memset(codestr+i+1, NOP, 5);
				} else if (j == 3) {
					codestr[i] = ROT_THREE;
					codestr[i+1] = ROT_TWO;
					memset(codestr+i+2, NOP, 4);
				}
				break;

			/* Fold binary ops on constants.
			   LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
			case BINARY_POWER:
			case BINARY_MULTIPLY:
			case BINARY_TRUE_DIVIDE:
			case BINARY_FLOOR_DIVIDE:
			case BINARY_MODULO:
			case BINARY_ADD:
			case BINARY_SUBTRACT:
			case BINARY_SUBSCR:
			case BINARY_LSHIFT:
			case BINARY_RSHIFT:
			case BINARY_AND:
			case BINARY_XOR:
			case BINARY_OR:
				if (lastlc >= 2 &&
				    ISBASICBLOCK(blocks, i-6, 7) &&
				    fold_binops_on_constants(&codestr[i-6], consts)) {
					/* back up to the new LOAD_CONST so the
					   loop re-scans from it */
					i -= 2;
					assert(codestr[i] == LOAD_CONST);
					cumlc = 1;
				}
				break;

			/* Fold unary ops on constants.
			   LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c) */
			case UNARY_NEGATIVE:
			case UNARY_INVERT:
				if (lastlc >= 1 &&
				    ISBASICBLOCK(blocks, i-3, 4) &&
				    fold_unaryops_on_constants(&codestr[i-3], consts)) {
					/* back up to the new LOAD_CONST */
					i -= 2;
					assert(codestr[i] == LOAD_CONST);
					cumlc = 1;
				}
				break;

			/* Simplify conditional jump to conditional jump where the
			   result of the first test implies the success of a similar
			   test or the failure of the opposite test.
			   Arises in code like:
			   "if a and b:"
			   "if a or b:"
			   "a and b or c"
			   "(a and b) and c"
			   x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z
			   x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3
			   where y+3 is the instruction following the second test.
			*/
			case JUMP_IF_FALSE:
			case JUMP_IF_TRUE:
				tgt = GETJUMPTGT(codestr, i);
				j = codestr[tgt];
				if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) {
					if (j == opcode) {
						/* same sense: jump straight
						   through to the final target */
						tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
						SETARG(codestr, i, tgttgt);
					} else {
						/* opposite sense: land just
						   past the second test */
						tgt -= i;
						SETARG(codestr, i, tgt);
					}
					break;
				}
				/* Intentional fallthrough */

			/* Replace jumps to unconditional jumps */
			case FOR_ITER:
			case JUMP_FORWARD:
			case JUMP_ABSOLUTE:
			case CONTINUE_LOOP:
			case SETUP_LOOP:
			case SETUP_EXCEPT:
			case SETUP_FINALLY:
				tgt = GETJUMPTGT(codestr, i);
				/* Replace JUMP_* to a RETURN into just a RETURN */
				if (UNCONDITIONAL_JUMP(opcode) &&
				    codestr[tgt] == RETURN_VALUE) {
					codestr[i] = RETURN_VALUE;
					memset(codestr+i+1, NOP, 2);
					continue;
				}
				if (!UNCONDITIONAL_JUMP(codestr[tgt]))
					continue;
				tgttgt = GETJUMPTGT(codestr, tgt);
				if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
					opcode = JUMP_ABSOLUTE;
				if (!ABSOLUTE_JUMP(opcode))
					tgttgt -= i + 3;	/* Calc relative jump addr */
				if (tgttgt < 0)		/* No backward relative jumps */
					continue;
				codestr[i] = opcode;
				SETARG(codestr, i, tgttgt);
				break;

			case EXTENDED_ARG:
				if (codestr[i+3] != MAKE_FUNCTION)
					goto exitUnchanged;
				/* don't visit MAKE_FUNCTION as GETARG will be wrong */
				i += 3;
				break;

			/* Replace RETURN LOAD_CONST None RETURN with just RETURN */
			/* Remove unreachable JUMPs after RETURN */
			case RETURN_VALUE:
				if (i+4 >= codelen)
					continue;
				if (codestr[i+4] == RETURN_VALUE &&
				    ISBASICBLOCK(blocks,i,5))
					memset(codestr+i+1, NOP, 4);
				else if (UNCONDITIONAL_JUMP(codestr[i+1]) &&
					 ISBASICBLOCK(blocks,i,4))
					memset(codestr+i+1, NOP, 3);
				break;
		}
	}

	/* Fixup linenotab: build addrmap (old offset -> new offset once the
	   NOPs are squeezed out), then rewrite the byte-delta entries. */
	for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
		addrmap[i] = i - nops;
		if (codestr[i] == NOP)
			nops++;
	}
	cum_orig_line = 0;
	last_line = 0;
	for (i=0 ; i < tabsiz ; i+=2) {
		cum_orig_line += lineno[i];
		new_line = addrmap[cum_orig_line];
		/* guaranteed by the memchr(…, 255, …) bail-out above */
		assert (new_line - last_line < 255);
		lineno[i] =((unsigned char)(new_line - last_line));
		last_line = new_line;
	}

	/* Remove NOPs and fixup jump targets: compact in place (read at i,
	   write at h) while retargeting jump arguments through addrmap. */
	for (i=0, h=0 ; i<codelen ; ) {
		opcode = codestr[i];
		switch (opcode) {
			case NOP:
				i++;
				continue;

			case JUMP_ABSOLUTE:
			case CONTINUE_LOOP:
				/* absolute target: map directly */
				j = addrmap[GETARG(codestr, i)];
				SETARG(codestr, i, j);
				break;

			case FOR_ITER:
			case JUMP_FORWARD:
			case JUMP_IF_FALSE:
			case JUMP_IF_TRUE:
			case SETUP_LOOP:
			case SETUP_EXCEPT:
			case SETUP_FINALLY:
				/* relative target: remap both ends, re-relativize */
				j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
				SETARG(codestr, i, j);
				break;
		}
		adj = CODESIZE(opcode);
		while (adj--)
			codestr[h++] = codestr[i++];
	}
	assert(h + nops == codelen);

	code = PyString_FromStringAndSize((char *)codestr, h);
	PyMem_Free(addrmap);
	PyMem_Free(codestr);
	PyMem_Free(blocks);
	return code;

 exitUnchanged:
	/* Any bail-out path: free scratch buffers and return the original
	   code object with a fresh reference. */
	if (blocks != NULL)
		PyMem_Free(blocks);
	if (addrmap != NULL)
		PyMem_Free(addrmap);
	if (codestr != NULL)
		PyMem_Free(codestr);
	Py_INCREF(code);
	return code;
}
623}