/* Peephole optimizations for bytecode compiler. */

#include "Python.h"

#include "Python-ast.h"
#include "node.h"
#include "pyarena.h"
#include "ast.h"
#include "code.h"
#include "compile.h"
#include "symtable.h"
#include "opcode.h"

#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
#define ISBASICBLOCK(blocks, start, bytes) \
    (blocks[start]==blocks[start+bytes-1])
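
/* Note: in this (pre-3.6 CPython) bytecode format, an instruction is a
   one-byte opcode followed by a two-byte little-endian argument when
   HAS_ARG(op) is true; hence the arr[i+1]/arr[i+2] indexing above and
   the 1-or-3 byte CODESIZE. */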

/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
   with LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Bails out with no change if one or more of the LOAD_CONSTs is missing.
   Also works for BUILD_LIST when followed by an "in" or "not in" test.
*/
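/* For example (illustrative; actual const indices depend on the
   surrounding code), "x = (1, 2, 3)" compiles to
       LOAD_CONST 1; LOAD_CONST 2; LOAD_CONST 3; BUILD_TUPLE 3
   and is rewritten here to nine NOPs followed by LOAD_CONST (1, 2, 3);
   the NOPs are stripped by a later pass in PyCode_Optimize(). */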
static int
tuple_of_constants(unsigned char *codestr, int n, PyObject *consts)
{
    PyObject *newconst, *constant;
    Py_ssize_t i, arg, len_consts;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
    assert(GETARG(codestr, (n*3)) == n);
    for (i=0 ; i<n ; i++)
        assert(codestr[i*3] == LOAD_CONST);
    /* Build up the new tuple of constants */
    newconst = PyTuple_New(n);
    if (newconst == NULL)
        return 0;
    len_consts = PyList_GET_SIZE(consts);
    for (i=0 ; i<n ; i++) {
        arg = GETARG(codestr, (i*3));
        assert(arg < len_consts);
        constant = PyList_GET_ITEM(consts, arg);
        Py_INCREF(constant);
        PyTuple_SET_ITEM(newconst, i, constant);
    }

    /* Append folded constant onto consts */
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOPs over the old LOAD_CONSTs and
       add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
    memset(codestr, NOP, n*3);
    codestr[n*3] = LOAD_CONST;
    SETARG(codestr, (n*3), len_consts);
    return 1;
}

/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with LOAD_CONST binop(c1,c2)
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Abandons the transformation if the folding fails (e.g. 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value. That keeps pyc files from
   becoming large in the presence of code like: (None,)*1000.
*/
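/* For example (illustrative), "x = 3 * 7" compiles to
       LOAD_CONST 3; LOAD_CONST 7; BINARY_MULTIPLY
   and is folded here to a single LOAD_CONST 21. The size check below
   is what keeps "(None,)*1000" from being materialized in the consts
   table. */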
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst, *v, *w;
    Py_ssize_t len_consts, size;
    int opcode;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);
    assert(codestr[3] == LOAD_CONST);

    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
    w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
    opcode = codestr[6];
    switch (opcode) {
        case BINARY_POWER:
            newconst = PyNumber_Power(v, w, Py_None);
            break;
        case BINARY_MULTIPLY:
            newconst = PyNumber_Multiply(v, w);
            break;
        case BINARY_TRUE_DIVIDE:
            newconst = PyNumber_TrueDivide(v, w);
            break;
        case BINARY_FLOOR_DIVIDE:
            newconst = PyNumber_FloorDivide(v, w);
            break;
        case BINARY_MODULO:
            newconst = PyNumber_Remainder(v, w);
            break;
        case BINARY_ADD:
            newconst = PyNumber_Add(v, w);
            break;
        case BINARY_SUBTRACT:
            newconst = PyNumber_Subtract(v, w);
            break;
        case BINARY_SUBSCR:
            newconst = PyObject_GetItem(v, w);
            break;
        case BINARY_LSHIFT:
            newconst = PyNumber_Lshift(v, w);
            break;
        case BINARY_RSHIFT:
            newconst = PyNumber_Rshift(v, w);
            break;
        case BINARY_AND:
            newconst = PyNumber_And(v, w);
            break;
        case BINARY_XOR:
            newconst = PyNumber_Xor(v, w);
            break;
        case BINARY_OR:
            newconst = PyNumber_Or(v, w);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected binary operation %d on a constant",
                         opcode);
            return 0;
    }
    if (newconst == NULL) {
        PyErr_Clear();
        return 0;
    }
    size = PyObject_Size(newconst);
    if (size == -1)
        PyErr_Clear();
    else if (size > 20) {
        Py_DECREF(newconst);
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP NOP NOP NOP LOAD_CONST newconst */
    memset(codestr, NOP, 4);
    codestr[4] = LOAD_CONST;
    SETARG(codestr, 4, len_consts);
    return 1;
}

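/* Replace LOAD_CONST c1, UNARY_OP with LOAD_CONST unary_op(c1).
   Same contract as fold_binops_on_constants(): the consts table must
   still be in list form, codestr points at the LOAD_CONST, and the
   transformation is abandoned if the operation fails. */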
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
{
    PyObject *newconst=NULL, *v;
    Py_ssize_t len_consts;
    int opcode;

    /* Pre-conditions */
    assert(PyList_CheckExact(consts));
    assert(codestr[0] == LOAD_CONST);

    /* Create new constant */
    v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
    opcode = codestr[3];
    switch (opcode) {
        case UNARY_NEGATIVE:
            /* Preserve the sign of -0.0: only fold negation of a
               true (nonzero) value */
            if (PyObject_IsTrue(v) == 1)
                newconst = PyNumber_Negative(v);
            break;
        case UNARY_CONVERT:
            newconst = PyObject_Repr(v);
            break;
        case UNARY_INVERT:
            newconst = PyNumber_Invert(v);
            break;
        default:
            /* Called with an unknown opcode */
            PyErr_Format(PyExc_SystemError,
                         "unexpected unary operation %d on a constant",
                         opcode);
            return 0;
    }
    if (newconst == NULL) {
        PyErr_Clear();
        return 0;
    }

    /* Append folded constant into consts table */
    len_consts = PyList_GET_SIZE(consts);
    if (PyList_Append(consts, newconst)) {
        Py_DECREF(newconst);
        return 0;
    }
    Py_DECREF(newconst);

    /* Write NOP LOAD_CONST newconst */
    codestr[0] = NOP;
    codestr[1] = LOAD_CONST;
    SETARG(codestr, 1, len_consts);
    return 1;
}

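/* Partition the code into basic blocks. The first pass marks every jump
   target; the second assigns each byte offset a block number that is
   bumped at every marked label. Two offsets belong to the same block
   iff they carry the same number (see ISBASICBLOCK above). */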
static unsigned int *
markblocks(unsigned char *code, int len)
{
    unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
    int i, j, opcode, blockcnt = 0;

    if (blocks == NULL) {
        PyErr_NoMemory();
        return NULL;
    }
    memset(blocks, 0, len*sizeof(int));

    /* Mark labels in the first pass */
    for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
        opcode = code[i];
        switch (opcode) {
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_IF_FALSE:
            case JUMP_IF_TRUE:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
                j = GETJUMPTGT(code, i);
                blocks[j] = 1;
                break;
        }
    }
    /* Build block numbers in the second pass */
    for (i=0 ; i<len ; i++) {
        blockcnt += blocks[i]; /* increment blockcnt over labels */
        blocks[i] = blockcnt;
    }
    return blocks;
}

/* Perform basic peephole optimizations on the components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code
   containing extended arguments or that has a length over 32,700. That
   allows us to avoid overflow and sign issues. Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block. All transformations keep the code size the same or
   smaller. For those that reduce size, the gaps are initially filled with
   NOPs. Later those NOPs are removed and the jump addresses retargeted in
   a single pass. Line numbering is adjusted accordingly. */
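
/* A small end-to-end illustration (indicative CPython 2.x disassembly
   only; names and offsets vary): inside a function, "return not (a is b)"
   compiles to
       LOAD_GLOBAL a; LOAD_GLOBAL b; COMPARE_OP is; UNARY_NOT; RETURN_VALUE
   and leaves the optimizer as
       LOAD_GLOBAL a; LOAD_GLOBAL b; COMPARE_OP is not; RETURN_VALUE */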

PyObject *
PyCode_Optimize(PyObject *code, PyObject *consts, PyObject *names,
                PyObject *lineno_obj)
{
    Py_ssize_t i, j, codelen;
    int nops, h, adj;
    int tgt, tgttgt, opcode;
    unsigned char *codestr = NULL;
    unsigned char *lineno;
    int *addrmap = NULL;
    int new_line, cum_orig_line, last_line, tabsiz;
    int cumlc=0, lastlc=0;  /* Count runs of consecutive LOAD_CONSTs */
    unsigned int *blocks = NULL;
    char *name;

    /* Bail out if an exception is set */
    if (PyErr_Occurred())
        goto exitUnchanged;

    /* Bypass optimization when the lineno table is too complex */
    assert(PyString_Check(lineno_obj));
    lineno = (unsigned char*)PyString_AS_STRING(lineno_obj);
    tabsiz = PyString_GET_SIZE(lineno_obj);
    if (memchr(lineno, 255, tabsiz) != NULL)
        goto exitUnchanged;

    /* Avoid situations where jump retargeting could overflow */
    assert(PyString_Check(code));
    codelen = PyString_Size(code);
    if (codelen > 32700)
        goto exitUnchanged;

    /* Make a modifiable copy of the code string */
    codestr = (unsigned char *)PyMem_Malloc(codelen);
    if (codestr == NULL)
        goto exitUnchanged;
    codestr = (unsigned char *)memcpy(codestr,
                                      PyString_AS_STRING(code), codelen);

    /* Verify that RETURN_VALUE terminates the codestring. This allows
       the various transformation patterns to look ahead several
       instructions without additional checks to make sure they are not
       looking beyond the end of the code string.
    */
    if (codestr[codelen-1] != RETURN_VALUE)
        goto exitUnchanged;

    /* Mapping to new jump targets after NOPs are removed */
    addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
    if (addrmap == NULL)
        goto exitUnchanged;

    blocks = markblocks(codestr, codelen);
    if (blocks == NULL)
        goto exitUnchanged;
    assert(PyList_Check(consts));

    for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
        opcode = codestr[i];

        lastlc = cumlc;
        cumlc = 0;

        switch (opcode) {

            /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP
               with JUMP_IF_TRUE POP_TOP */
            case UNARY_NOT:
                if (codestr[i+1] != JUMP_IF_FALSE ||
                    codestr[i+4] != POP_TOP ||
                    !ISBASICBLOCK(blocks,i,5))
                    continue;
                tgt = GETJUMPTGT(codestr, (i+1));
                if (codestr[tgt] != POP_TOP)
                    continue;
                j = GETARG(codestr, i+1) + 1;
                codestr[i] = JUMP_IF_TRUE;
                SETARG(codestr, i, j);
                codestr[i+3] = POP_TOP;
                codestr[i+4] = NOP;
                break;

            /* not a is b -->  a is not b
               not a in b -->  a not in b
               not a is not b -->  a is b
               not a not in b -->  a in b
            */
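            /* The compare args accepted below (6-9) come in opposite
               pairs in CPython 2.x's cmp_op table ('in'/'not in',
               'is'/'is not'), so XOR-ing the arg with 1 inverts the
               test and lets the trailing UNARY_NOT become a NOP. */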
            case COMPARE_OP:
                j = GETARG(codestr, i);
                if (j < 6 || j > 9 ||
                    codestr[i+3] != UNARY_NOT ||
                    !ISBASICBLOCK(blocks,i,4))
                    continue;
                SETARG(codestr, i, (j^1));
                codestr[i+3] = NOP;
                break;

            /* Replace LOAD_GLOBAL/LOAD_NAME None
               with LOAD_CONST None */
            case LOAD_NAME:
            case LOAD_GLOBAL:
                j = GETARG(codestr, i);
                name = PyString_AsString(PyTuple_GET_ITEM(names, j));
                if (name == NULL || strcmp(name, "None") != 0)
                    continue;
                for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) {
                    if (PyList_GET_ITEM(consts, j) == Py_None) {
                        codestr[i] = LOAD_CONST;
                        SETARG(codestr, i, j);
                        cumlc = lastlc + 1;
                        break;
                    }
                }
                break;

            /* Skip over LOAD_CONST trueconst
               JUMP_IF_FALSE xx POP_TOP */
            case LOAD_CONST:
                cumlc = lastlc + 1;
                j = GETARG(codestr, i);
                if (codestr[i+3] != JUMP_IF_FALSE ||
                    codestr[i+6] != POP_TOP ||
                    !ISBASICBLOCK(blocks,i,7) ||
                    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
                    continue;
                memset(codestr+i, NOP, 7);
                cumlc = 0;
                break;

            /* Try to fold tuples of constants (includes a case for lists
               which are only used for "in" and "not in" tests).
               Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
               Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
               Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
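            /* For instance (illustrative disassembly), "a, b = b, a"
               compiles to LOAD b; LOAD a; BUILD_TUPLE 2;
               UNPACK_SEQUENCE 2 and is rewritten below to
               LOAD b; LOAD a; ROT_TWO. */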
            case BUILD_TUPLE:
            case BUILD_LIST:
                j = GETARG(codestr, i);
                h = i - 3 * j;
                if (h >= 0 &&
                    j <= lastlc &&
                    ((opcode == BUILD_TUPLE &&
                      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
                     (opcode == BUILD_LIST &&
                      codestr[i+3]==COMPARE_OP &&
                      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
                      (GETARG(codestr,i+3)==6 ||
                       GETARG(codestr,i+3)==7))) &&
                    tuple_of_constants(&codestr[h], j, consts)) {
                    assert(codestr[i] == LOAD_CONST);
                    cumlc = 1;
                    break;
                }
                if (codestr[i+3] != UNPACK_SEQUENCE ||
                    !ISBASICBLOCK(blocks,i,6) ||
                    j != GETARG(codestr, i+3))
                    continue;
                if (j == 1) {
                    memset(codestr+i, NOP, 6);
                } else if (j == 2) {
                    codestr[i] = ROT_TWO;
                    memset(codestr+i+1, NOP, 5);
                } else if (j == 3) {
                    codestr[i] = ROT_THREE;
                    codestr[i+1] = ROT_TWO;
                    memset(codestr+i+2, NOP, 4);
                }
                break;

            /* Fold binary ops on constants.
               LOAD_CONST c1 LOAD_CONST c2 BINOP --> LOAD_CONST binop(c1,c2) */
            case BINARY_POWER:
            case BINARY_MULTIPLY:
            case BINARY_TRUE_DIVIDE:
            case BINARY_FLOOR_DIVIDE:
            case BINARY_MODULO:
            case BINARY_ADD:
            case BINARY_SUBTRACT:
            case BINARY_SUBSCR:
            case BINARY_LSHIFT:
            case BINARY_RSHIFT:
            case BINARY_AND:
            case BINARY_XOR:
            case BINARY_OR:
                if (lastlc >= 2 &&
                    ISBASICBLOCK(blocks, i-6, 7) &&
                    fold_binops_on_constants(&codestr[i-6], consts)) {
                    i -= 2;
                    assert(codestr[i] == LOAD_CONST);
                    cumlc = 1;
                }
                break;

            /* Fold unary ops on constants.
               LOAD_CONST c1 UNARY_OP --> LOAD_CONST unary_op(c1) */
            case UNARY_NEGATIVE:
            case UNARY_CONVERT:
            case UNARY_INVERT:
                if (lastlc >= 1 &&
                    ISBASICBLOCK(blocks, i-3, 4) &&
                    fold_unaryops_on_constants(&codestr[i-3], consts)) {
                    i -= 2;
                    assert(codestr[i] == LOAD_CONST);
                    cumlc = 1;
                }
                break;

            /* Simplify conditional jump to conditional jump where the
               result of the first test implies the success of a similar
               test or the failure of the opposite test.
               Arises in code like:
               "if a and b:"
               "if a or b:"
               "a and b or c"
               "(a and b) and c"
               x:JUMP_IF_FALSE y   y:JUMP_IF_FALSE z  -->  x:JUMP_IF_FALSE z
               x:JUMP_IF_FALSE y   y:JUMP_IF_TRUE z   -->  x:JUMP_IF_FALSE y+3
               where y+3 is the instruction following the second test.
            */
            case JUMP_IF_FALSE:
            case JUMP_IF_TRUE:
                tgt = GETJUMPTGT(codestr, i);
                j = codestr[tgt];
                if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) {
                    if (j == opcode) {
                        tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
                        SETARG(codestr, i, tgttgt);
                    } else {
                        tgt -= i;
                        SETARG(codestr, i, tgt);
                    }
                    break;
                }
                /* Intentional fallthrough */

            /* Replace jumps to unconditional jumps */
            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
                tgt = GETJUMPTGT(codestr, i);
                if (!UNCONDITIONAL_JUMP(codestr[tgt]))
                    continue;
                tgttgt = GETJUMPTGT(codestr, tgt);
                if (opcode == JUMP_FORWARD)  /* JMP_ABS can go backwards */
                    opcode = JUMP_ABSOLUTE;
                if (!ABSOLUTE_JUMP(opcode))
                    tgttgt -= i + 3;     /* Calc relative jump addr */
                if (tgttgt < 0)          /* No backward relative jumps */
                    continue;
                codestr[i] = opcode;
                SETARG(codestr, i, tgttgt);
                break;

            case EXTENDED_ARG:
                goto exitUnchanged;

            /* Replace RETURN LOAD_CONST None RETURN with just RETURN */
            case RETURN_VALUE:
                if (i+4 >= codelen ||
                    codestr[i+4] != RETURN_VALUE ||
                    !ISBASICBLOCK(blocks,i,5))
                    continue;
                memset(codestr+i+1, NOP, 4);
                break;
        }
    }

    /* Fixup linenotab */
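    /* The lnotab is a sequence of (address increment, line increment)
       byte pairs (CPython 2.x encoding; gaps of 255 were rejected
       above). addrmap translates each old bytecode offset into its
       post-NOP-removal offset so the increments can be rewritten in
       place. */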
    for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
        addrmap[i] = i - nops;
        if (codestr[i] == NOP)
            nops++;
    }
    cum_orig_line = 0;
    last_line = 0;
    for (i=0 ; i < tabsiz ; i+=2) {
        cum_orig_line += lineno[i];
        new_line = addrmap[cum_orig_line];
        assert(new_line - last_line < 255);
        lineno[i] = (unsigned char)(new_line - last_line);
        last_line = new_line;
    }

    /* Remove NOPs and fixup jump targets */
    for (i=0, h=0 ; i<codelen ; ) {
        opcode = codestr[i];
        switch (opcode) {
            case NOP:
                i++;
                continue;

            case JUMP_ABSOLUTE:
            case CONTINUE_LOOP:
                j = addrmap[GETARG(codestr, i)];
                SETARG(codestr, i, j);
                break;

            case FOR_ITER:
            case JUMP_FORWARD:
            case JUMP_IF_FALSE:
            case JUMP_IF_TRUE:
            case SETUP_LOOP:
            case SETUP_EXCEPT:
            case SETUP_FINALLY:
                j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
                SETARG(codestr, i, j);
                break;
        }
        adj = CODESIZE(opcode);
        while (adj--)
            codestr[h++] = codestr[i++];
    }
    assert(h + nops == codelen);

    code = PyString_FromStringAndSize((char *)codestr, h);
    PyMem_Free(addrmap);
    PyMem_Free(codestr);
    PyMem_Free(blocks);
    return code;

exitUnchanged:
    if (blocks != NULL)
        PyMem_Free(blocks);
    if (addrmap != NULL)
        PyMem_Free(addrmap);
    if (codestr != NULL)
        PyMem_Free(codestr);
    Py_INCREF(code);
    return code;
}