/* Peephole optimizations for bytecode compiler. */

#include "Python.h"

#include "Python-ast.h"
#include "node.h"
#include "pyarena.h"
#include "ast.h"
#include "code.h"
#include "compile.h"
#include "symtable.h"
#include "opcode.h"

#define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1]))
#define UNCONDITIONAL_JUMP(op)	(op==JUMP_ABSOLUTE || op==JUMP_FORWARD)
#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3))
#define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255
#define CODESIZE(op)  (HAS_ARG(op) ? 3 : 1)
#define ISBASICBLOCK(blocks, start, bytes) \
	(blocks[start]==blocks[start+bytes-1])
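
/* Instruction layout assumed throughout: an opcode with an argument
   occupies three bytes -- opcode, low byte, high byte.  E.g. an argument
   of 0x0102 is stored as the two bytes 0x02 0x01, so GETARG reconstructs
   0x0102 and CODESIZE reports 3 for any opcode with an argument. */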
/* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n
   with	   LOAD_CONST (c1, c2, ... cn).
   The consts table must still be in list form so that the
   new constant (c1, c2, ... cn) can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Bails out with no change if one or more of the LOAD_CONSTs is missing.
   Also works for BUILD_LIST when followed by an "in" or "not in" test.
*/
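/* Illustration (const values and indices are hypothetical):
	LOAD_CONST 1  LOAD_CONST 2  LOAD_CONST 3  BUILD_TUPLE 3
   becomes nine NOPs followed by LOAD_CONST (1, 2, 3), with the folded
   tuple appended to consts. */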
static int
tuple_of_constants(unsigned char *codestr, int n, PyObject *consts)
{
	PyObject *newconst, *constant;
	Py_ssize_t i, arg, len_consts;

	/* Pre-conditions */
	assert(PyList_CheckExact(consts));
	assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
	assert(GETARG(codestr, (n*3)) == n);
	for (i=0 ; i<n ; i++)
		assert(codestr[i*3] == LOAD_CONST);

	/* Build up the new tuple of constants */
	newconst = PyTuple_New(n);
	if (newconst == NULL)
		return 0;
	len_consts = PyList_GET_SIZE(consts);
	for (i=0 ; i<n ; i++) {
		arg = GETARG(codestr, (i*3));
		assert(arg < len_consts);
		constant = PyList_GET_ITEM(consts, arg);
		Py_INCREF(constant);
		PyTuple_SET_ITEM(newconst, i, constant);
	}

	/* Append folded constant onto consts */
	if (PyList_Append(consts, newconst)) {
		Py_DECREF(newconst);
		return 0;
	}
	Py_DECREF(newconst);

	/* Write NOPs over the old LOAD_CONSTs and
	   add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
	memset(codestr, NOP, n*3);
	codestr[n*3] = LOAD_CONST;
	SETARG(codestr, (n*3), len_consts);
	return 1;
}

/* Replace LOAD_CONST c1, LOAD_CONST c2, BINOP
   with	   LOAD_CONST binop(c1,c2).
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the first LOAD_CONST.
   Abandons the transformation if the folding fails (e.g. 1+'a').
   If the new constant is a sequence, only folds when the size
   is below a threshold value.	That keeps pyc files from
   becoming large in the presence of code like:	 (None,)*1000.
*/
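/* Illustration (values hypothetical):	LOAD_CONST 2  LOAD_CONST 3
   BINARY_ADD  becomes	NOP NOP NOP NOP	 LOAD_CONST 5, keeping the
   overall size at seven bytes. */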
static int
fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
{
	PyObject *newconst, *v, *w;
	Py_ssize_t len_consts, size;
	int opcode;

	/* Pre-conditions */
	assert(PyList_CheckExact(consts));
	assert(codestr[0] == LOAD_CONST);
	assert(codestr[3] == LOAD_CONST);

	/* Create new constant */
	v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
	w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
	opcode = codestr[6];
	switch (opcode) {
		case BINARY_POWER:
			newconst = PyNumber_Power(v, w, Py_None);
			break;
		case BINARY_MULTIPLY:
			newconst = PyNumber_Multiply(v, w);
			break;
		case BINARY_DIVIDE:
			/* Cannot fold this operation statically since
			   the result can depend on the run-time presence
			   of the -Qnew flag */
			return 0;
		case BINARY_TRUE_DIVIDE:
			newconst = PyNumber_TrueDivide(v, w);
			break;
		case BINARY_FLOOR_DIVIDE:
			newconst = PyNumber_FloorDivide(v, w);
			break;
		case BINARY_MODULO:
			newconst = PyNumber_Remainder(v, w);
			break;
		case BINARY_ADD:
			newconst = PyNumber_Add(v, w);
			break;
		case BINARY_SUBTRACT:
			newconst = PyNumber_Subtract(v, w);
			break;
		case BINARY_SUBSCR:
			newconst = PyObject_GetItem(v, w);
			break;
		case BINARY_LSHIFT:
			newconst = PyNumber_Lshift(v, w);
			break;
		case BINARY_RSHIFT:
			newconst = PyNumber_Rshift(v, w);
			break;
		case BINARY_AND:
			newconst = PyNumber_And(v, w);
			break;
		case BINARY_XOR:
			newconst = PyNumber_Xor(v, w);
			break;
		case BINARY_OR:
			newconst = PyNumber_Or(v, w);
			break;
		default:
			/* Called with an unknown opcode */
			PyErr_Format(PyExc_SystemError,
			     "unexpected binary operation %d on a constant",
				     opcode);
			return 0;
	}
	if (newconst == NULL) {
		PyErr_Clear();
		return 0;
	}
	size = PyObject_Size(newconst);
	if (size == -1)
		PyErr_Clear();
	else if (size > 20) {
		Py_DECREF(newconst);
		return 0;
	}

	/* Append folded constant into consts table */
	len_consts = PyList_GET_SIZE(consts);
	if (PyList_Append(consts, newconst)) {
		Py_DECREF(newconst);
		return 0;
	}
	Py_DECREF(newconst);

	/* Write NOP NOP NOP NOP LOAD_CONST newconst */
	memset(codestr, NOP, 4);
	codestr[4] = LOAD_CONST;
	SETARG(codestr, 4, len_consts);
	return 1;
}

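/* Replace LOAD_CONST c1, UNARY_OP
   with	   LOAD_CONST unary_op(c1).
   The consts table must still be in list form so that the
   new constant can be appended.
   Called with codestr pointing to the LOAD_CONST.
   Abandons the transformation if the folding fails (e.g. ~'abc').
*/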
static int
fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
{
	PyObject *newconst=NULL, *v;
	Py_ssize_t len_consts;
	int opcode;

	/* Pre-conditions */
	assert(PyList_CheckExact(consts));
	assert(codestr[0] == LOAD_CONST);

	/* Create new constant */
	v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
	opcode = codestr[3];
	switch (opcode) {
		case UNARY_NEGATIVE:
			/* Preserve the sign of -0.0: fold only true
			   (non-zero) constants and leave the negation of
			   a false one, e.g. 0.0, to run time */
			if (PyObject_IsTrue(v) == 1)
				newconst = PyNumber_Negative(v);
			break;
		case UNARY_CONVERT:
			newconst = PyObject_Repr(v);
			break;
		case UNARY_INVERT:
			newconst = PyNumber_Invert(v);
			break;
		default:
			/* Called with an unknown opcode */
			PyErr_Format(PyExc_SystemError,
			     "unexpected unary operation %d on a constant",
				     opcode);
			return 0;
	}
	if (newconst == NULL) {
		PyErr_Clear();
		return 0;
	}

	/* Append folded constant into consts table */
	len_consts = PyList_GET_SIZE(consts);
	if (PyList_Append(consts, newconst)) {
		Py_DECREF(newconst);
		return 0;
	}
	Py_DECREF(newconst);

	/* Write NOP LOAD_CONST newconst */
	codestr[0] = NOP;
	codestr[1] = LOAD_CONST;
	SETARG(codestr, 1, len_consts);
	return 1;
}

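/* Scan the code for jump targets and number the basic blocks: every
   byte offset gets the block number of the straight-line run it belongs
   to, so ISBASICBLOCK can check that a span contains no jump target
   after its first byte. */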
static unsigned int *
markblocks(unsigned char *code, int len)
{
	unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
	int i,j, opcode, blockcnt = 0;

	if (blocks == NULL) {
		PyErr_NoMemory();
		return NULL;
	}
	memset(blocks, 0, len*sizeof(int));

	/* Mark labels in the first pass */
	for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
		opcode = code[i];
		switch (opcode) {
			case FOR_ITER:
			case JUMP_FORWARD:
			case JUMP_IF_FALSE:
			case JUMP_IF_TRUE:
			case JUMP_ABSOLUTE:
			case CONTINUE_LOOP:
			case SETUP_LOOP:
			case SETUP_EXCEPT:
			case SETUP_FINALLY:
				j = GETJUMPTGT(code, i);
				blocks[j] = 1;
				break;
		}
	}
	/* Build block numbers in the second pass */
	for (i=0 ; i<len ; i++) {
		blockcnt += blocks[i];	/* increment blockcnt over labels */
		blocks[i] = blockcnt;
	}
	return blocks;
}

/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code
   containing extended arguments or that has a length over 32,700.  That
   allows us to avoid overflow and sign issues.	 Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within
   a single basic block.  All transformations keep the code size the same
   or smaller.	For those that reduce size, the gaps are initially filled
   with NOPs.  Later those NOPs are removed and the jump addresses
   retargeted in a single pass.	 Line numbering is adjusted accordingly. */
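
/* For example, the LOAD_CONST trueconst JUMP_IF_FALSE xx POP_TOP pattern
   handled below is blanked to seven NOPs, and the final pass then
   squeezes those NOPs out while retargeting every jump across the gap. */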

PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lineno_obj)
{
	Py_ssize_t i, j, codelen;
	int nops, h, adj;
	int tgt, tgttgt, opcode;
	unsigned char *codestr = NULL;
	unsigned char *lineno;
	int *addrmap = NULL;
	int new_line, cum_orig_line, last_line, tabsiz;
	int cumlc=0, lastlc=0;	/* Count runs of consecutive LOAD_CONSTs */
	unsigned int *blocks = NULL;
	char *name;

	/* Bail out if an exception is set */
	if (PyErr_Occurred())
		goto exitUnchanged;

	/* Bypass optimization when the lineno table is too complex */
	assert(PyString_Check(lineno_obj));
	lineno = (unsigned char*)PyString_AS_STRING(lineno_obj);
	tabsiz = PyString_GET_SIZE(lineno_obj);
	if (memchr(lineno, 255, tabsiz) != NULL)
		goto exitUnchanged;

	/* Avoid situations where jump retargeting could overflow */
	assert(PyString_Check(code));
	codelen = PyString_Size(code);
	if (codelen > 32700)
		goto exitUnchanged;

	/* Make a modifiable copy of the code string */
	codestr = (unsigned char *)PyMem_Malloc(codelen);
	if (codestr == NULL)
		goto exitUnchanged;
	codestr = (unsigned char *)memcpy(codestr,
					  PyString_AS_STRING(code), codelen);

	/* Verify that RETURN_VALUE terminates the codestring.	This allows
	   the various transformation patterns to look ahead several
	   instructions without additional checks to make sure they are not
	   looking beyond the end of the code string.
	*/
	if (codestr[codelen-1] != RETURN_VALUE)
		goto exitUnchanged;

	/* Mapping to new jump targets after NOPs are removed */
	addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
	if (addrmap == NULL)
		goto exitUnchanged;

	blocks = markblocks(codestr, codelen);
	if (blocks == NULL)
		goto exitUnchanged;
	assert(PyList_Check(consts));

	for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
		opcode = codestr[i];

		lastlc = cumlc;
		cumlc = 0;

		switch (opcode) {

			/* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP
			   with	   JUMP_IF_TRUE POP_TOP */
			case UNARY_NOT:
				if (codestr[i+1] != JUMP_IF_FALSE  ||
				    codestr[i+4] != POP_TOP  ||
				    !ISBASICBLOCK(blocks,i,5))
					continue;
				tgt = GETJUMPTGT(codestr, (i+1));
				if (codestr[tgt] != POP_TOP)
					continue;
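				/* The jump moves from i+1 to i, so add one
				   to the relative argument to keep the same
				   target (the POP_TOP at tgt). */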
				j = GETARG(codestr, i+1) + 1;
				codestr[i] = JUMP_IF_TRUE;
				SETARG(codestr, i, j);
				codestr[i+3] = POP_TOP;
				codestr[i+4] = NOP;
				break;

			/* not a is b -->  a is not b
			   not a in b -->  a not in b
			   not a is not b -->  a is b
			   not a not in b -->  a in b
			*/
			case COMPARE_OP:
				j = GETARG(codestr, i);
				if (j < 6  ||  j > 9  ||
				    codestr[i+3] != UNARY_NOT  ||
				    !ISBASICBLOCK(blocks,i,4))
					continue;
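				/* Comparison args 6-9 are in, not in, is and
				   is not; j^1 flips each test to its
				   negation. */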
				SETARG(codestr, i, (j^1));
				codestr[i+3] = NOP;
				break;

			/* Replace LOAD_GLOBAL/LOAD_NAME None
			   with	   LOAD_CONST None */
			case LOAD_NAME:
			case LOAD_GLOBAL:
				j = GETARG(codestr, i);
				name = PyString_AsString(PyTuple_GET_ITEM(names, j));
				if (name == NULL  ||  strcmp(name, "None") != 0)
					continue;
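				/* Applies only when None is already among
				   the constants; the table is searched here,
				   not extended. */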
				for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) {
					if (PyList_GET_ITEM(consts, j) == Py_None) {
						codestr[i] = LOAD_CONST;
						SETARG(codestr, i, j);
						cumlc = lastlc + 1;
						break;
					}
				}
				break;

			/* Skip over LOAD_CONST trueconst
			   JUMP_IF_FALSE xx  POP_TOP */
			case LOAD_CONST:
				cumlc = lastlc + 1;
				j = GETARG(codestr, i);
				if (codestr[i+3] != JUMP_IF_FALSE  ||
				    codestr[i+6] != POP_TOP  ||
				    !ISBASICBLOCK(blocks,i,7)  ||
				    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
					continue;
				memset(codestr+i, NOP, 7);
				cumlc = 0;
				break;

			/* Try to fold tuples of constants (includes a case for lists
			   which are only used for "in" and "not in" tests).
			   Skip over  BUILD_SEQN 1 UNPACK_SEQN 1.
			   Replace   BUILD_SEQN 2 UNPACK_SEQN 2	 with  ROT2.
			   Replace   BUILD_SEQN 3 UNPACK_SEQN 3	 with  ROT3 ROT2. */
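			/* The ROT rewrites are what let a swap like
			   "a, b = b, a" run without building a tuple at all
			   (illustrative source; any 2- or 3-element unpack
			   of freshly built values qualifies). */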
			case BUILD_TUPLE:
			case BUILD_LIST:
				j = GETARG(codestr, i);
				h = i - 3 * j;
				if (h >= 0  &&
				    j <= lastlc	 &&
				    ((opcode == BUILD_TUPLE &&
				      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
				     (opcode == BUILD_LIST &&
				      codestr[i+3]==COMPARE_OP &&
				      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
				      (GETARG(codestr,i+3)==6 ||
				       GETARG(codestr,i+3)==7))) &&
				    tuple_of_constants(&codestr[h], j, consts)) {
					assert(codestr[i] == LOAD_CONST);
					cumlc = 1;
					break;
				}
				if (codestr[i+3] != UNPACK_SEQUENCE  ||
				    !ISBASICBLOCK(blocks,i,6) ||
				    j != GETARG(codestr, i+3))
					continue;
				if (j == 1) {
					memset(codestr+i, NOP, 6);
				} else if (j == 2) {
					codestr[i] = ROT_TWO;
					memset(codestr+i+1, NOP, 5);
				} else if (j == 3) {
					codestr[i] = ROT_THREE;
					codestr[i+1] = ROT_TWO;
					memset(codestr+i+2, NOP, 4);
				}
				break;

			/* Fold binary ops on constants.
			   LOAD_CONST c1 LOAD_CONST c2 BINOP -->  LOAD_CONST binop(c1,c2) */
			case BINARY_POWER:
			case BINARY_MULTIPLY:
			case BINARY_TRUE_DIVIDE:
			case BINARY_FLOOR_DIVIDE:
			case BINARY_MODULO:
			case BINARY_ADD:
			case BINARY_SUBTRACT:
			case BINARY_SUBSCR:
			case BINARY_LSHIFT:
			case BINARY_RSHIFT:
			case BINARY_AND:
			case BINARY_XOR:
			case BINARY_OR:
				if (lastlc >= 2	 &&
				    ISBASICBLOCK(blocks, i-6, 7)  &&
				    fold_binops_on_constants(&codestr[i-6], consts)) {
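					/* The folded LOAD_CONST was written
					   at i-2; back up so the scan resumes
					   from it with cumlc tracking the
					   constant run. */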
					i -= 2;
					assert(codestr[i] == LOAD_CONST);
					cumlc = 1;
				}
				break;

			/* Fold unary ops on constants.
			   LOAD_CONST c1  UNARY_OP -->	LOAD_CONST unary_op(c1) */
			case UNARY_NEGATIVE:
			case UNARY_CONVERT:
			case UNARY_INVERT:
				if (lastlc >= 1	 &&
				    ISBASICBLOCK(blocks, i-3, 4)  &&
				    fold_unaryops_on_constants(&codestr[i-3], consts)) {
					i -= 2;
					assert(codestr[i] == LOAD_CONST);
					cumlc = 1;
				}
				break;

			/* Simplify conditional jump to conditional jump where the
			   result of the first test implies the success of a similar
			   test or the failure of the opposite test.
			   Arises in code like:
			   "if a and b:"
			   "if a or b:"
			   "a and b or c"
			   "(a and b) and c"
			   x:JUMP_IF_FALSE y   y:JUMP_IF_FALSE z  -->  x:JUMP_IF_FALSE z
			   x:JUMP_IF_FALSE y   y:JUMP_IF_TRUE z	 -->  x:JUMP_IF_FALSE y+3
			   where y+3 is the instruction following the second test.
			*/
			case JUMP_IF_FALSE:
			case JUMP_IF_TRUE:
				tgt = GETJUMPTGT(codestr, i);
				j = codestr[tgt];
				if (j == JUMP_IF_FALSE	||  j == JUMP_IF_TRUE) {
					if (j == opcode) {
						tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
						SETARG(codestr, i, tgttgt);
					} else {
						tgt -= i;
						SETARG(codestr, i, tgt);
					}
					break;
				}
				/* Intentional fallthrough */

			/* Replace jumps to unconditional jumps */
			case FOR_ITER:
			case JUMP_FORWARD:
			case JUMP_ABSOLUTE:
			case CONTINUE_LOOP:
			case SETUP_LOOP:
			case SETUP_EXCEPT:
			case SETUP_FINALLY:
				tgt = GETJUMPTGT(codestr, i);
				if (!UNCONDITIONAL_JUMP(codestr[tgt]))
					continue;
				tgttgt = GETJUMPTGT(codestr, tgt);
				if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
					opcode = JUMP_ABSOLUTE;
				if (!ABSOLUTE_JUMP(opcode))
					tgttgt -= i + 3;     /* Calc relative jump addr */
				if (tgttgt < 0)		  /* No backward relative jumps */
					continue;
				codestr[i] = opcode;
				SETARG(codestr, i, tgttgt);
				break;

			case EXTENDED_ARG:
				goto exitUnchanged;

			/* Replace RETURN LOAD_CONST None RETURN with just RETURN */
			case RETURN_VALUE:
				if (i+4 >= codelen  ||
				    codestr[i+4] != RETURN_VALUE  ||
				    !ISBASICBLOCK(blocks,i,5))
					continue;
				memset(codestr+i+1, NOP, 4);
				break;
		}
	}

	/* Build addrmap (old offset -> offset once NOPs are removed)
	   and fixup the linenotab */
	for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
		addrmap[i] = i - nops;
		if (codestr[i] == NOP)
			nops++;
	}
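	/* Each lnotab entry pairs a bytecode-address increment with a line
	   increment; e.g. an address increment of 6 shrinks to 4 here if
	   two NOP bytes were squeezed out of that span (illustrative). */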
	cum_orig_line = 0;
	last_line = 0;
	for (i=0 ; i < tabsiz ; i+=2) {
		cum_orig_line += lineno[i];
		new_line = addrmap[cum_orig_line];
		assert (new_line - last_line < 255);
		lineno[i] =((unsigned char)(new_line - last_line));
		last_line = new_line;
	}

	/* Remove NOPs and fixup jump targets */
	for (i=0, h=0 ; i<codelen ; ) {
		opcode = codestr[i];
		switch (opcode) {
			case NOP:
				i++;
				continue;

			case JUMP_ABSOLUTE:
			case CONTINUE_LOOP:
				j = addrmap[GETARG(codestr, i)];
				SETARG(codestr, i, j);
				break;

			case FOR_ITER:
			case JUMP_FORWARD:
			case JUMP_IF_FALSE:
			case JUMP_IF_TRUE:
			case SETUP_LOOP:
			case SETUP_EXCEPT:
			case SETUP_FINALLY:
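				/* Relative jump: the new argument is the new
				   target minus the new end of this
				   instruction (addrmap[i] + 3). */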
				j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
				SETARG(codestr, i, j);
				break;
		}
		adj = CODESIZE(opcode);
		while (adj--)
			codestr[h++] = codestr[i++];
	}
	assert(h + nops == codelen);

	code = PyString_FromStringAndSize((char *)codestr, h);
	PyMem_Free(addrmap);
	PyMem_Free(codestr);
	PyMem_Free(blocks);
	return code;

 exitUnchanged:
	if (blocks != NULL)
		PyMem_Free(blocks);
	if (addrmap != NULL)
		PyMem_Free(addrmap);
	if (codestr != NULL)
		PyMem_Free(codestr);
	Py_INCREF(code);
	return code;
}