blob: 145996095812c28e591547e8fce8ecafa0130d13 [file] [log] [blame]
/* Peephole optimizations for bytecode compiler. */
2
3#include "Python.h"
4
5#include "Python-ast.h"
6#include "node.h"
7#include "pyarena.h"
8#include "ast.h"
9#include "code.h"
10#include "compile.h"
11#include "symtable.h"
12#include "opcode.h"
13
/* Bytecode layout: 1 opcode byte, plus (when HAS_ARG(op)) a 2-byte
   little-endian argument.  All macros fully parenthesize their
   arguments so expression arguments expand safely.  Note: arguments
   may still be evaluated more than once, so callers must pass
   side-effect-free expressions. */
#define GETARG(arr, i) ((int)(((arr)[(i)+2]<<8) + (arr)[(i)+1]))
#define UNCONDITIONAL_JUMP(op) ((op)==JUMP_ABSOLUTE || (op)==JUMP_FORWARD)
#define ABSOLUTE_JUMP(op) ((op)==JUMP_ABSOLUTE || (op)==CONTINUE_LOOP)
#define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP((arr)[i]) ? 0 : (i)+3))
/* Wrapped in do/while(0) so the two stores behave as a single statement
   after an unbraced `if`/`else` (the old form dropped the second store). */
#define SETARG(arr, i, val) do {					\
		(arr)[(i)+2] = (unsigned char)(((val) >> 8) & 0xff);	\
		(arr)[(i)+1] = (unsigned char)((val) & 0xff);		\
	} while (0)
#define CODESIZE(op) (HAS_ARG(op) ? 3 : 1)
#define ISBASICBLOCK(blocks, start, bytes) \
	((blocks)[(start)]==(blocks)[(start)+(bytes)-1])
22
23/* Replace LOAD_CONST c1. LOAD_CONST c2 ... LOAD_CONST cn BUILD_TUPLE n
24 with LOAD_CONST (c1, c2, ... cn).
25 The consts table must still be in list form so that the
26 new constant (c1, c2, ... cn) can be appended.
27 Called with codestr pointing to the first LOAD_CONST.
28 Bails out with no change if one or more of the LOAD_CONSTs is missing.
29 Also works for BUILD_LIST when followed by an "in" or "not in" test.
30*/
31static int
32tuple_of_constants(unsigned char *codestr, int n, PyObject *consts)
33{
34 PyObject *newconst, *constant;
35 Py_ssize_t i, arg, len_consts;
36
37 /* Pre-conditions */
38 assert(PyList_CheckExact(consts));
39 assert(codestr[n*3] == BUILD_TUPLE || codestr[n*3] == BUILD_LIST);
40 assert(GETARG(codestr, (n*3)) == n);
41 for (i=0 ; i<n ; i++)
42 assert(codestr[i*3] == LOAD_CONST);
43
44 /* Buildup new tuple of constants */
45 newconst = PyTuple_New(n);
46 if (newconst == NULL)
47 return 0;
48 len_consts = PyList_GET_SIZE(consts);
49 for (i=0 ; i<n ; i++) {
50 arg = GETARG(codestr, (i*3));
51 assert(arg < len_consts);
52 constant = PyList_GET_ITEM(consts, arg);
53 Py_INCREF(constant);
54 PyTuple_SET_ITEM(newconst, i, constant);
55 }
56
57 /* Append folded constant onto consts */
58 if (PyList_Append(consts, newconst)) {
59 Py_DECREF(newconst);
60 return 0;
61 }
62 Py_DECREF(newconst);
63
64 /* Write NOPs over old LOAD_CONSTS and
65 add a new LOAD_CONST newconst on top of the BUILD_TUPLE n */
66 memset(codestr, NOP, n*3);
67 codestr[n*3] = LOAD_CONST;
68 SETARG(codestr, (n*3), len_consts);
69 return 1;
70}
71
72/* Replace LOAD_CONST c1. LOAD_CONST c2 BINOP
73 with LOAD_CONST binop(c1,c2)
74 The consts table must still be in list form so that the
75 new constant can be appended.
76 Called with codestr pointing to the first LOAD_CONST.
77 Abandons the transformation if the folding fails (i.e. 1+'a').
78 If the new constant is a sequence, only folds when the size
79 is below a threshold value. That keeps pyc files from
80 becoming large in the presence of code like: (None,)*1000.
81*/
82static int
83fold_binops_on_constants(unsigned char *codestr, PyObject *consts)
84{
85 PyObject *newconst, *v, *w;
86 Py_ssize_t len_consts, size;
87 int opcode;
88
89 /* Pre-conditions */
90 assert(PyList_CheckExact(consts));
91 assert(codestr[0] == LOAD_CONST);
92 assert(codestr[3] == LOAD_CONST);
93
94 /* Create new constant */
95 v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
96 w = PyList_GET_ITEM(consts, GETARG(codestr, 3));
97 opcode = codestr[6];
98 switch (opcode) {
99 case BINARY_POWER:
100 newconst = PyNumber_Power(v, w, Py_None);
101 break;
102 case BINARY_MULTIPLY:
103 newconst = PyNumber_Multiply(v, w);
104 break;
105 case BINARY_TRUE_DIVIDE:
106 newconst = PyNumber_TrueDivide(v, w);
107 break;
108 case BINARY_FLOOR_DIVIDE:
109 newconst = PyNumber_FloorDivide(v, w);
110 break;
111 case BINARY_MODULO:
112 newconst = PyNumber_Remainder(v, w);
113 break;
114 case BINARY_ADD:
115 newconst = PyNumber_Add(v, w);
116 break;
117 case BINARY_SUBTRACT:
118 newconst = PyNumber_Subtract(v, w);
119 break;
120 case BINARY_SUBSCR:
121 newconst = PyObject_GetItem(v, w);
122 break;
123 case BINARY_LSHIFT:
124 newconst = PyNumber_Lshift(v, w);
125 break;
126 case BINARY_RSHIFT:
127 newconst = PyNumber_Rshift(v, w);
128 break;
129 case BINARY_AND:
130 newconst = PyNumber_And(v, w);
131 break;
132 case BINARY_XOR:
133 newconst = PyNumber_Xor(v, w);
134 break;
135 case BINARY_OR:
136 newconst = PyNumber_Or(v, w);
137 break;
138 default:
139 /* Called with an unknown opcode */
140 PyErr_Format(PyExc_SystemError,
141 "unexpected binary operation %d on a constant",
142 opcode);
143 return 0;
144 }
145 if (newconst == NULL) {
146 PyErr_Clear();
147 return 0;
148 }
149 size = PyObject_Size(newconst);
150 if (size == -1)
151 PyErr_Clear();
152 else if (size > 20) {
153 Py_DECREF(newconst);
154 return 0;
155 }
156
157 /* Append folded constant into consts table */
158 len_consts = PyList_GET_SIZE(consts);
159 if (PyList_Append(consts, newconst)) {
160 Py_DECREF(newconst);
161 return 0;
162 }
163 Py_DECREF(newconst);
164
165 /* Write NOP NOP NOP NOP LOAD_CONST newconst */
166 memset(codestr, NOP, 4);
167 codestr[4] = LOAD_CONST;
168 SETARG(codestr, 4, len_consts);
169 return 1;
170}
171
172static int
173fold_unaryops_on_constants(unsigned char *codestr, PyObject *consts)
174{
175 PyObject *newconst=NULL, *v;
176 Py_ssize_t len_consts;
177 int opcode;
178
179 /* Pre-conditions */
180 assert(PyList_CheckExact(consts));
181 assert(codestr[0] == LOAD_CONST);
182
183 /* Create new constant */
184 v = PyList_GET_ITEM(consts, GETARG(codestr, 0));
185 opcode = codestr[3];
186 switch (opcode) {
187 case UNARY_NEGATIVE:
188 /* Preserve the sign of -0.0 */
189 if (PyObject_IsTrue(v) == 1)
190 newconst = PyNumber_Negative(v);
191 break;
Thomas Wouters00ee7ba2006-08-21 19:07:27 +0000192 case UNARY_INVERT:
193 newconst = PyNumber_Invert(v);
194 break;
195 default:
196 /* Called with an unknown opcode */
197 PyErr_Format(PyExc_SystemError,
198 "unexpected unary operation %d on a constant",
199 opcode);
200 return 0;
201 }
202 if (newconst == NULL) {
203 PyErr_Clear();
204 return 0;
205 }
206
207 /* Append folded constant into consts table */
208 len_consts = PyList_GET_SIZE(consts);
209 if (PyList_Append(consts, newconst)) {
210 Py_DECREF(newconst);
211 return 0;
212 }
213 Py_DECREF(newconst);
214
215 /* Write NOP LOAD_CONST newconst */
216 codestr[0] = NOP;
217 codestr[1] = LOAD_CONST;
218 SETARG(codestr, 1, len_consts);
219 return 1;
220}
221
222static unsigned int *
223markblocks(unsigned char *code, int len)
224{
225 unsigned int *blocks = (unsigned int *)PyMem_Malloc(len*sizeof(int));
226 int i,j, opcode, blockcnt = 0;
227
228 if (blocks == NULL) {
229 PyErr_NoMemory();
230 return NULL;
231 }
232 memset(blocks, 0, len*sizeof(int));
233
234 /* Mark labels in the first pass */
235 for (i=0 ; i<len ; i+=CODESIZE(opcode)) {
236 opcode = code[i];
237 switch (opcode) {
238 case FOR_ITER:
239 case JUMP_FORWARD:
240 case JUMP_IF_FALSE:
241 case JUMP_IF_TRUE:
242 case JUMP_ABSOLUTE:
243 case CONTINUE_LOOP:
244 case SETUP_LOOP:
245 case SETUP_EXCEPT:
246 case SETUP_FINALLY:
247 j = GETJUMPTGT(code, i);
248 blocks[j] = 1;
249 break;
250 }
251 }
252 /* Build block numbers in the second pass */
253 for (i=0 ; i<len ; i++) {
254 blockcnt += blocks[i]; /* increment blockcnt over labels */
255 blocks[i] = blockcnt;
256 }
257 return blocks;
258}
259
/* Perform basic peephole optimizations to components of a code object.
   The consts object should still be in list form to allow new constants
   to be appended.

   To keep the optimizer simple, it bails out (does nothing) for code
   containing extended arguments or that has a length over 32,700.  That
   allows us to avoid overflow and sign issues.  Likewise, it bails when
   the lineno table has complex encoding for gaps >= 255.

   Optimizations are restricted to simple transformations occurring within a
   single basic block.  All transformations keep the code size the same or
   smaller.  For those that reduce size, the gaps are initially filled with
   NOPs.  Later those NOPs are removed and the jump addresses retargeted in
   a single pass.  Line numbering is adjusted accordingly. */

/* Returns a new reference: either a new optimized code string, or (on any
   bail-out path) the original `code` with its refcount incremented.
   `lineno_obj` is mutated in place to remap line numbers. */
PyObject *
PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,
                PyObject *lineno_obj)
{
	Py_ssize_t i, j, codelen;
	int nops, h, adj;
	int tgt, tgttgt, opcode;
	unsigned char *codestr = NULL;
	unsigned char *lineno;
	int *addrmap = NULL;
	int new_line, cum_orig_line, last_line, tabsiz;
	int cumlc=0, lastlc=0;	/* Count runs of consecutive LOAD_CONSTs */
	unsigned int *blocks = NULL;
	char *name;

	/* Bail out if an exception is set */
	if (PyErr_Occurred())
		goto exitUnchanged;

	/* Bypass optimization when the lineno table is too complex:
	   a 255 byte marks a multi-entry gap encoding that the simple
	   remapping pass below cannot handle. */
	assert(PyString_Check(lineno_obj));
	lineno = (unsigned char*)PyString_AS_STRING(lineno_obj);
	tabsiz = PyString_GET_SIZE(lineno_obj);
	if (memchr(lineno, 255, tabsiz) != NULL)
		goto exitUnchanged;

	/* Avoid situations where jump retargeting could overflow */
	assert(PyString_Check(code));
	codelen = PyString_Size(code);
	if (codelen > 32700)
		goto exitUnchanged;

	/* Make a modifiable copy of the code string */
	codestr = (unsigned char *)PyMem_Malloc(codelen);
	if (codestr == NULL)
		goto exitUnchanged;
	codestr = (unsigned char *)memcpy(codestr,
					  PyString_AS_STRING(code), codelen);

	/* Verify that RETURN_VALUE terminates the codestring.  This allows
	   the various transformation patterns to look ahead several
	   instructions without additional checks to make sure they are not
	   looking beyond the end of the code string.
	*/
	if (codestr[codelen-1] != RETURN_VALUE)
		goto exitUnchanged;

	/* Mapping to new jump targets after NOPs are removed */
	addrmap = (int *)PyMem_Malloc(codelen * sizeof(int));
	if (addrmap == NULL)
		goto exitUnchanged;

	blocks = markblocks(codestr, codelen);
	if (blocks == NULL)
		goto exitUnchanged;
	assert(PyList_Check(consts));

	/* Main transformation pass: walk one instruction at a time,
	   rewriting patterns in place (shrunk patterns leave NOPs). */
	for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
		opcode = codestr[i];

		/* Track how many consecutive LOAD_CONSTs precede this
		   instruction; used by the folding transformations. */
		lastlc = cumlc;
		cumlc = 0;

		switch (opcode) {

		/* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP
		   with JUMP_IF_TRUE POP_TOP */
		case UNARY_NOT:
			if (codestr[i+1] != JUMP_IF_FALSE ||
			    codestr[i+4] != POP_TOP ||
			    !ISBASICBLOCK(blocks,i,5))
				continue;
			tgt = GETJUMPTGT(codestr, (i+1));
			if (codestr[tgt] != POP_TOP)
				continue;
			/* +1 retargets past the POP_TOP at the old target */
			j = GETARG(codestr, i+1) + 1;
			codestr[i] = JUMP_IF_TRUE;
			SETARG(codestr, i, j);
			codestr[i+3] = POP_TOP;
			codestr[i+4] = NOP;
			break;

		/* not a is b -->  a is not b
		   not a in b -->  a not in b
		   not a is not b -->  a is b
		   not a not in b -->  a in b
		*/
		case COMPARE_OP:
			/* cmp_op indices 6-9 are in / not in / is / is not;
			   XOR with 1 flips each to its logical negation. */
			j = GETARG(codestr, i);
			if (j < 6  ||  j > 9  ||
			    codestr[i+3] != UNARY_NOT  ||
			    !ISBASICBLOCK(blocks,i,4))
				continue;
			SETARG(codestr, i, (j^1));
			codestr[i+3] = NOP;
			break;

		/* Replace LOAD_GLOBAL/LOAD_NAME None
		   with LOAD_CONST None */
		case LOAD_NAME:
		case LOAD_GLOBAL:
			j = GETARG(codestr, i);
			name = PyString_AsString(PyTuple_GET_ITEM(names, j));
			if (name == NULL  ||  strcmp(name, "None") != 0)
				continue;
			/* Reuse an existing None entry in consts if any;
			   otherwise leave the instruction unchanged. */
			for (j=0 ; j < PyList_GET_SIZE(consts) ; j++) {
				if (PyList_GET_ITEM(consts, j) == Py_None) {
					codestr[i] = LOAD_CONST;
					SETARG(codestr, i, j);
					cumlc = lastlc + 1;
					break;
				}
			}
			break;

		/* Skip over LOAD_CONST trueconst
		   JUMP_IF_FALSE xx  POP_TOP */
		case LOAD_CONST:
			cumlc = lastlc + 1;
			j = GETARG(codestr, i);
			if (codestr[i+3] != JUMP_IF_FALSE  ||
			    codestr[i+6] != POP_TOP  ||
			    !ISBASICBLOCK(blocks,i,7)  ||
			    !PyObject_IsTrue(PyList_GET_ITEM(consts, j)))
				continue;
			/* The jump can never fire: erase all 3 instructions */
			memset(codestr+i, NOP, 7);
			cumlc = 0;
			break;

		/* Try to fold tuples of constants (includes a case for lists
		   which are only used for "in" and "not in" tests).
		   Skip over BUILD_SEQN 1 UNPACK_SEQN 1.
		   Replace BUILD_SEQN 2 UNPACK_SEQN 2 with ROT2.
		   Replace BUILD_SEQN 3 UNPACK_SEQN 3 with ROT3 ROT2. */
		case BUILD_TUPLE:
		case BUILD_LIST:
			j = GETARG(codestr, i);
			/* h is the offset of the first LOAD_CONST feeding
			   this BUILD_* (3 bytes per LOAD_CONST) */
			h = i - 3 * j;
			if (h >= 0  &&
			    j <= lastlc  &&
			    ((opcode == BUILD_TUPLE &&
			      ISBASICBLOCK(blocks, h, 3*(j+1))) ||
			     (opcode == BUILD_LIST &&
			      codestr[i+3]==COMPARE_OP &&
			      ISBASICBLOCK(blocks, h, 3*(j+2)) &&
			      (GETARG(codestr,i+3)==6 ||
			       GETARG(codestr,i+3)==7))) &&
			    tuple_of_constants(&codestr[h], j, consts)) {
				assert(codestr[i] == LOAD_CONST);
				cumlc = 1;
				break;
			}
			if (codestr[i+3] != UNPACK_SEQUENCE  ||
			    !ISBASICBLOCK(blocks,i,6) ||
			    j != GETARG(codestr, i+3))
				continue;
			if (j == 1) {
				memset(codestr+i, NOP, 6);
			} else if (j == 2) {
				codestr[i] = ROT_TWO;
				memset(codestr+i+1, NOP, 5);
			} else if (j == 3) {
				codestr[i] = ROT_THREE;
				codestr[i+1] = ROT_TWO;
				memset(codestr+i+2, NOP, 4);
			}
			break;

		/* Fold binary ops on constants.
		   LOAD_CONST c1 LOAD_CONST c2 BINOP -->  LOAD_CONST binop(c1,c2) */
		case BINARY_POWER:
		case BINARY_MULTIPLY:
		case BINARY_TRUE_DIVIDE:
		case BINARY_FLOOR_DIVIDE:
		case BINARY_MODULO:
		case BINARY_ADD:
		case BINARY_SUBTRACT:
		case BINARY_SUBSCR:
		case BINARY_LSHIFT:
		case BINARY_RSHIFT:
		case BINARY_AND:
		case BINARY_XOR:
		case BINARY_OR:
			/* i-6 is the first of the two LOAD_CONSTs; on
			   success i backs up by 2 so the loop resumes at
			   the new LOAD_CONST (i-6 + 4 NOPs = i-2). */
			if (lastlc >= 2	 &&
			    ISBASICBLOCK(blocks, i-6, 7)  &&
			    fold_binops_on_constants(&codestr[i-6], consts)) {
				i -= 2;
				assert(codestr[i] == LOAD_CONST);
				cumlc = 1;
			}
			break;

		/* Fold unary ops on constants.
		   LOAD_CONST c1  UNARY_OP -->	LOAD_CONST unary_op(c) */
		case UNARY_NEGATIVE:
		case UNARY_INVERT:
			if (lastlc >= 1	 &&
			    ISBASICBLOCK(blocks, i-3, 4)  &&
			    fold_unaryops_on_constants(&codestr[i-3], consts)) {
				i -= 2;
				assert(codestr[i] == LOAD_CONST);
				cumlc = 1;
			}
			break;

		/* Simplify conditional jump to conditional jump where the
		   result of the first test implies the success of a similar
		   test or the failure of the opposite test.
		   Arises in code like:
		   "if a and b:"
		   "if a or b:"
		   "a and b or c"
		   "(a and b) and c"
		   x:JUMP_IF_FALSE y   y:JUMP_IF_FALSE z  -->  x:JUMP_IF_FALSE z
		   x:JUMP_IF_FALSE y   y:JUMP_IF_TRUE z	 -->  x:JUMP_IF_FALSE y+3
		   where y+3 is the instruction following the second test.
		*/
		case JUMP_IF_FALSE:
		case JUMP_IF_TRUE:
			tgt = GETJUMPTGT(codestr, i);
			j = codestr[tgt];
			if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) {
				if (j == opcode) {
					/* same sense: jump straight through */
					tgttgt = GETJUMPTGT(codestr, tgt) - i - 3;
					SETARG(codestr, i, tgttgt);
				} else {
					/* opposite sense: land just past it */
					tgt -= i;
					SETARG(codestr, i, tgt);
				}
				break;
			}
			/* Intentional fallthrough */

		/* Replace jumps to unconditional jumps */
		case FOR_ITER:
		case JUMP_FORWARD:
		case JUMP_ABSOLUTE:
		case CONTINUE_LOOP:
		case SETUP_LOOP:
		case SETUP_EXCEPT:
		case SETUP_FINALLY:
			tgt = GETJUMPTGT(codestr, i);
			if (!UNCONDITIONAL_JUMP(codestr[tgt]))
				continue;
			tgttgt = GETJUMPTGT(codestr, tgt);
			if (opcode == JUMP_FORWARD) /* JMP_ABS can go backwards */
				opcode = JUMP_ABSOLUTE;
			if (!ABSOLUTE_JUMP(opcode))
				tgttgt -= i + 3;     /* Calc relative jump addr */
			if (tgttgt < 0)		  /* No backward relative jumps */
				continue;
			codestr[i] = opcode;
			SETARG(codestr, i, tgttgt);
			break;

		case EXTENDED_ARG:
			/* GETARG/SETARG only handle 16-bit args: give up */
			goto exitUnchanged;

		/* Replace RETURN LOAD_CONST None RETURN with just RETURN */
		case RETURN_VALUE:
			if (i+4 >= codelen  ||
			    codestr[i+4] != RETURN_VALUE  ||
			    !ISBASICBLOCK(blocks,i,5))
				continue;
			memset(codestr+i+1, NOP, 4);
			break;
		}
	}

	/* Fixup linenotab: addrmap[old offset] = new offset once all NOPs
	   preceding it have been squeezed out. */
	for (i=0, nops=0 ; i<codelen ; i += CODESIZE(codestr[i])) {
		addrmap[i] = i - nops;
		if (codestr[i] == NOP)
			nops++;
	}
	cum_orig_line = 0;
	last_line = 0;
	/* lnotab is (addr delta, line delta) byte pairs; rewrite the
	   address deltas in terms of the remapped offsets. */
	for (i=0 ; i < tabsiz ; i+=2) {
		cum_orig_line += lineno[i];
		new_line = addrmap[cum_orig_line];
		/* Guaranteed by the memchr(lineno, 255, ...) bail-out above */
		assert (new_line - last_line < 255);
		lineno[i] =((unsigned char)(new_line - last_line));
		last_line = new_line;
	}

	/* Remove NOPs and fixup jump targets: i reads, h writes (h <= i) */
	for (i=0, h=0 ; i<codelen ; ) {
		opcode = codestr[i];
		switch (opcode) {
			case NOP:
				i++;
				continue;

			case JUMP_ABSOLUTE:
			case CONTINUE_LOOP:
				/* absolute target: remap directly */
				j = addrmap[GETARG(codestr, i)];
				SETARG(codestr, i, j);
				break;

			case FOR_ITER:
			case JUMP_FORWARD:
			case JUMP_IF_FALSE:
			case JUMP_IF_TRUE:
			case SETUP_LOOP:
			case SETUP_EXCEPT:
			case SETUP_FINALLY:
				/* relative target: remap both endpoints */
				j = addrmap[GETARG(codestr, i) + i + 3] - addrmap[i] - 3;
				SETARG(codestr, i, j);
				break;
		}
		adj = CODESIZE(opcode);
		while (adj--)
			codestr[h++] = codestr[i++];
	}
	assert(h + nops == codelen);

	code = PyString_FromStringAndSize((char *)codestr, h);
	PyMem_Free(addrmap);
	PyMem_Free(codestr);
	PyMem_Free(blocks);
	return code;

 exitUnchanged:
	/* Any failure: free whatever was allocated and hand back the
	   original code object with a fresh reference. */
	if (blocks != NULL)
		PyMem_Free(blocks);
	if (addrmap != NULL)
		PyMem_Free(addrmap);
	if (codestr != NULL)
		PyMem_Free(codestr);
	Py_INCREF(code);
	return code;
}