blob: ca8f32358f04849f0cc9929f257dbeb2ed342659 [file] [log] [blame]
sewardja1a370f2004-08-17 13:31:55 +00001
2/*---------------------------------------------------------------*/
3/*--- ---*/
4/*--- This file (ir/iropt.c) is ---*/
5/*--- Copyright (c) 2004 OpenWorks LLP. All rights reserved. ---*/
6/*--- ---*/
7/*---------------------------------------------------------------*/
8
9#include "libvex_basictypes.h"
sewardjedf4d692004-08-17 13:52:58 +000010#include "libvex_ir.h"
sewardja1a370f2004-08-17 13:31:55 +000011#include "libvex.h"
12
13#include "main/vex_util.h"
sewardjedf4d692004-08-17 13:52:58 +000014#include "ir/iropt.h"
sewardja1a370f2004-08-17 13:31:55 +000015
sewardj39555aa2004-10-24 22:29:19 +000016/* Possibly disable iropt for drastic debugging:
17 Set to 0 to completely disable iropt
18 1 for simple optimisation
19 2 for maximum optimisation (the default) */
20#define IROPT_LEVEL 0
sewardjd0863ff2004-10-23 00:22:32 +000021
sewardj088bcb42004-08-19 17:16:52 +000022/* Set to 1 for lots of debugging output. */
23#define DEBUG_IROPT 0
24
sewardj2d04d112004-10-13 20:44:46 +000025/* Controls the enthusiasm of the loop unroller. It tries to
26 unroll loops enough times to get somewhere near this number of SSA
27 statements, as measured after initial cleanup pass. Set to zero to
28 disable the unroller. */
sewardj2d04d112004-10-13 20:44:46 +000029#define UNROLL_TARGET 120
sewardj2d04d112004-10-13 20:44:46 +000030
sewardj73017432004-10-14 19:33:25 +000031/* Set to 1 to get details of loop unrolling. */
32#define UNROLL_VERBOSE 0
33
sewardja1a370f2004-08-17 13:31:55 +000034
sewardjc0b42282004-10-12 13:44:12 +000035/* Implementation notes, 12 Oct 04.
36
37 F64i constants are treated differently from other constants.
38 They are not regarded as atoms, and instead lifted off and
39 bound to temps. This allows them to participate in CSE, which
40 is important for getting good performance for x86 guest code.
sewardj695cff92004-10-13 14:50:14 +000041
42 ToDo:
43
44 make spec_helpers_BB always return flat code
sewardja5aa9cf2004-10-15 22:56:38 +000045
46 CSE up F64 literals (already doing F64is)
sewardjc0b42282004-10-12 13:44:12 +000047*/
48
49
sewardja1a370f2004-08-17 13:31:55 +000050/*---------------------------------------------------------------*/
51/*--- Finite mappery, of a sort ---*/
52/*---------------------------------------------------------------*/
53
sewardjea602bc2004-10-14 21:40:12 +000054/* General map from HWord-sized thing HWord-sized thing. Could be
55 done faster by hashing. */
sewardja1a370f2004-08-17 13:31:55 +000056
57typedef
58 struct {
59 Bool* inuse;
sewardjea602bc2004-10-14 21:40:12 +000060 HWord* key;
61 HWord* val;
sewardja1a370f2004-08-17 13:31:55 +000062 Int size;
63 Int used;
64 }
sewardjea602bc2004-10-14 21:40:12 +000065 HashHW;
sewardja1a370f2004-08-17 13:31:55 +000066
sewardjea602bc2004-10-14 21:40:12 +000067static HashHW* newHHW ( void )
sewardja1a370f2004-08-17 13:31:55 +000068{
sewardjea602bc2004-10-14 21:40:12 +000069 HashHW* h = LibVEX_Alloc(sizeof(HashHW));
sewardj29632392004-08-22 02:38:11 +000070 h->size = 8;
sewardja1a370f2004-08-17 13:31:55 +000071 h->used = 0;
72 h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
sewardjea602bc2004-10-14 21:40:12 +000073 h->key = LibVEX_Alloc(h->size * sizeof(HWord));
74 h->val = LibVEX_Alloc(h->size * sizeof(HWord));
sewardja1a370f2004-08-17 13:31:55 +000075 return h;
76}
77
78
sewardj84be7372004-08-18 13:59:33 +000079/* Look up key in the map. */
sewardja1a370f2004-08-17 13:31:55 +000080
sewardjea602bc2004-10-14 21:40:12 +000081static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
sewardja1a370f2004-08-17 13:31:55 +000082{
83 Int i;
sewardjea602bc2004-10-14 21:40:12 +000084 //vex_printf("lookupHHW(%llx)\n", key );
sewardja1a370f2004-08-17 13:31:55 +000085 for (i = 0; i < h->used; i++) {
86 if (h->inuse[i] && h->key[i] == key) {
sewardj39e3f242004-08-18 16:54:52 +000087 if (val)
88 *val = h->val[i];
sewardja1a370f2004-08-17 13:31:55 +000089 return True;
90 }
91 }
92 return False;
93}
94
95
sewardj575670c2004-10-12 09:13:19 +000096#if 0
97/* Apparently unused */
sewardja1a370f2004-08-17 13:31:55 +000098/* Delete any binding for key in h. */
99
sewardjea602bc2004-10-14 21:40:12 +0000100static void delFromHHW ( HashHW* h, HWord key )
sewardja1a370f2004-08-17 13:31:55 +0000101{
102 Int i;
103 for (i = 0; i < h->used; i++) {
104 if (h->inuse[i] && h->key[i] == key) {
105 h->inuse[i] = False;
106 return;
107 }
108 }
109}
sewardj575670c2004-10-12 09:13:19 +0000110#endif
sewardja1a370f2004-08-17 13:31:55 +0000111
112
113/* Add key->val to the map. Replaces any existing binding for key. */
114
sewardjea602bc2004-10-14 21:40:12 +0000115static void addToHHW ( HashHW* h, HWord key, HWord val )
sewardja1a370f2004-08-17 13:31:55 +0000116{
117 Int i, j;
118
sewardjea602bc2004-10-14 21:40:12 +0000119 //vex_printf("addToHHW(%llx, %llx)\n", key, val);
sewardja1a370f2004-08-17 13:31:55 +0000120 /* Find and replace existing binding, if any. */
121 for (i = 0; i < h->used; i++) {
122 if (h->inuse[i] && h->key[i] == key) {
123 h->val[i] = val;
124 return;
125 }
126 }
127
128 /* Ensure a space is available. */
129 if (h->used == h->size) {
130 /* Copy into arrays twice the size. */
131 Bool* inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
sewardjea602bc2004-10-14 21:40:12 +0000132 HWord* key2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
133 HWord* val2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
sewardja1a370f2004-08-17 13:31:55 +0000134 for (i = j = 0; i < h->size; i++) {
135 if (!h->inuse[i]) continue;
136 inuse2[j] = True;
137 key2[j] = h->key[i];
138 val2[j] = h->val[i];
139 j++;
140 }
141 h->used = j;
142 h->size *= 2;
143 h->inuse = inuse2;
144 h->key = key2;
145 h->val = val2;
146 }
147
148 /* Finally, add it. */
149 vassert(h->used < h->size);
150 h->inuse[h->used] = True;
151 h->key[h->used] = key;
sewardj84be7372004-08-18 13:59:33 +0000152 h->val[h->used] = val;
sewardja1a370f2004-08-17 13:31:55 +0000153 h->used++;
154}
155
sewardj84be7372004-08-18 13:59:33 +0000156
sewardjd7cb8532004-08-17 23:59:23 +0000157/*---------------------------------------------------------------*/
158/*--- Flattening out a BB into pure SSA form ---*/
159/*---------------------------------------------------------------*/
160
sewardjedeb4c42004-09-21 23:39:25 +0000161inline
sewardje80679a2004-09-21 23:00:11 +0000162static Bool isAtom ( IRExpr* e )
163{
164 return e->tag == Iex_Tmp || e->tag == Iex_Const;
165}
166
167
sewardje80679a2004-09-21 23:00:11 +0000168/* Non-critical helper, heuristic for reducing the number of tmp-tmp
169 copies made by flattening. If in doubt return False. */
170
171static Bool isFlat ( IRExpr* e )
172{
sewardj695cff92004-10-13 14:50:14 +0000173 if (e->tag == Iex_Get)
174 return True;
sewardje80679a2004-09-21 23:00:11 +0000175 if (e->tag == Iex_Binop)
176 return isAtom(e->Iex.Binop.arg1) && isAtom(e->Iex.Binop.arg2);
177 if (e->tag == Iex_LDle)
178 return isAtom(e->Iex.LDle.addr);
179 return False;
180}
181
sewardjd7cb8532004-08-17 23:59:23 +0000182/* Flatten out 'ex' so it is atomic, returning a new expression with
183 the same value, after having appended extra IRTemp assignments to
184 the end of 'bb'. */
185
186static IRExpr* flatten_Expr ( IRBB* bb, IRExpr* ex )
187{
188 Int i;
189 IRExpr** newargs;
190 IRType ty = typeOfIRExpr(bb->tyenv, ex);
191 IRTemp t1;
192
193 switch (ex->tag) {
194
sewardjd7217032004-08-19 10:49:10 +0000195 case Iex_GetI:
196 t1 = newIRTemp(bb->tyenv, ty);
197 addStmtToIRBB(bb, IRStmt_Tmp(t1,
sewardj2d3f77c2004-09-22 23:49:09 +0000198 IRExpr_GetI(ex->Iex.GetI.descr,
199 flatten_Expr(bb, ex->Iex.GetI.off),
200 ex->Iex.GetI.bias)));
sewardjd7217032004-08-19 10:49:10 +0000201 return IRExpr_Tmp(t1);
202
sewardjd7cb8532004-08-17 23:59:23 +0000203 case Iex_Get:
204 t1 = newIRTemp(bb->tyenv, ty);
205 addStmtToIRBB(bb,
206 IRStmt_Tmp(t1, ex));
207 return IRExpr_Tmp(t1);
208
209 case Iex_Binop:
210 t1 = newIRTemp(bb->tyenv, ty);
211 addStmtToIRBB(bb, IRStmt_Tmp(t1,
212 IRExpr_Binop(ex->Iex.Binop.op,
213 flatten_Expr(bb, ex->Iex.Binop.arg1),
214 flatten_Expr(bb, ex->Iex.Binop.arg2))));
215 return IRExpr_Tmp(t1);
216
217 case Iex_Unop:
218 t1 = newIRTemp(bb->tyenv, ty);
219 addStmtToIRBB(bb, IRStmt_Tmp(t1,
220 IRExpr_Unop(ex->Iex.Unop.op,
221 flatten_Expr(bb, ex->Iex.Unop.arg))));
222 return IRExpr_Tmp(t1);
223
224 case Iex_LDle:
225 t1 = newIRTemp(bb->tyenv, ty);
226 addStmtToIRBB(bb, IRStmt_Tmp(t1,
227 IRExpr_LDle(ex->Iex.LDle.ty,
228 flatten_Expr(bb, ex->Iex.LDle.addr))));
229 return IRExpr_Tmp(t1);
230
231 case Iex_CCall:
sewardj695cff92004-10-13 14:50:14 +0000232 newargs = sopyIRExprVec(ex->Iex.CCall.args);
sewardjd7cb8532004-08-17 23:59:23 +0000233 for (i = 0; newargs[i]; i++)
234 newargs[i] = flatten_Expr(bb, newargs[i]);
235 t1 = newIRTemp(bb->tyenv, ty);
236 addStmtToIRBB(bb, IRStmt_Tmp(t1,
237 IRExpr_CCall(ex->Iex.CCall.name,
238 ex->Iex.CCall.retty,
239 newargs)));
240 return IRExpr_Tmp(t1);
241
242 case Iex_Mux0X:
243 t1 = newIRTemp(bb->tyenv, ty);
244 addStmtToIRBB(bb, IRStmt_Tmp(t1,
245 IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
246 flatten_Expr(bb, ex->Iex.Mux0X.expr0),
247 flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
248 return IRExpr_Tmp(t1);
249
250 case Iex_Const:
sewardjc0b42282004-10-12 13:44:12 +0000251 /* Lift F64i constants out onto temps so they can be CSEd
252 later. */
253 if (ex->Iex.Const.con->tag == Ico_F64i) {
254 t1 = newIRTemp(bb->tyenv, ty);
255 addStmtToIRBB(bb, IRStmt_Tmp(t1,
256 IRExpr_Const(ex->Iex.Const.con)));
257 return IRExpr_Tmp(t1);
258 } else {
259 /* Leave all other constants alone. */
260 return ex;
261 }
262
sewardjd7cb8532004-08-17 23:59:23 +0000263 case Iex_Tmp:
264 return ex;
265
266 default:
267 vex_printf("\n");
268 ppIRExpr(ex);
269 vex_printf("\n");
270 vpanic("flatten_Expr");
271 }
272}
273
274
275/* Append a completely flattened form of 'st' to the end of 'bb'. */
276
277static void flatten_Stmt ( IRBB* bb, IRStmt* st )
278{
sewardj17442fe2004-09-20 14:54:28 +0000279 Int i;
280 IRExpr *e1, *e2;
281 IRDirty *d, *d2;
sewardjd7cb8532004-08-17 23:59:23 +0000282 switch (st->tag) {
283 case Ist_Put:
sewardj6d076362004-09-23 11:06:17 +0000284 e1 = flatten_Expr(bb, st->Ist.Put.data);
sewardjd7cb8532004-08-17 23:59:23 +0000285 addStmtToIRBB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
286 break;
sewardjd7cb8532004-08-17 23:59:23 +0000287 case Ist_PutI:
sewardj2d3f77c2004-09-22 23:49:09 +0000288 e1 = flatten_Expr(bb, st->Ist.PutI.off);
289 e2 = flatten_Expr(bb, st->Ist.PutI.data);
290 addStmtToIRBB(bb, IRStmt_PutI(st->Ist.PutI.descr,
291 e1,
292 st->Ist.PutI.bias,
293 e2));
sewardjd7217032004-08-19 10:49:10 +0000294 break;
sewardjd7cb8532004-08-17 23:59:23 +0000295 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +0000296 if (isFlat(st->Ist.Tmp.data)) {
sewardje80679a2004-09-21 23:00:11 +0000297 /* optimisation, to reduce the number of tmp-tmp
298 copies generated */
299 addStmtToIRBB(bb, st);
300 } else {
301 /* general case, always correct */
sewardj6d076362004-09-23 11:06:17 +0000302 e1 = flatten_Expr(bb, st->Ist.Tmp.data);
sewardje80679a2004-09-21 23:00:11 +0000303 addStmtToIRBB(bb, IRStmt_Tmp(st->Ist.Tmp.tmp, e1));
304 }
sewardjd7cb8532004-08-17 23:59:23 +0000305 break;
306 case Ist_STle:
307 e1 = flatten_Expr(bb, st->Ist.STle.addr);
308 e2 = flatten_Expr(bb, st->Ist.STle.data);
309 addStmtToIRBB(bb, IRStmt_STle(e1,e2));
310 break;
sewardj17442fe2004-09-20 14:54:28 +0000311 case Ist_Dirty:
312 d = st->Ist.Dirty.details;
313 d2 = emptyIRDirty();
314 *d2 = *d;
sewardj695cff92004-10-13 14:50:14 +0000315 d2->args = sopyIRExprVec(d2->args);
sewardj17442fe2004-09-20 14:54:28 +0000316 if (d2->mFx != Ifx_None) {
317 d2->mAddr = flatten_Expr(bb, d2->mAddr);
318 } else {
319 vassert(d2->mAddr == NULL);
320 }
321 for (i = 0; d2->args[i]; i++)
322 d2->args[i] = flatten_Expr(bb, d2->args[i]);
323 addStmtToIRBB(bb, IRStmt_Dirty(d2));
324 break;
sewardjd7cb8532004-08-17 23:59:23 +0000325 case Ist_Exit:
326 e1 = flatten_Expr(bb, st->Ist.Exit.cond);
327 addStmtToIRBB(bb, IRStmt_Exit(e1, st->Ist.Exit.dst));
328 break;
329 default:
330 vex_printf("\n");
331 ppIRStmt(st);
332 vex_printf("\n");
333 vpanic("flatten_Stmt");
334 }
335}
336
337static IRBB* flatten_BB ( IRBB* in )
338{
339 Int i;
340 IRBB* out;
341 out = emptyIRBB();
sewardj695cff92004-10-13 14:50:14 +0000342 out->tyenv = dopyIRTypeEnv( in->tyenv );
sewardjd7cb8532004-08-17 23:59:23 +0000343 for (i = 0; i < in->stmts_used; i++)
sewardj4345f7a2004-09-22 19:49:27 +0000344 if (in->stmts[i])
345 flatten_Stmt( out, in->stmts[i] );
sewardjd7cb8532004-08-17 23:59:23 +0000346 out->next = flatten_Expr( out, in->next );
347 out->jumpkind = in->jumpkind;
348 return out;
349}
350
sewardjedf4d692004-08-17 13:52:58 +0000351
sewardj84be7372004-08-18 13:59:33 +0000352
353/*---------------------------------------------------------------*/
354/*--- Constant propagation and folding ---*/
355/*---------------------------------------------------------------*/
356
sewardj62617ef2004-10-13 23:29:22 +0000357/* The env in this section is a map from IRTemp to IRExpr*,
358 that is, an array indexed by IRTemp. */
sewardjf6501992004-08-27 11:58:24 +0000359
sewardjf6729012004-08-25 12:45:13 +0000360/* Are both expressions simply the same IRTemp ? */
361static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
362{
363 return e1->tag == Iex_Tmp
364 && e2->tag == Iex_Tmp
365 && e1->Iex.Tmp.tmp == e2->Iex.Tmp.tmp;
366}
367
sewardj84be7372004-08-18 13:59:33 +0000368static IRExpr* fold_Expr ( IRExpr* e )
369{
sewardj278c44c2004-08-20 00:28:13 +0000370 Int shift;
sewardj84be7372004-08-18 13:59:33 +0000371 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
372
373 /* UNARY ops */
374 if (e->tag == Iex_Unop
375 && e->Iex.Unop.arg->tag == Iex_Const) {
376 switch (e->Iex.Unop.op) {
sewardjae27ab62004-10-15 21:21:46 +0000377 case Iop_1Uto8:
378 e2 = IRExpr_Const(IRConst_U8(
379 e->Iex.Unop.arg->Iex.Const.con->Ico.Bit
380 ? 1 : 0));
381 break;
sewardjf4a821d2004-10-09 00:58:19 +0000382 case Iop_1Uto32:
383 e2 = IRExpr_Const(IRConst_U32(
384 e->Iex.Unop.arg->Iex.Const.con->Ico.Bit
385 ? 1 : 0));
386 break;
sewardj883b00b2004-09-11 09:30:24 +0000387 case Iop_8Sto32: {
388 /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
389 s32 <<= 24;
390 s32 >>= 24;
391 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
392 break;
393 }
sewardj84be7372004-08-18 13:59:33 +0000394 case Iop_8Uto32:
395 e2 = IRExpr_Const(IRConst_U32(
396 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
397 break;
398 case Iop_16Uto32:
399 e2 = IRExpr_Const(IRConst_U32(
400 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
401 break;
sewardj73017432004-10-14 19:33:25 +0000402 case Iop_32to16:
403 e2 = IRExpr_Const(IRConst_U16(
404 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
405 break;
sewardj4345f7a2004-09-22 19:49:27 +0000406 case Iop_32to8:
407 e2 = IRExpr_Const(IRConst_U8(
408 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
409 break;
sewardj7447b5b2004-10-16 11:30:42 +0000410 case Iop_32to1:
411 e2 = IRExpr_Const(IRConst_Bit(
412 0==e->Iex.Unop.arg->Iex.Const.con->Ico.U32
413 ? False : True));
414 break;
sewardj883b00b2004-09-11 09:30:24 +0000415 case Iop_Not32:
416 e2 = IRExpr_Const(IRConst_U32(
417 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
418 break;
sewardj84be7372004-08-18 13:59:33 +0000419 default:
420 goto unhandled;
421 }
422 }
423
424 /* BINARY ops */
425 if (e->tag == Iex_Binop) {
426 if (e->Iex.Binop.arg1->tag == Iex_Const
427 && e->Iex.Binop.arg2->tag == Iex_Const) {
428 /* cases where both args are consts */
429 switch (e->Iex.Binop.op) {
sewardj883b00b2004-09-11 09:30:24 +0000430 case Iop_Xor8:
431 e2 = IRExpr_Const(IRConst_U8(0xFF &
432 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
433 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)));
434 break;
sewardj84be7372004-08-18 13:59:33 +0000435 case Iop_And8:
436 e2 = IRExpr_Const(IRConst_U8(0xFF &
437 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
438 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)));
439 break;
sewardj4345f7a2004-09-22 19:49:27 +0000440 case Iop_Add8:
441 e2 = IRExpr_Const(IRConst_U8(0xFF &
442 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
443 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)));
444 break;
sewardj84be7372004-08-18 13:59:33 +0000445 case Iop_Sub8:
446 e2 = IRExpr_Const(IRConst_U8(0xFF &
447 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
448 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)));
449 break;
sewardjd7217032004-08-19 10:49:10 +0000450 case Iop_Sub32:
451 e2 = IRExpr_Const(IRConst_U32(
452 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
453 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
454 break;
455 case Iop_Add32:
456 e2 = IRExpr_Const(IRConst_U32(
457 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
458 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
459 break;
sewardj883b00b2004-09-11 09:30:24 +0000460 case Iop_Xor32:
461 e2 = IRExpr_Const(IRConst_U32(
462 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
463 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
464 break;
sewardjd7217032004-08-19 10:49:10 +0000465 case Iop_And32:
466 e2 = IRExpr_Const(IRConst_U32(
467 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
468 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
469 break;
sewardjb8e75862004-08-19 17:58:45 +0000470 case Iop_Or32:
471 e2 = IRExpr_Const(IRConst_U32(
472 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
473 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
474 break;
sewardjb9c5cf62004-08-24 15:10:38 +0000475 case Iop_Mul32:
476 e2 = IRExpr_Const(IRConst_U32(
477 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
478 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
479 break;
sewardjd7217032004-08-19 10:49:10 +0000480 case Iop_Shl32:
sewardj61348472004-08-20 01:01:04 +0000481 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
482 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
sewardj29632392004-08-22 02:38:11 +0000483 if (shift >= 0 && shift <= 31)
sewardj278c44c2004-08-20 00:28:13 +0000484 e2 = IRExpr_Const(IRConst_U32(
485 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
486 << shift)));
sewardjd7217032004-08-19 10:49:10 +0000487 break;
sewardj278c44c2004-08-20 00:28:13 +0000488 case Iop_Sar32: {
489 /* paranoid ... */
490 /*signed*/ Int s32;
sewardj61348472004-08-20 01:01:04 +0000491 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
sewardj278c44c2004-08-20 00:28:13 +0000492 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
sewardj61348472004-08-20 01:01:04 +0000493 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
sewardj278c44c2004-08-20 00:28:13 +0000494 if (shift >= 0 && shift <= 31) {
495 s32 >>=/*signed*/ shift;
496 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
497 }
498 break;
499 }
sewardj61348472004-08-20 01:01:04 +0000500 case Iop_Shr32: {
501 /* paranoid ... */
502 /*unsigned*/ UInt s32;
503 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
504 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
505 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
506 if (shift >= 0 && shift <= 31) {
507 s32 >>=/*unsigned*/ shift;
508 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
509 }
510 break;
511 }
sewardjb8e75862004-08-19 17:58:45 +0000512 case Iop_CmpEQ32:
513 e2 = IRExpr_Const(IRConst_Bit(
514 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
515 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
516 break;
sewardjae27ab62004-10-15 21:21:46 +0000517 case Iop_CmpNE32:
518 e2 = IRExpr_Const(IRConst_Bit(
519 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
520 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
521 break;
sewardj7447b5b2004-10-16 11:30:42 +0000522
523 case Iop_CmpLE32U:
524 e2 = IRExpr_Const(IRConst_Bit(
525 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
526 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
527 break;
sewardj088e4f72004-10-19 01:25:02 +0000528 case Iop_CmpLE32S:
529 e2 = IRExpr_Const(IRConst_Bit(
530 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
531 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
532 break;
sewardj9bdd2652004-10-19 12:56:33 +0000533 case Iop_CmpLT32S:
534 e2 = IRExpr_Const(IRConst_Bit(
535 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
536 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
537 break;
sewardj7447b5b2004-10-16 11:30:42 +0000538
sewardj088bcb42004-08-19 17:16:52 +0000539 case Iop_32HLto64:
540 e2 = IRExpr_Const(IRConst_U64(
541 (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
542 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
543 ));
544 break;
sewardj607dd4f2004-09-08 18:20:19 +0000545 default:
546 goto unhandled;
sewardjd7217032004-08-19 10:49:10 +0000547 }
sewardjf6729012004-08-25 12:45:13 +0000548
sewardj84be7372004-08-18 13:59:33 +0000549 } else {
sewardjf6729012004-08-25 12:45:13 +0000550
sewardj84be7372004-08-18 13:59:33 +0000551 /* other cases (identities, etc) */
sewardj52345402004-10-13 16:08:14 +0000552 /* Shl32(x,0) ==> x */
553 if (e->Iex.Binop.op == Iop_Shl32
554 && e->Iex.Binop.arg2->tag == Iex_Const
sewardj4980c6b2004-10-13 16:16:27 +0000555 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
sewardj52345402004-10-13 16:08:14 +0000556 e2 = e->Iex.Binop.arg1;
557 } else
558
sewardjf6729012004-08-25 12:45:13 +0000559 /* Add32(x,0) ==> x */
sewardj84be7372004-08-18 13:59:33 +0000560 if (e->Iex.Binop.op == Iop_Add32
561 && e->Iex.Binop.arg2->tag == Iex_Const
562 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
563 e2 = e->Iex.Binop.arg1;
sewardjf6729012004-08-25 12:45:13 +0000564 } else
565
566 /* And8/16/32(t,t) ==> t, for some IRTemp t */
567 if ((e->Iex.Binop.op == Iop_And32
568 || e->Iex.Binop.op == Iop_And16
569 || e->Iex.Binop.op == Iop_And8)
570 && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
571 e2 = e->Iex.Binop.arg1;
sewardjaba4fff2004-10-08 21:37:15 +0000572 }
sewardjf6729012004-08-25 12:45:13 +0000573
sewardj84be7372004-08-18 13:59:33 +0000574 }
575 }
576
577 /* Mux0X */
578 if (e->tag == Iex_Mux0X
579 && e->Iex.Mux0X.cond->tag == Iex_Const) {
580 Bool zero;
581 /* assured us by the IR type rules */
582 vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
583 zero = 0 == e->Iex.Mux0X.cond->Iex.Const.con->Ico.U8;
584 e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
585 }
586
sewardj088bcb42004-08-19 17:16:52 +0000587 if (DEBUG_IROPT && e2 != e) {
588 vex_printf("FOLD: ");
sewardj84be7372004-08-18 13:59:33 +0000589 ppIRExpr(e); vex_printf(" -> ");
590 ppIRExpr(e2); vex_printf("\n");
591 }
592
593 return e2;
594
595 unhandled:
sewardj883b00b2004-09-11 09:30:24 +0000596# if 0
sewardj84be7372004-08-18 13:59:33 +0000597 vex_printf("\n\n");
598 ppIRExpr(e);
599 vpanic("fold_Expr: no rule for the above");
sewardj883b00b2004-09-11 09:30:24 +0000600# else
601 vex_printf("vex iropt: fold_Expr: no rule for: ");
602 ppIRExpr(e);
603 vex_printf("\n");
604 return e2;
605# endif
sewardj84be7372004-08-18 13:59:33 +0000606}
607
608
sewardj84be7372004-08-18 13:59:33 +0000609/* Apply the subst to a simple 1-level expression -- guaranteed to be
610 1-level due to previous flattening pass. */
611
sewardj62617ef2004-10-13 23:29:22 +0000612static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
sewardj84be7372004-08-18 13:59:33 +0000613{
sewardj62617ef2004-10-13 23:29:22 +0000614 switch (ex->tag) {
615 case Iex_Tmp:
616 if (env[(Int)ex->Iex.Tmp.tmp] != NULL) {
617 return env[(Int)ex->Iex.Tmp.tmp];
618 } else {
619 /* not bound in env */
620 return ex;
621 }
622
623 case Iex_Const:
624 case Iex_Get:
sewardj84be7372004-08-18 13:59:33 +0000625 return ex;
sewardj62617ef2004-10-13 23:29:22 +0000626
627 case Iex_GetI:
628 vassert(isAtom(ex->Iex.GetI.off));
629 return IRExpr_GetI(
630 ex->Iex.GetI.descr,
631 subst_Expr(env, ex->Iex.GetI.off),
632 ex->Iex.GetI.bias
633 );
634
635 case Iex_Binop:
636 vassert(isAtom(ex->Iex.Binop.arg1));
637 vassert(isAtom(ex->Iex.Binop.arg2));
638 return IRExpr_Binop(
639 ex->Iex.Binop.op,
640 subst_Expr(env, ex->Iex.Binop.arg1),
641 subst_Expr(env, ex->Iex.Binop.arg2)
642 );
643
644 case Iex_Unop:
645 vassert(isAtom(ex->Iex.Unop.arg));
646 return IRExpr_Unop(
647 ex->Iex.Unop.op,
648 subst_Expr(env, ex->Iex.Unop.arg)
649 );
650
651 case Iex_LDle:
652 vassert(isAtom(ex->Iex.LDle.addr));
653 return IRExpr_LDle(
654 ex->Iex.LDle.ty,
655 subst_Expr(env, ex->Iex.LDle.addr)
656 );
657
658 case Iex_CCall: {
659 Int i;
660 IRExpr** args2 = sopyIRExprVec(ex->Iex.CCall.args);
661 for (i = 0; args2[i]; i++) {
662 vassert(isAtom(args2[i]));
663 args2[i] = subst_Expr(env, args2[i]);
664 }
665 return IRExpr_CCall(
666 ex->Iex.CCall.name,
667 ex->Iex.CCall.retty,
668 args2
669 );
sewardj84be7372004-08-18 13:59:33 +0000670 }
sewardj62617ef2004-10-13 23:29:22 +0000671
672 case Iex_Mux0X:
673 vassert(isAtom(ex->Iex.Mux0X.cond));
674 vassert(isAtom(ex->Iex.Mux0X.expr0));
675 vassert(isAtom(ex->Iex.Mux0X.exprX));
676 return IRExpr_Mux0X(
677 subst_Expr(env, ex->Iex.Mux0X.cond),
678 subst_Expr(env, ex->Iex.Mux0X.expr0),
679 subst_Expr(env, ex->Iex.Mux0X.exprX)
680 );
681
682 default:
683 vex_printf("\n\n"); ppIRExpr(ex);
684 vpanic("subst_Expr");
685
sewardj84be7372004-08-18 13:59:33 +0000686 }
sewardj84be7372004-08-18 13:59:33 +0000687}
688
689
690/* Apply the subst to stmt, then fold the result as much as possible.
sewardjb8e75862004-08-19 17:58:45 +0000691 Much simplified due to stmt being previously flattened. Returning
692 NULL means the statement has been turned into a no-op. */
sewardj84be7372004-08-18 13:59:33 +0000693
sewardj62617ef2004-10-13 23:29:22 +0000694static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
sewardj84be7372004-08-18 13:59:33 +0000695{
696# if 0
697 vex_printf("\nsubst and fold stmt\n");
698 ppIRStmt(st);
699 vex_printf("\n");
700# endif
701
sewardj62617ef2004-10-13 23:29:22 +0000702 switch (st->tag) {
703 case Ist_Put:
704 vassert(isAtom(st->Ist.Put.data));
705 return IRStmt_Put(
706 st->Ist.Put.offset,
707 fold_Expr(subst_Expr(env, st->Ist.Put.data))
708 );
sewardj84be7372004-08-18 13:59:33 +0000709
sewardj62617ef2004-10-13 23:29:22 +0000710 case Ist_PutI:
711 vassert(isAtom(st->Ist.PutI.off));
712 vassert(isAtom(st->Ist.PutI.data));
713 return IRStmt_PutI(
714 st->Ist.PutI.descr,
715 fold_Expr(subst_Expr(env, st->Ist.PutI.off)),
716 st->Ist.PutI.bias,
717 fold_Expr(subst_Expr(env, st->Ist.PutI.data))
718 );
sewardjd7217032004-08-19 10:49:10 +0000719
sewardj62617ef2004-10-13 23:29:22 +0000720 case Ist_Tmp:
721 /* This is the one place where an expr (st->Ist.Tmp.data) is
722 allowed to be more than just a constant or a tmp. */
723 return IRStmt_Tmp(
724 st->Ist.Tmp.tmp,
725 fold_Expr(subst_Expr(env, st->Ist.Tmp.data))
726 );
sewardj84be7372004-08-18 13:59:33 +0000727
sewardj62617ef2004-10-13 23:29:22 +0000728 case Ist_STle:
729 vassert(isAtom(st->Ist.STle.addr));
730 vassert(isAtom(st->Ist.STle.data));
731 return IRStmt_STle(
732 fold_Expr(subst_Expr(env, st->Ist.STle.addr)),
733 fold_Expr(subst_Expr(env, st->Ist.STle.data))
734 );
sewardj84be7372004-08-18 13:59:33 +0000735
sewardj62617ef2004-10-13 23:29:22 +0000736 case Ist_Dirty: {
737 Int i;
738 IRDirty *d, *d2;
739 d = st->Ist.Dirty.details;
740 d2 = emptyIRDirty();
741 *d2 = *d;
742 d2->args = sopyIRExprVec(d2->args);
743 if (d2->mFx != Ifx_None) {
744 vassert(isAtom(d2->mAddr));
745 d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
sewardjb8e75862004-08-19 17:58:45 +0000746 }
sewardj62617ef2004-10-13 23:29:22 +0000747 for (i = 0; d2->args[i]; i++) {
748 vassert(isAtom(d2->args[i]));
749 d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
750 }
751 return IRStmt_Dirty(d2);
sewardjb8e75862004-08-19 17:58:45 +0000752 }
sewardj84be7372004-08-18 13:59:33 +0000753
sewardj62617ef2004-10-13 23:29:22 +0000754 case Ist_Exit: {
755 IRExpr* fcond;
756 vassert(isAtom(st->Ist.Exit.cond));
757 fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.cond));
758 if (fcond->tag == Iex_Const) {
759 /* Interesting. The condition on this exit has folded down to
760 a constant. */
761 vassert(fcond->Iex.Const.con->tag == Ico_Bit);
762 if (fcond->Iex.Const.con->Ico.Bit == False) {
763 /* exit is never going to happen, so dump the statement. */
764 return NULL;
765 } else {
766 vassert(fcond->Iex.Const.con->Ico.Bit == True);
767 /* Hmmm. The exit has become unconditional. Leave it as
768 it is for now, since we'd have to truncate the BB at
769 this point, which is tricky. */
770 /* fall out into the reconstruct-the-exit code. */
sewardj8c2c10b2004-10-16 20:51:52 +0000771 if (UNROLL_VERBOSE) /* really a misuse of UNROLL_VERBOSE */
772 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
sewardj62617ef2004-10-13 23:29:22 +0000773 }
774 }
775 return IRStmt_Exit(fcond,st->Ist.Exit.dst);
776 }
777
778 default:
779 vex_printf("\n"); ppIRStmt(st);
780 vpanic("subst_and_fold_Stmt");
781 }
sewardj84be7372004-08-18 13:59:33 +0000782}
783
784
785static IRBB* cprop_BB ( IRBB* in )
786{
sewardj62617ef2004-10-13 23:29:22 +0000787 Int i;
788 IRBB* out;
789 IRStmt* st2;
790 Int n_tmps = in->tyenv->types_used;
791 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
sewardj84be7372004-08-18 13:59:33 +0000792
793 out = emptyIRBB();
sewardj695cff92004-10-13 14:50:14 +0000794 out->tyenv = dopyIRTypeEnv( in->tyenv );
sewardj84be7372004-08-18 13:59:33 +0000795
796 /* Set up the env with which travels forward. This holds a
797 substitution, mapping IRTemps to atoms, that is, IRExprs which
798 are either IRTemps or IRConsts. Thus, copy and constant
799 propagation is done. The environment is to be applied as we
800 move along. Keys are IRTemps. Values are IRExpr*s.
801 */
sewardj62617ef2004-10-13 23:29:22 +0000802 for (i = 0; i < n_tmps; i++)
803 env[i] = NULL;
sewardj84be7372004-08-18 13:59:33 +0000804
805 /* For each original SSA-form stmt ... */
806 for (i = 0; i < in->stmts_used; i++) {
807
808 /* First apply the substitution to the current stmt. This
809 propagates in any constants and tmp-tmp assignments
810 accumulated prior to this point. As part of the subst_Stmt
811 call, also then fold any constant expressions resulting. */
812
sewardjf0e43162004-08-20 00:11:12 +0000813 st2 = in->stmts[i];
814
815 /* perhaps st2 is already a no-op? */
816 if (!st2) continue;
817
818 st2 = subst_and_fold_Stmt( env, st2 );
sewardj84be7372004-08-18 13:59:33 +0000819
sewardjb8e75862004-08-19 17:58:45 +0000820 /* If the statement has been folded into a no-op, forget it. */
sewardjf0e43162004-08-20 00:11:12 +0000821 if (!st2) continue;
sewardjb8e75862004-08-19 17:58:45 +0000822
sewardj84be7372004-08-18 13:59:33 +0000823 /* Now consider what the stmt looks like. If it's of the form
824 't = const' or 't1 = t2', add it to the running environment
825 and not to the output BB. Otherwise, add it to the output
sewardjc0b42282004-10-12 13:44:12 +0000826 BB. Note, we choose not to propagate const when const is an
827 F64i, so that F64i literals can be CSE'd later. This helps
828 x86 floating point code generation. */
sewardj84be7372004-08-18 13:59:33 +0000829
sewardjc0b42282004-10-12 13:44:12 +0000830 if (st2->tag == Ist_Tmp
831 && st2->Ist.Tmp.data->tag == Iex_Const
832 && st2->Ist.Tmp.data->Iex.Const.con->tag != Ico_F64i) {
sewardj84be7372004-08-18 13:59:33 +0000833 /* 't = const' -- add to env.
834 The pair (IRTemp, IRExpr*) is added. */
sewardj62617ef2004-10-13 23:29:22 +0000835 env[(Int)(st2->Ist.Tmp.tmp)] = st2->Ist.Tmp.data;
sewardj84be7372004-08-18 13:59:33 +0000836 }
837 else
sewardj6d076362004-09-23 11:06:17 +0000838 if (st2->tag == Ist_Tmp && st2->Ist.Tmp.data->tag == Iex_Tmp) {
sewardj84be7372004-08-18 13:59:33 +0000839 /* 't1 = t2' -- add to env.
840 The pair (IRTemp, IRExpr*) is added. */
sewardj62617ef2004-10-13 23:29:22 +0000841 env[(Int)(st2->Ist.Tmp.tmp)] = st2->Ist.Tmp.data;
sewardj84be7372004-08-18 13:59:33 +0000842 }
843 else {
844 /* Not interesting, copy st2 into the output block. */
845 addStmtToIRBB( out, st2 );
846 }
847 }
848
sewardj84be7372004-08-18 13:59:33 +0000849 out->next = subst_Expr( env, in->next );
850 out->jumpkind = in->jumpkind;
851 return out;
852}
853
854
855
sewardjedf4d692004-08-17 13:52:58 +0000856/*---------------------------------------------------------------*/
sewardj39e3f242004-08-18 16:54:52 +0000857/*--- Dead code (t = E) removal ---*/
858/*---------------------------------------------------------------*/
859
sewardjea602bc2004-10-14 21:40:12 +0000860/* The type of the HashHW map is: a map from IRTemp to nothing
sewardj39e3f242004-08-18 16:54:52 +0000861 -- really just operating a set or IRTemps.
862*/
863
sewardjd503a322004-10-13 22:41:16 +0000864inline
865static void addUses_Temp ( Bool* set, IRTemp tmp )
sewardj17442fe2004-09-20 14:54:28 +0000866{
sewardjd503a322004-10-13 22:41:16 +0000867 set[(Int)tmp] = True;
sewardj17442fe2004-09-20 14:54:28 +0000868}
869
sewardjd503a322004-10-13 22:41:16 +0000870static void addUses_Expr ( Bool* set, IRExpr* e )
sewardj39e3f242004-08-18 16:54:52 +0000871{
872 Int i;
873 switch (e->tag) {
sewardjd7217032004-08-19 10:49:10 +0000874 case Iex_GetI:
sewardj2d3f77c2004-09-22 23:49:09 +0000875 addUses_Expr(set, e->Iex.GetI.off);
sewardjd7217032004-08-19 10:49:10 +0000876 return;
sewardj39e3f242004-08-18 16:54:52 +0000877 case Iex_Mux0X:
878 addUses_Expr(set, e->Iex.Mux0X.cond);
879 addUses_Expr(set, e->Iex.Mux0X.expr0);
880 addUses_Expr(set, e->Iex.Mux0X.exprX);
881 return;
882 case Iex_CCall:
883 for (i = 0; e->Iex.CCall.args[i]; i++)
884 addUses_Expr(set, e->Iex.CCall.args[i]);
885 return;
886 case Iex_LDle:
887 addUses_Expr(set, e->Iex.LDle.addr);
888 return;
889 case Iex_Binop:
890 addUses_Expr(set, e->Iex.Binop.arg1);
891 addUses_Expr(set, e->Iex.Binop.arg2);
892 return;
893 case Iex_Unop:
894 addUses_Expr(set, e->Iex.Unop.arg);
895 return;
896 case Iex_Tmp:
sewardj17442fe2004-09-20 14:54:28 +0000897 addUses_Temp(set, e->Iex.Tmp.tmp);
sewardj39e3f242004-08-18 16:54:52 +0000898 return;
899 case Iex_Const:
900 case Iex_Get:
901 return;
902 default:
903 vex_printf("\n");
904 ppIRExpr(e);
905 vpanic("addUses_Expr");
906 }
907}
908
sewardjd503a322004-10-13 22:41:16 +0000909static void addUses_Stmt ( Bool* set, IRStmt* st )
sewardj39e3f242004-08-18 16:54:52 +0000910{
sewardj17442fe2004-09-20 14:54:28 +0000911 Int i;
912 IRDirty* d;
sewardj39e3f242004-08-18 16:54:52 +0000913 switch (st->tag) {
sewardjd7217032004-08-19 10:49:10 +0000914 case Ist_PutI:
sewardj2d3f77c2004-09-22 23:49:09 +0000915 addUses_Expr(set, st->Ist.PutI.off);
916 addUses_Expr(set, st->Ist.PutI.data);
sewardjd7217032004-08-19 10:49:10 +0000917 return;
sewardj39e3f242004-08-18 16:54:52 +0000918 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +0000919 addUses_Expr(set, st->Ist.Tmp.data);
sewardj39e3f242004-08-18 16:54:52 +0000920 return;
921 case Ist_Put:
sewardj6d076362004-09-23 11:06:17 +0000922 addUses_Expr(set, st->Ist.Put.data);
sewardj39e3f242004-08-18 16:54:52 +0000923 return;
924 case Ist_STle:
925 addUses_Expr(set, st->Ist.STle.addr);
926 addUses_Expr(set, st->Ist.STle.data);
927 return;
sewardj17442fe2004-09-20 14:54:28 +0000928 case Ist_Dirty:
929 d = st->Ist.Dirty.details;
930 if (d->mFx != Ifx_None)
931 addUses_Expr(set, d->mAddr);
932 for (i = 0; d->args[i] != NULL; i++)
933 addUses_Expr(set, d->args[i]);
934 return;
935 case Ist_Exit:
936 addUses_Expr(set, st->Ist.Exit.cond);
937 return;
sewardj39e3f242004-08-18 16:54:52 +0000938 default:
939 vex_printf("\n");
940 ppIRStmt(st);
941 vpanic("addUses_Stmt");
sewardjd503a322004-10-13 22:41:16 +0000942 }
sewardj39e3f242004-08-18 16:54:52 +0000943}
944
945
946
947/* Note, this destructively modifies the given IRBB. */
948
949/* Scan backwards through statements, carrying a set of IRTemps which
950 are known to be used after the current point. On encountering 't =
951 E', delete the binding if it is not used. Otherwise, add any temp
952 uses to the set and keep on moving backwards. */
953
sewardjd7217032004-08-19 10:49:10 +0000954static void dead_BB ( IRBB* bb )
sewardj39e3f242004-08-18 16:54:52 +0000955{
956 Int i;
sewardjd503a322004-10-13 22:41:16 +0000957 Int n_tmps = bb->tyenv->types_used;
958 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
sewardj39e3f242004-08-18 16:54:52 +0000959 IRStmt* st;
960
sewardjd503a322004-10-13 22:41:16 +0000961 for (i = 0; i < n_tmps; i++)
962 set[i] = False;
963
sewardj39e3f242004-08-18 16:54:52 +0000964 /* start off by recording IRTemp uses in the next field. */
965 addUses_Expr(set, bb->next);
966
967 /* Work backwards through the stmts */
968 for (i = bb->stmts_used-1; i >= 0; i--) {
969 st = bb->stmts[i];
sewardj84ff0652004-08-23 16:16:08 +0000970 if (!st)
971 continue;
sewardj39e3f242004-08-18 16:54:52 +0000972 if (st->tag == Ist_Tmp
sewardjd503a322004-10-13 22:41:16 +0000973 && set[(Int)(st->Ist.Tmp.tmp)] == False) {
sewardj39e3f242004-08-18 16:54:52 +0000974 /* it's an IRTemp which never got used. Delete it. */
sewardj088bcb42004-08-19 17:16:52 +0000975 if (DEBUG_IROPT) {
sewardj39e3f242004-08-18 16:54:52 +0000976 vex_printf("DEAD: ");
977 ppIRStmt(st);
978 vex_printf("\n");
979 }
980 bb->stmts[i] = NULL;
981 } else {
982 /* Note any IRTemp uses made by the current statement. */
983 addUses_Stmt(set, st);
984 }
985 }
986}
987
988
989/*---------------------------------------------------------------*/
sewardjd7217032004-08-19 10:49:10 +0000990/*--- In-place removal of redundant GETs ---*/
991/*---------------------------------------------------------------*/
992
sewardjf0e43162004-08-20 00:11:12 +0000993/* Scan forwards, building up an environment binding (min offset, max
994 offset) pairs to values, which will either be temps or constants.
sewardjd7217032004-08-19 10:49:10 +0000995
sewardjf0e43162004-08-20 00:11:12 +0000996 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
sewardj29632392004-08-22 02:38:11 +0000997 env and if it matches, replace the Get with the stored value. If
998 there is no match, add a (minoff,maxoff) :-> t binding.
sewardjd7217032004-08-19 10:49:10 +0000999
sewardjf0e43162004-08-20 00:11:12 +00001000 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
1001 any binding which fully or partially overlaps with (minoff,maxoff).
1002 Then add a new (minoff,maxoff) :-> t or c binding. */
sewardjd7217032004-08-19 10:49:10 +00001003
sewardj575670c2004-10-12 09:13:19 +00001004/* Extract the min/max offsets from a guest state array descriptor. */
1005
sewardjcc384162004-10-14 00:28:41 +00001006inline
sewardj575670c2004-10-12 09:13:19 +00001007static void getArrayBounds ( IRArray* descr, UInt* minoff, UInt* maxoff )
1008{
1009 *minoff = descr->base;
1010 *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
1011 vassert((*minoff & 0xFFFF0000) == 0);
1012 vassert((*maxoff & 0xFFFF0000) == 0);
1013 vassert(*minoff <= *maxoff);
1014}
1015
sewardjd7217032004-08-19 10:49:10 +00001016/* Create keys, of the form ((minoffset << 16) | maxoffset). */
1017
1018static UInt mk_key_GetPut ( Int offset, IRType ty )
1019{
1020 /* offset should fit in 16 bits. */
1021 UInt minoff = offset;
1022 UInt maxoff = minoff + sizeofIRType(ty) - 1;
1023 vassert((minoff & 0xFFFF0000) == 0);
1024 vassert((maxoff & 0xFFFF0000) == 0);
1025 return (minoff << 16) | maxoff;
1026}
1027
sewardj2d3f77c2004-09-22 23:49:09 +00001028static UInt mk_key_GetIPutI ( IRArray* descr )
sewardjd7217032004-08-19 10:49:10 +00001029{
sewardj575670c2004-10-12 09:13:19 +00001030 UInt minoff, maxoff;
1031 getArrayBounds( descr, &minoff, &maxoff );
sewardj2d3f77c2004-09-22 23:49:09 +00001032 vassert((minoff & 0xFFFF0000) == 0);
1033 vassert((maxoff & 0xFFFF0000) == 0);
sewardjd7217032004-08-19 10:49:10 +00001034 return (minoff << 16) | maxoff;
1035}
1036
sewardjf0e43162004-08-20 00:11:12 +00001037/* Supposing h has keys of the form generated by mk_key_GetPut and
1038 mk_key_GetIPutI, invalidate any key which overlaps (k_lo
1039 .. k_hi).
1040*/
1041
sewardjea602bc2004-10-14 21:40:12 +00001042static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
sewardjf0e43162004-08-20 00:11:12 +00001043{
1044 Int j;
1045 UInt e_lo, e_hi;
1046 vassert(k_lo <= k_hi);
1047 /* invalidate any env entries which in any way overlap (k_lo
1048 .. k_hi) */
sewardj29632392004-08-22 02:38:11 +00001049 /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
1050
sewardjf0e43162004-08-20 00:11:12 +00001051 for (j = 0; j < h->used; j++) {
1052 if (!h->inuse[j])
1053 continue;
1054 e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
1055 e_hi = ((UInt)h->key[j]) & 0xFFFF;
1056 vassert(e_lo <= e_hi);
1057 if (e_hi < k_lo || k_hi < e_lo)
1058 continue; /* no overlap possible */
1059 else
1060 /* overlap; invalidate */
1061 h->inuse[j] = False;
1062 }
1063}
1064
sewardjd7217032004-08-19 10:49:10 +00001065
1066static void redundant_get_removal_BB ( IRBB* bb )
1067{
sewardjea602bc2004-10-14 21:40:12 +00001068 HashHW* env = newHHW();
sewardjbaf29ed2004-09-08 23:40:57 +00001069 UInt key = 0; /* keep gcc -O happy */
sewardjf0e43162004-08-20 00:11:12 +00001070 Int i;
sewardj088bcb42004-08-19 17:16:52 +00001071 Bool isPut;
sewardjea602bc2004-10-14 21:40:12 +00001072 HWord val;
sewardjd7217032004-08-19 10:49:10 +00001073
1074 for (i = 0; i < bb->stmts_used; i++) {
1075 IRStmt* st = bb->stmts[i];
1076
1077 if (!st)
1078 continue;
1079
1080 /* Deal with Gets */
1081 if (st->tag == Ist_Tmp
sewardj6d076362004-09-23 11:06:17 +00001082 && st->Ist.Tmp.data->tag == Iex_Get) {
sewardjd7217032004-08-19 10:49:10 +00001083 /* st is 't = Get(...)'. Look up in the environment and see
1084 if the Get can be replaced. */
sewardj6d076362004-09-23 11:06:17 +00001085 IRExpr* get = st->Ist.Tmp.data;
sewardjea602bc2004-10-14 21:40:12 +00001086 key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
sewardjd7217032004-08-19 10:49:10 +00001087 get->Iex.Get.ty );
sewardjea602bc2004-10-14 21:40:12 +00001088 if (lookupHHW(env, &val, (HWord)key)) {
sewardjd7217032004-08-19 10:49:10 +00001089 /* found it */
sewardj088bcb42004-08-19 17:16:52 +00001090 if (DEBUG_IROPT) {
sewardjd7217032004-08-19 10:49:10 +00001091 vex_printf("rGET: "); ppIRExpr(get);
1092 vex_printf(" -> "); ppIRExpr((IRExpr*)val);
1093 vex_printf("\n");
sewardj29632392004-08-22 02:38:11 +00001094 }
sewardjd7217032004-08-19 10:49:10 +00001095 bb->stmts[i] = IRStmt_Tmp(st->Ist.Tmp.tmp,
1096 (IRExpr*)val);
sewardj29632392004-08-22 02:38:11 +00001097 } else {
1098 /* Not found, but at least we know that t and the Get(...)
1099 are now associated. So add a binding to reflect that
1100 fact. */
sewardjea602bc2004-10-14 21:40:12 +00001101 addToHHW( env, (HWord)key,
1102 (HWord)(IRExpr_Tmp(st->Ist.Tmp.tmp)) );
sewardjd7217032004-08-19 10:49:10 +00001103 }
1104 }
1105
1106 /* Deal with Puts */
1107 switch (st->tag) {
1108 case Ist_Put:
1109 isPut = True;
1110 key = mk_key_GetPut( st->Ist.Put.offset,
sewardj6d076362004-09-23 11:06:17 +00001111 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
sewardjd7217032004-08-19 10:49:10 +00001112 break;
1113 case Ist_PutI:
1114 isPut = True;
sewardj2d3f77c2004-09-22 23:49:09 +00001115 key = mk_key_GetIPutI( st->Ist.PutI.descr );
sewardjd7217032004-08-19 10:49:10 +00001116 break;
1117 default:
1118 isPut = False;
1119 }
1120
1121 /* invalidate any env entries overlapped by this Put */
1122 if (isPut) {
sewardjf0e43162004-08-20 00:11:12 +00001123 UInt k_lo, k_hi;
sewardjd7217032004-08-19 10:49:10 +00001124 k_lo = (key >> 16) & 0xFFFF;
1125 k_hi = key & 0xFFFF;
sewardjf0e43162004-08-20 00:11:12 +00001126 invalidateOverlaps(env, k_lo, k_hi);
sewardjd7217032004-08-19 10:49:10 +00001127 }
1128
1129 /* add this one to the env, if appropriate */
1130 if (st->tag == Ist_Put) {
sewardj6d076362004-09-23 11:06:17 +00001131 vassert(isAtom(st->Ist.Put.data));
sewardjea602bc2004-10-14 21:40:12 +00001132 addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
sewardjd7217032004-08-19 10:49:10 +00001133 }
1134
1135 } /* for (i = 0; i < bb->stmts_used; i++) */
1136
1137}
1138
1139
1140/*---------------------------------------------------------------*/
sewardjf0e43162004-08-20 00:11:12 +00001141/*--- In-place removal of redundant PUTs ---*/
1142/*---------------------------------------------------------------*/
1143
1144/* Find any Get uses in st and invalidate any partially or fully
1145 overlapping ranges listed in env. Due to the flattening phase, the
1146 only stmt kind we expect to find a Get on is IRStmt_Tmp. */
1147
sewardjea602bc2004-10-14 21:40:12 +00001148static void handle_gets_Stmt ( HashHW* env, IRStmt* st )
sewardjf0e43162004-08-20 00:11:12 +00001149{
sewardj044a2152004-10-21 10:22:10 +00001150 Int j;
sewardjbaf29ed2004-09-08 23:40:57 +00001151 UInt key = 0; /* keep gcc -O happy */
sewardjf0e43162004-08-20 00:11:12 +00001152 Bool isGet;
sewardj044a2152004-10-21 10:22:10 +00001153 Bool memRW = False;
sewardjf0e43162004-08-20 00:11:12 +00001154 IRExpr* e;
sewardj044a2152004-10-21 10:22:10 +00001155
sewardjf0e43162004-08-20 00:11:12 +00001156 switch (st->tag) {
1157
1158 /* This is the only interesting case. Deal with Gets in the RHS
1159 expression. */
1160 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +00001161 e = st->Ist.Tmp.data;
sewardjf0e43162004-08-20 00:11:12 +00001162 switch (e->tag) {
1163 case Iex_Get:
1164 isGet = True;
1165 key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
1166 break;
1167 case Iex_GetI:
1168 isGet = True;
sewardj2d3f77c2004-09-22 23:49:09 +00001169 key = mk_key_GetIPutI ( e->Iex.GetI.descr );
sewardjf0e43162004-08-20 00:11:12 +00001170 break;
sewardj044a2152004-10-21 10:22:10 +00001171 case Iex_LDle:
1172 isGet = False;
1173 memRW = True;
1174 break;
sewardjf0e43162004-08-20 00:11:12 +00001175 default:
1176 isGet = False;
1177 }
1178 if (isGet) {
1179 UInt k_lo, k_hi;
1180 k_lo = (key >> 16) & 0xFFFF;
1181 k_hi = key & 0xFFFF;
1182 invalidateOverlaps(env, k_lo, k_hi);
1183 }
1184 break;
1185
sewardj044a2152004-10-21 10:22:10 +00001186 /* Be very conservative for dirty helper calls; dump the entire
1187 environment. The helper might read guest state, in which
1188 case it needs to be flushed first. Also, the helper might
1189 access guest memory, in which case all parts of the guest
1190 state requiring precise exceptions needs to be flushed. The
1191 crude solution is just to flush everything; we could easily
1192 enough do a lot better if needed. */
1193 case Ist_Dirty:
1194 for (j = 0; j < env->used; j++)
1195 env->inuse[j] = False;
1196 break;
1197
sewardjf0e43162004-08-20 00:11:12 +00001198 /* all other cases are boring. */
1199 case Ist_STle:
1200 vassert(isAtom(st->Ist.STle.addr));
1201 vassert(isAtom(st->Ist.STle.data));
sewardj044a2152004-10-21 10:22:10 +00001202 memRW = True;
1203 break;
sewardj17442fe2004-09-20 14:54:28 +00001204
sewardjf0e43162004-08-20 00:11:12 +00001205 case Ist_Exit:
1206 vassert(isAtom(st->Ist.Exit.cond));
sewardj044a2152004-10-21 10:22:10 +00001207 break;
sewardjf0e43162004-08-20 00:11:12 +00001208
sewardjcf1ea9c2004-09-06 23:51:00 +00001209 case Ist_PutI:
sewardj2d3f77c2004-09-22 23:49:09 +00001210 vassert(isAtom(st->Ist.PutI.off));
1211 vassert(isAtom(st->Ist.PutI.data));
sewardj044a2152004-10-21 10:22:10 +00001212 break;
sewardjcf1ea9c2004-09-06 23:51:00 +00001213
sewardjf0e43162004-08-20 00:11:12 +00001214 default:
1215 vex_printf("\n");
1216 ppIRStmt(st);
1217 vex_printf("\n");
1218 vpanic("handle_gets_Stmt");
1219 }
sewardj044a2152004-10-21 10:22:10 +00001220
1221 if (memRW) {
1222 /* This statement accesses memory. So we need to dump all parts
1223 of the environment corresponding to guest state that may not
1224 be reordered with respect to memory references. That means
1225 at least the stack pointer. */
1226 for (j = 0; j < env->used; j++) {
1227#if 0
1228 if (env->inuse[j]
1229 && env->key[j] == (HWord)((16 << 16) | 19)) {
1230 //vex_printf("found esp assignment\n");
1231 env->inuse[j] = False;
1232 }
1233#else
1234 env->inuse[j] = False;
1235#endif
1236 }
1237 }
sewardjf0e43162004-08-20 00:11:12 +00001238}
1239
1240
1241/* Scan backwards, building up a set of (min offset, max
1242 offset) pairs, indicating those parts of the guest state
1243 for which the next event is a write.
1244
1245 On seeing a conditional exit, empty the set.
1246
1247 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
1248 completely within the set, remove the Put. Otherwise, add
1249 (minoff,maxoff) to the set.
1250
1251 On seeing 'Get (minoff,maxoff)', remove any part of the set
1252 overlapping (minoff,maxoff).
1253*/
1254
1255static void redundant_put_removal_BB ( IRBB* bb )
1256{
1257 Int i, j;
1258 Bool isPut;
1259 IRStmt* st;
sewardjbaf29ed2004-09-08 23:40:57 +00001260 UInt key = 0; /* keep gcc -O happy */
sewardjf0e43162004-08-20 00:11:12 +00001261
sewardjea602bc2004-10-14 21:40:12 +00001262 HashHW* env = newHHW();
sewardjf0e43162004-08-20 00:11:12 +00001263 for (i = bb->stmts_used-1; i >= 0; i--) {
1264 st = bb->stmts[i];
sewardj695cff92004-10-13 14:50:14 +00001265 if (!st)
1266 continue;
sewardjf0e43162004-08-20 00:11:12 +00001267
1268 /* Deal with conditional exits. */
1269 if (st->tag == Ist_Exit) {
1270 /* Since control may not get beyond this point, we must empty
1271 out the set, since we can no longer claim that the next
1272 event for any part of the guest state is definitely a
1273 write. */
1274 vassert(isAtom(st->Ist.Exit.cond));
1275 for (j = 0; j < env->used; j++)
1276 env->inuse[j] = False;
1277 continue;
1278 }
1279
1280 /* Deal with Puts */
1281 switch (st->tag) {
1282 case Ist_Put:
1283 isPut = True;
1284 key = mk_key_GetPut( st->Ist.Put.offset,
sewardj6d076362004-09-23 11:06:17 +00001285 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
1286 vassert(isAtom(st->Ist.Put.data));
sewardjf0e43162004-08-20 00:11:12 +00001287 break;
1288 case Ist_PutI:
1289 isPut = True;
sewardj2d3f77c2004-09-22 23:49:09 +00001290 key = mk_key_GetIPutI( st->Ist.PutI.descr );
1291 vassert(isAtom(st->Ist.PutI.off));
1292 vassert(isAtom(st->Ist.PutI.data));
sewardjf0e43162004-08-20 00:11:12 +00001293 break;
1294 default:
1295 isPut = False;
1296 }
sewardjcf1ea9c2004-09-06 23:51:00 +00001297 if (isPut && st->tag != Ist_PutI) {
sewardjf0e43162004-08-20 00:11:12 +00001298 /* See if any single entry in env overlaps this Put. This is
1299 simplistic in that the transformation is valid if, say, two
1300 or more entries in the env overlap this Put, but the use of
sewardjea602bc2004-10-14 21:40:12 +00001301 lookupHHW will only find a single entry which exactly
sewardjf0e43162004-08-20 00:11:12 +00001302 overlaps this Put. This is suboptimal but safe. */
sewardjea602bc2004-10-14 21:40:12 +00001303 if (lookupHHW(env, NULL, (HWord)key)) {
sewardjf0e43162004-08-20 00:11:12 +00001304 /* This Put is redundant because a later one will overwrite
1305 it. So NULL (nop) it out. */
1306 if (DEBUG_IROPT) {
1307 vex_printf("rPUT: "); ppIRStmt(st);
1308 vex_printf("\n");
sewardj29632392004-08-22 02:38:11 +00001309 }
sewardjf0e43162004-08-20 00:11:12 +00001310 bb->stmts[i] = NULL;
1311 } else {
1312 /* We can't demonstrate that this Put is redundant, so add it
1313 to the running collection. */
sewardjea602bc2004-10-14 21:40:12 +00001314 addToHHW(env, (HWord)key, 0);
sewardjf0e43162004-08-20 00:11:12 +00001315 }
1316 continue;
1317 }
1318
1319 /* Deal with Gets. These remove bits of the environment since
1320 appearance of a Get means that the next event for that slice
1321 of the guest state is no longer a write, but a read. */
1322 handle_gets_Stmt( env, st );
1323 }
1324}
1325
1326
sewardj84ff0652004-08-23 16:16:08 +00001327/*---------------------------------------------------------------*/
1328/*--- Specialisation of helper function calls, in ---*/
1329/*--- collaboration with the front end ---*/
1330/*---------------------------------------------------------------*/
1331
1332static
1333void spec_helpers_BB ( IRBB* bb,
1334 IRExpr* (*specHelper) ( Char*, IRExpr**) )
1335{
1336 Int i;
1337 IRStmt* st;
1338 IRExpr* ex;
1339
1340 for (i = bb->stmts_used-1; i >= 0; i--) {
1341 st = bb->stmts[i];
1342
1343 if (!st
1344 || st->tag != Ist_Tmp
sewardj6d076362004-09-23 11:06:17 +00001345 || st->Ist.Tmp.data->tag != Iex_CCall)
sewardjaba4fff2004-10-08 21:37:15 +00001346 continue;
sewardj84ff0652004-08-23 16:16:08 +00001347
sewardj6d076362004-09-23 11:06:17 +00001348 ex = (*specHelper)( st->Ist.Tmp.data->Iex.CCall.name,
1349 st->Ist.Tmp.data->Iex.CCall.args );
sewardj84ff0652004-08-23 16:16:08 +00001350 if (!ex)
sewardjaba4fff2004-10-08 21:37:15 +00001351 /* the front end can't think of a suitable replacement */
1352 continue;
sewardj84ff0652004-08-23 16:16:08 +00001353
1354 /* We got something better. Install it in the bb. */
1355 bb->stmts[i]
1356 = IRStmt_Tmp(st->Ist.Tmp.tmp, ex);
1357
1358 if (0) {
1359 vex_printf("SPEC: ");
sewardj6d076362004-09-23 11:06:17 +00001360 ppIRExpr(st->Ist.Tmp.data);
sewardj84ff0652004-08-23 16:16:08 +00001361 vex_printf(" --> ");
1362 ppIRExpr(ex);
1363 vex_printf("\n");
1364 }
1365 }
1366}
1367
sewardj29632392004-08-22 02:38:11 +00001368
1369/*---------------------------------------------------------------*/
1370/*--- The tree builder ---*/
1371/*---------------------------------------------------------------*/
1372
1373typedef
1374 struct {
1375 Int occ; /* occurrence count for this tmp */
sewardj84ff0652004-08-23 16:16:08 +00001376 IRExpr* expr; /* expr it is bound to,
1377 or NULL if already 'used' */
sewardj29632392004-08-22 02:38:11 +00001378 Bool eDoesLoad; /* True <=> expr reads mem */
1379 Bool eDoesGet; /* True <=> expr reads guest state */
1380 Bool invalidateMe; /* used when dumping bindings */
1381 Int origPos; /* posn of the binder in the original bb */
1382 }
1383 TmpInfo;
1384
1385/* Given env :: IRTemp -> TmpInfo*
1386 Add the use-occurrences of temps in this expression
1387 to the environment.
1388*/
sewardjc9ad1152004-10-14 00:08:21 +00001389static void occCount_Temp ( TmpInfo** env, IRTemp tmp )
sewardj17442fe2004-09-20 14:54:28 +00001390{
sewardjc9ad1152004-10-14 00:08:21 +00001391 TmpInfo* ti = env[(Int)tmp];
1392 if (ti) {
sewardj17442fe2004-09-20 14:54:28 +00001393 ti->occ++;
1394 } else {
1395 ti = LibVEX_Alloc(sizeof(TmpInfo));
1396 ti->occ = 1;
1397 ti->expr = NULL;
1398 ti->eDoesLoad = False;
1399 ti->eDoesGet = False;
1400 ti->invalidateMe = False;
1401 ti->origPos = -1; /* filed in properly later */
sewardjc9ad1152004-10-14 00:08:21 +00001402 env[(Int)tmp] = ti;
sewardj17442fe2004-09-20 14:54:28 +00001403 }
1404}
1405
sewardjc9ad1152004-10-14 00:08:21 +00001406static void occCount_Expr ( TmpInfo** env, IRExpr* e )
sewardj29632392004-08-22 02:38:11 +00001407{
sewardj17442fe2004-09-20 14:54:28 +00001408 Int i;
sewardj29632392004-08-22 02:38:11 +00001409
1410 switch (e->tag) {
1411
1412 case Iex_Tmp: /* the only interesting case */
sewardj17442fe2004-09-20 14:54:28 +00001413 occCount_Temp(env, e->Iex.Tmp.tmp);
sewardj29632392004-08-22 02:38:11 +00001414 return;
1415
1416 case Iex_Mux0X:
1417 occCount_Expr(env, e->Iex.Mux0X.cond);
1418 occCount_Expr(env, e->Iex.Mux0X.expr0);
1419 occCount_Expr(env, e->Iex.Mux0X.exprX);
1420 return;
1421
1422 case Iex_Binop:
1423 occCount_Expr(env, e->Iex.Binop.arg1);
1424 occCount_Expr(env, e->Iex.Binop.arg2);
1425 return;
1426
1427 case Iex_Unop:
1428 occCount_Expr(env, e->Iex.Unop.arg);
1429 return;
1430
1431 case Iex_LDle:
1432 occCount_Expr(env, e->Iex.LDle.addr);
1433 return;
1434
1435 case Iex_CCall:
1436 for (i = 0; e->Iex.CCall.args[i]; i++)
1437 occCount_Expr(env, e->Iex.CCall.args[i]);
1438 return;
1439
sewardjf6501992004-08-27 11:58:24 +00001440 case Iex_GetI:
sewardj2d3f77c2004-09-22 23:49:09 +00001441 occCount_Expr(env, e->Iex.GetI.off);
sewardjf6501992004-08-27 11:58:24 +00001442 return;
1443
sewardj29632392004-08-22 02:38:11 +00001444 case Iex_Const:
1445 case Iex_Get:
1446 return;
1447
1448 default:
1449 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1450 vpanic("occCount_Expr");
1451 }
1452}
1453
1454
1455/* Given env :: IRTemp -> TmpInfo*
1456 Add the use-occurrences of temps in this expression
1457 to the environment.
1458*/
sewardjc9ad1152004-10-14 00:08:21 +00001459static void occCount_Stmt ( TmpInfo** env, IRStmt* st )
sewardj29632392004-08-22 02:38:11 +00001460{
sewardj17442fe2004-09-20 14:54:28 +00001461 Int i;
1462 IRDirty* d;
sewardj29632392004-08-22 02:38:11 +00001463 switch (st->tag) {
1464 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +00001465 occCount_Expr(env, st->Ist.Tmp.data);
sewardj29632392004-08-22 02:38:11 +00001466 return;
1467 case Ist_Put:
sewardj6d076362004-09-23 11:06:17 +00001468 occCount_Expr(env, st->Ist.Put.data);
sewardj29632392004-08-22 02:38:11 +00001469 return;
sewardjf6501992004-08-27 11:58:24 +00001470 case Ist_PutI:
sewardj2d3f77c2004-09-22 23:49:09 +00001471 occCount_Expr(env, st->Ist.PutI.off);
1472 occCount_Expr(env, st->Ist.PutI.data);
sewardjf6501992004-08-27 11:58:24 +00001473 return;
sewardj29632392004-08-22 02:38:11 +00001474 case Ist_STle:
1475 occCount_Expr(env, st->Ist.STle.addr);
1476 occCount_Expr(env, st->Ist.STle.data);
1477 return;
sewardj17442fe2004-09-20 14:54:28 +00001478 case Ist_Dirty:
1479 d = st->Ist.Dirty.details;
1480 if (d->mFx != Ifx_None)
1481 occCount_Expr(env, d->mAddr);
1482 for (i = 0; d->args[i]; i++)
1483 occCount_Expr(env, d->args[i]);
1484 return;
sewardj29632392004-08-22 02:38:11 +00001485 case Ist_Exit:
1486 occCount_Expr(env, st->Ist.Exit.cond);
1487 return;
1488 default:
1489 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
1490 vpanic("occCount_Stmt");
1491 }
1492}
1493
sewardj17442fe2004-09-20 14:54:28 +00001494/* Look up a binding for tmp in the env. If found, return the bound
1495 expression, and set the env's binding to NULL so it is marked as
1496 used. If not found, return NULL. */
1497
sewardjc9ad1152004-10-14 00:08:21 +00001498static IRExpr* tbSubst_Temp ( TmpInfo** env, IRTemp tmp )
sewardj17442fe2004-09-20 14:54:28 +00001499{
1500 TmpInfo* ti;
sewardj17442fe2004-09-20 14:54:28 +00001501 IRExpr* e;
sewardjc9ad1152004-10-14 00:08:21 +00001502 ti = env[(Int)tmp];
1503 if (ti){
1504 e = ti->expr;
sewardj17442fe2004-09-20 14:54:28 +00001505 if (e) {
1506 ti->expr = NULL;
1507 return e;
1508 } else {
1509 return NULL;
1510 }
1511 } else {
1512 return NULL;
1513 }
1514}
sewardj29632392004-08-22 02:38:11 +00001515
1516/* Traverse e, looking for temps. For each observed temp, see if env
1517 contains a binding for the temp, and if so return the bound value.
1518 The env has the property that any binding it holds is
1519 'single-shot', so once a binding is used, it is marked as no longer
1520 available, by setting its .expr field to NULL. */
1521
sewardjc9ad1152004-10-14 00:08:21 +00001522static IRExpr* tbSubst_Expr ( TmpInfo** env, IRExpr* e )
sewardj29632392004-08-22 02:38:11 +00001523{
sewardjc41f1fb2004-08-22 09:48:08 +00001524 IRExpr* e2;
1525 IRExpr** args2;
1526 Int i;
sewardj29632392004-08-22 02:38:11 +00001527
1528 switch (e->tag) {
1529
sewardjc41f1fb2004-08-22 09:48:08 +00001530 case Iex_CCall:
sewardj695cff92004-10-13 14:50:14 +00001531 args2 = sopyIRExprVec(e->Iex.CCall.args);
sewardjc41f1fb2004-08-22 09:48:08 +00001532 for (i = 0; args2[i]; i++)
1533 args2[i] = tbSubst_Expr(env,args2[i]);
1534 return IRExpr_CCall(e->Iex.CCall.name,
1535 e->Iex.CCall.retty,
1536 args2
1537 );
sewardj29632392004-08-22 02:38:11 +00001538 case Iex_Tmp:
sewardj17442fe2004-09-20 14:54:28 +00001539 e2 = tbSubst_Temp(env, e->Iex.Tmp.tmp);
1540 return e2 ? e2 : e;
sewardjc41f1fb2004-08-22 09:48:08 +00001541 case Iex_Mux0X:
1542 return IRExpr_Mux0X(
1543 tbSubst_Expr(env, e->Iex.Mux0X.cond),
1544 tbSubst_Expr(env, e->Iex.Mux0X.expr0),
1545 tbSubst_Expr(env, e->Iex.Mux0X.exprX)
1546 );
1547 case Iex_Binop:
1548 return IRExpr_Binop(
1549 e->Iex.Binop.op,
1550 tbSubst_Expr(env, e->Iex.Binop.arg1),
1551 tbSubst_Expr(env, e->Iex.Binop.arg2)
1552 );
1553 case Iex_Unop:
1554 return IRExpr_Unop(
1555 e->Iex.Unop.op,
1556 tbSubst_Expr(env, e->Iex.Unop.arg)
1557 );
1558 case Iex_LDle:
1559 return IRExpr_LDle(
1560 e->Iex.LDle.ty,
sewardjf6501992004-08-27 11:58:24 +00001561 tbSubst_Expr(env, e->Iex.LDle.addr)
sewardjc41f1fb2004-08-22 09:48:08 +00001562 );
sewardjf6501992004-08-27 11:58:24 +00001563 case Iex_GetI:
1564 return IRExpr_GetI(
sewardj2d3f77c2004-09-22 23:49:09 +00001565 e->Iex.GetI.descr,
1566 tbSubst_Expr(env, e->Iex.GetI.off),
1567 e->Iex.GetI.bias
1568 );
sewardjc41f1fb2004-08-22 09:48:08 +00001569 case Iex_Const:
sewardj29632392004-08-22 02:38:11 +00001570 case Iex_Get:
1571 return e;
1572 default:
1573 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1574 vpanic("tbSubst_Expr");
1575 }
1576}
1577
1578/* Same deal as tbSubst_Expr, except for stmts. */
1579
sewardjc9ad1152004-10-14 00:08:21 +00001580static IRStmt* tbSubst_Stmt ( TmpInfo** env, IRStmt* st )
sewardj29632392004-08-22 02:38:11 +00001581{
sewardj17442fe2004-09-20 14:54:28 +00001582 Int i;
1583 IRDirty* d;
1584 IRDirty* d2;
sewardj29632392004-08-22 02:38:11 +00001585 switch (st->tag) {
sewardj17442fe2004-09-20 14:54:28 +00001586 case Ist_STle:
1587 return IRStmt_STle(
1588 tbSubst_Expr(env, st->Ist.STle.addr),
1589 tbSubst_Expr(env, st->Ist.STle.data)
1590 );
sewardj29632392004-08-22 02:38:11 +00001591 case Ist_Tmp:
1592 return IRStmt_Tmp(
1593 st->Ist.Tmp.tmp,
sewardj6d076362004-09-23 11:06:17 +00001594 tbSubst_Expr(env, st->Ist.Tmp.data)
sewardj29632392004-08-22 02:38:11 +00001595 );
1596 case Ist_Put:
1597 return IRStmt_Put(
1598 st->Ist.Put.offset,
sewardj6d076362004-09-23 11:06:17 +00001599 tbSubst_Expr(env, st->Ist.Put.data)
sewardj29632392004-08-22 02:38:11 +00001600 );
sewardjf6501992004-08-27 11:58:24 +00001601 case Ist_PutI:
1602 return IRStmt_PutI(
sewardj2d3f77c2004-09-22 23:49:09 +00001603 st->Ist.PutI.descr,
1604 tbSubst_Expr(env, st->Ist.PutI.off),
1605 st->Ist.PutI.bias,
1606 tbSubst_Expr(env, st->Ist.PutI.data)
1607 );
sewardjf6501992004-08-27 11:58:24 +00001608
sewardj29632392004-08-22 02:38:11 +00001609 case Ist_Exit:
1610 return IRStmt_Exit(
1611 tbSubst_Expr(env, st->Ist.Exit.cond),
1612 st->Ist.Exit.dst
1613 );
sewardj17442fe2004-09-20 14:54:28 +00001614 case Ist_Dirty:
1615 d = st->Ist.Dirty.details;
1616 d2 = emptyIRDirty();
1617 *d2 = *d;
1618 if (d2->mFx != Ifx_None)
1619 d2->mAddr = tbSubst_Expr(env, d2->mAddr);
1620 for (i = 0; d2->args[i]; i++)
1621 d2->args[i] = tbSubst_Expr(env, d2->args[i]);
1622 return IRStmt_Dirty(d2);
sewardj29632392004-08-22 02:38:11 +00001623 default:
1624 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
1625 vpanic("tbSubst_Stmt");
1626 }
1627}
1628
1629
sewardj37d38012004-08-24 00:37:04 +00001630/* Traverse an expr, and detect if any part of it reads memory or does
1631 a Get. Be careful ... this really controls how much the
1632 tree-builder can reorder the code, so getting it right is critical.
1633*/
sewardj29632392004-08-22 02:38:11 +00001634static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
1635{
sewardjc41f1fb2004-08-22 09:48:08 +00001636 Int i;
sewardj29632392004-08-22 02:38:11 +00001637 switch (e->tag) {
sewardjc41f1fb2004-08-22 09:48:08 +00001638 case Iex_CCall:
1639 for (i = 0; e->Iex.CCall.args[i]; i++)
1640 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
1641 return;
1642 case Iex_Mux0X:
1643 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
1644 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
1645 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
1646 return;
1647 case Iex_Binop:
1648 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
1649 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
1650 return;
1651 case Iex_Unop:
1652 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
1653 return;
1654 case Iex_LDle:
sewardjc9ad1152004-10-14 00:08:21 +00001655 *doesLoad = True;
sewardjaba4fff2004-10-08 21:37:15 +00001656 setHints_Expr(doesLoad, doesGet, e->Iex.LDle.addr);
sewardjc41f1fb2004-08-22 09:48:08 +00001657 return;
sewardj29632392004-08-22 02:38:11 +00001658 case Iex_Get:
sewardjc9ad1152004-10-14 00:08:21 +00001659 *doesGet = True;
sewardj29632392004-08-22 02:38:11 +00001660 return;
sewardjf6501992004-08-27 11:58:24 +00001661 case Iex_GetI:
sewardjc9ad1152004-10-14 00:08:21 +00001662 *doesGet = True;
sewardj2d3f77c2004-09-22 23:49:09 +00001663 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.off);
sewardjf6501992004-08-27 11:58:24 +00001664 return;
sewardjc41f1fb2004-08-22 09:48:08 +00001665 case Iex_Tmp:
1666 case Iex_Const:
1667 return;
sewardj29632392004-08-22 02:38:11 +00001668 default:
1669 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
1670 vpanic("setHints_Expr");
1671 }
1672}
1673
1674
sewardjc9ad1152004-10-14 00:08:21 +00001675static void dumpInvalidated ( TmpInfo** env, IRBB* bb, /*INOUT*/Int* j )
sewardj29632392004-08-22 02:38:11 +00001676{
1677 Int k, oldest_op, oldest_k;
1678 TmpInfo* ti;
1679
1680 /* Dump all the bindings to marked as invalidated, in order. */
1681 while (True) {
1682
1683 /* find the oldest bind marked 'invalidateMe'. */
1684 oldest_op = 1<<30;
1685 oldest_k = 1<<30;
sewardjc9ad1152004-10-14 00:08:21 +00001686 for (k = 0; k < bb->tyenv->types_used; k++) {
1687 ti = env[k];
1688 if (!ti)
sewardj29632392004-08-22 02:38:11 +00001689 continue;
sewardj29632392004-08-22 02:38:11 +00001690 if (!ti->expr)
1691 continue;
1692 if (!ti->invalidateMe)
1693 continue;
sewardjc41f1fb2004-08-22 09:48:08 +00001694 /* vex_printf("FOUND INVAL %d %d\n", ti->origPos, oldest_op); */
sewardj29632392004-08-22 02:38:11 +00001695 if (ti->origPos < oldest_op) {
1696 oldest_op = ti->origPos;
1697 oldest_k = k;
1698 }
1699 }
1700
1701 /* No more binds to invalidate. */
1702 if (oldest_op == 1<<30)
sewardjc41f1fb2004-08-22 09:48:08 +00001703 return;
sewardj29632392004-08-22 02:38:11 +00001704
1705 /* the oldest bind to invalidate has been identified */
1706 vassert(oldest_op != 1<<31 && oldest_k != 1<<31);
sewardjc9ad1152004-10-14 00:08:21 +00001707 ti = env[oldest_k];
sewardj29632392004-08-22 02:38:11 +00001708 vassert(ti->expr && ti->invalidateMe);
1709
1710 /* and invalidate it ... */
sewardjc9ad1152004-10-14 00:08:21 +00001711 bb->stmts[*j] = IRStmt_Tmp( (IRTemp)oldest_k, ti->expr );
sewardj29632392004-08-22 02:38:11 +00001712 /* vex_printf("**1 "); ppIRStmt(bb->stmts[*j]); vex_printf("\n"); */
1713 (*j)++;
1714 ti->invalidateMe = False;
1715 ti->expr = NULL; /* no longer available for substitution */
1716
1717 } /* loop which dumps the binds marked for invalidation */
1718}
1719
1720
1721
1722static void treebuild_BB ( IRBB* bb )
1723{
1724 Int i, j, k;
sewardj29632392004-08-22 02:38:11 +00001725 Bool invPut, invStore;
1726 IRStmt* st;
1727 IRStmt* st2;
1728 TmpInfo* ti;
1729 IRExpr* next2;
1730
1731 /* Mapping from IRTemp to TmpInfo*. */
sewardjc9ad1152004-10-14 00:08:21 +00001732 Int n_tmps = bb->tyenv->types_used;
1733 TmpInfo** env = LibVEX_Alloc(n_tmps * sizeof(TmpInfo*));
1734
1735 for (i = 0; i < n_tmps; i++)
1736 env[i] = NULL;
sewardj29632392004-08-22 02:38:11 +00001737
1738 /* Phase 1. Scan forwards in bb, counting use occurrences of each
1739 temp. Also count occurrences in the bb->next field. */
1740
1741 for (i = 0; i < bb->stmts_used; i++) {
1742 st = bb->stmts[i];
1743 if (!st)
1744 continue;
1745 occCount_Stmt( env, st );
1746 }
1747 occCount_Expr(env, bb->next );
1748
sewardjc9ad1152004-10-14 00:08:21 +00001749# if 0
sewardj29632392004-08-22 02:38:11 +00001750 for (i = 0; i < env->used; i++) {
1751 if (!env->inuse[i])
1752 continue;
1753 ppIRTemp( (IRTemp)(env->key[i]) );
1754 vex_printf(" used %d\n", ((TmpInfo*)env->val[i])->occ );
1755 }
sewardjc9ad1152004-10-14 00:08:21 +00001756# endif
sewardj29632392004-08-22 02:38:11 +00001757
1758 /* Phase 2. Fill in the origPos fields. */
1759
1760 for (i = 0; i < bb->stmts_used; i++) {
1761 st = bb->stmts[i];
1762 if (!st)
1763 continue;
1764 if (st->tag != Ist_Tmp)
1765 continue;
1766
sewardjc9ad1152004-10-14 00:08:21 +00001767 ti = env[(Int)st->Ist.Tmp.tmp];
1768 if (!ti) {
sewardj84ff0652004-08-23 16:16:08 +00001769 vex_printf("\n");
1770 ppIRTemp(st->Ist.Tmp.tmp);
1771 vex_printf("\n");
sewardj29632392004-08-22 02:38:11 +00001772 vpanic("treebuild_BB (phase 2): unmapped IRTemp");
1773 }
sewardj29632392004-08-22 02:38:11 +00001774 ti->origPos = i;
1775 }
1776
1777 /* Phase 3. Scan forwards in bb.
1778
1779 On seeing 't = E', occ(t)==1,
1780 let E'=env(E), set t's binding to be E', and
1781 delete this stmt.
1782 Also examine E' and set the hints for E' appropriately
1783 (doesLoad? doesGet?)
1784
1785 On seeing any other stmt,
1786 let stmt' = env(stmt)
1787 remove from env any 't=E' binds invalidated by stmt
1788 emit the invalidated stmts
1789 emit stmt'
1790
1791 Apply env to bb->next.
1792 */
1793
1794 /* The stmts in bb are being reordered, and we are guaranteed to
1795 end up with no more than the number we started with. Use i to
1796 be the cursor of the current stmt examined and j <= i to be that
1797 for the current stmt being written.
1798 */
1799 j = 0;
1800 for (i = 0; i < bb->stmts_used; i++) {
1801 st = bb->stmts[i];
1802 if (!st)
1803 continue;
1804
1805 if (st->tag == Ist_Tmp) {
sewardje80679a2004-09-21 23:00:11 +00001806 /* vex_printf("acquire binding\n"); */
sewardjc9ad1152004-10-14 00:08:21 +00001807 ti = env[st->Ist.Tmp.tmp];
1808 if (!ti) {
sewardj29632392004-08-22 02:38:11 +00001809 vpanic("treebuild_BB (phase 2): unmapped IRTemp");
1810 }
sewardj29632392004-08-22 02:38:11 +00001811 if (ti->occ == 1) {
1812 /* ok, we have 't = E', occ(t)==1. Do the abovementioned actions. */
sewardj6d076362004-09-23 11:06:17 +00001813 IRExpr* e = st->Ist.Tmp.data;
sewardj29632392004-08-22 02:38:11 +00001814 IRExpr* e2 = tbSubst_Expr(env, e);
1815 ti->expr = e2;
1816 ti->eDoesLoad = ti->eDoesGet = False;
1817 setHints_Expr(&ti->eDoesLoad, &ti->eDoesGet, e2);
1818 /* don't advance j, as we are deleting this stmt and instead
1819 holding it temporarily in the env. */
1820 continue; /* the for (i = 0; i < bb->stmts_used; i++) loop */
1821 }
1822 }
1823
1824 /* we get here for any other kind of statement. */
1825 /* 'use up' any bindings required by the current statement. */
1826 st2 = tbSubst_Stmt(env, st);
1827
1828 /* Now, before this stmt, dump any bindings it invalidates.
1829 These need to be dumped in the order in which they originally
1830 appeared. (Stupid algorithm): first, mark all bindings which
1831 need to be dumped. Then, dump them in the order in which
1832 they were defined. */
sewardj17442fe2004-09-20 14:54:28 +00001833 invPut = st->tag == Ist_Put
1834 || st->tag == Ist_PutI || st->tag == Ist_Dirty;
1835 invStore = st->tag == Ist_STle
1836 || st->tag == Ist_Dirty;
sewardj29632392004-08-22 02:38:11 +00001837
sewardjc9ad1152004-10-14 00:08:21 +00001838 for (k = 0; k < n_tmps; k++) {
1839 ti = env[k];
1840 if (!ti)
sewardj29632392004-08-22 02:38:11 +00001841 continue;
sewardj29632392004-08-22 02:38:11 +00001842 if (!ti->expr)
1843 continue;
1844
1845 /* We have to invalidate this binding. */
1846 ti->invalidateMe
1847 = (ti->eDoesLoad && invStore) || (ti->eDoesGet && invPut);
sewardjc41f1fb2004-08-22 09:48:08 +00001848 /*
1849 if (ti->invalidateMe)
1850 vex_printf("SET INVAL\n");
1851 */
sewardj29632392004-08-22 02:38:11 +00001852 }
1853
1854 dumpInvalidated ( env, bb, &j );
1855
1856 /* finally, emit the substituted statement */
1857 bb->stmts[j] = st2;
1858 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
1859 j++;
1860
1861 vassert(j <= i+1);
1862 } /* for each stmt in the original bb ... */
1863
1864 /* Finally ... substitute the ->next field as much as possible, and
1865 dump any left-over bindings. Hmm. Perhaps there should be no
1866 left over bindings? Or any left-over bindings are
1867 by definition dead? */
1868 next2 = tbSubst_Expr(env, bb->next);
1869 bb->next = next2;
1870 bb->stmts_used = j;
1871}
1872
1873
1874
sewardjf0e43162004-08-20 00:11:12 +00001875/*---------------------------------------------------------------*/
sewardj4345f7a2004-09-22 19:49:27 +00001876/*--- Common Subexpression Elimination ---*/
1877/*---------------------------------------------------------------*/
1878
1879/* Expensive in time and space. */
1880
1881/* Uses two environments:
1882 a IRTemp -> IRTemp mapping
1883 a mapping from AvailExpr* to IRTemp
1884*/
1885
1886typedef
1887 struct {
sewardj8f31b3b2004-10-13 16:59:41 +00001888 enum { Ut, Btt, Btc, Cf64i, Ld } tag;
sewardj4345f7a2004-09-22 19:49:27 +00001889 union {
1890 /* unop(tmp) */
1891 struct {
1892 IROp op;
1893 IRTemp arg;
1894 } Ut;
sewardjc0b42282004-10-12 13:44:12 +00001895 /* binop(tmp,tmp) */
sewardj4345f7a2004-09-22 19:49:27 +00001896 struct {
1897 IROp op;
1898 IRTemp arg1;
1899 IRTemp arg2;
1900 } Btt;
sewardjc0b42282004-10-12 13:44:12 +00001901 /* binop(tmp,const) */
sewardj4345f7a2004-09-22 19:49:27 +00001902 struct {
1903 IROp op;
1904 IRTemp arg1;
1905 IRConst con2;
1906 } Btc;
sewardjc0b42282004-10-12 13:44:12 +00001907 /* F64i-style const */
1908 struct {
1909 ULong f64i;
1910 } Cf64i;
sewardj8f31b3b2004-10-13 16:59:41 +00001911 /* LoadLE(tmp):ty */
1912 struct {
1913 IRTemp addr;
1914 IRType ty;
1915 } Ld;
sewardj4345f7a2004-09-22 19:49:27 +00001916 } u;
1917 }
1918 AvailExpr;
1919
1920static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
1921{
1922 if (a1->tag != a2->tag)
1923 return False;
1924 switch (a1->tag) {
1925 case Ut: return a1->u.Ut.op == a2->u.Ut.op
1926 && a1->u.Ut.arg == a2->u.Ut.arg;
1927 case Btt: return a1->u.Btt.op == a2->u.Btt.op
1928 && a1->u.Btt.arg1 == a2->u.Btt.arg1
1929 && a1->u.Btt.arg2 == a2->u.Btt.arg2;
1930 case Btc: return a1->u.Btc.op == a2->u.Btc.op
1931 && a1->u.Btc.arg1 == a2->u.Btc.arg1
1932 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2);
sewardjc0b42282004-10-12 13:44:12 +00001933 case Cf64i: return a1->u.Cf64i.f64i == a2->u.Cf64i.f64i;
sewardj8f31b3b2004-10-13 16:59:41 +00001934 case Ld: return a1->u.Ld.ty == a2->u.Ld.ty
1935 && a1->u.Ld.addr == a2->u.Ld.addr;
sewardj4345f7a2004-09-22 19:49:27 +00001936 default: vpanic("eq_AvailExpr");
1937 }
1938}
1939
1940static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
1941{
1942 IRConst* con;
1943 switch (ae->tag) {
1944 case Ut:
1945 return IRExpr_Unop( ae->u.Ut.op, IRExpr_Tmp(ae->u.Ut.arg) );
1946 case Btt:
1947 return IRExpr_Binop( ae->u.Btt.op,
1948 IRExpr_Tmp(ae->u.Btt.arg1),
1949 IRExpr_Tmp(ae->u.Btt.arg2) );
1950 case Btc:
1951 con = LibVEX_Alloc(sizeof(IRConst));
1952 *con = ae->u.Btc.con2;
1953 return IRExpr_Binop( ae->u.Btc.op,
1954 IRExpr_Tmp(ae->u.Btc.arg1), IRExpr_Const(con) );
sewardjc0b42282004-10-12 13:44:12 +00001955 case Cf64i:
1956 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
sewardj8f31b3b2004-10-13 16:59:41 +00001957 case Ld:
1958 return IRExpr_LDle( ae->u.Ld.ty, IRExpr_Tmp(ae->u.Ld.addr) );
sewardj4345f7a2004-09-22 19:49:27 +00001959 default:
1960 vpanic("availExpr_to_IRExpr");
1961 }
1962}
1963
1964inline
sewardjea602bc2004-10-14 21:40:12 +00001965static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
sewardj4345f7a2004-09-22 19:49:27 +00001966{
sewardjea602bc2004-10-14 21:40:12 +00001967 HWord res;
sewardj4345f7a2004-09-22 19:49:27 +00001968 /* env :: IRTemp -> IRTemp */
sewardjea602bc2004-10-14 21:40:12 +00001969 if (lookupHHW( env, &res, (HWord)tmp ))
sewardj4345f7a2004-09-22 19:49:27 +00001970 return (IRTemp)res;
1971 else
1972 return tmp;
1973}
1974
sewardjea602bc2004-10-14 21:40:12 +00001975static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
sewardj4345f7a2004-09-22 19:49:27 +00001976{
1977 /* env :: IRTemp -> IRTemp */
1978 switch (ae->tag) {
1979 case Ut:
1980 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
1981 break;
1982 case Btt:
1983 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
1984 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
1985 break;
1986 case Btc:
1987 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
1988 break;
sewardjc0b42282004-10-12 13:44:12 +00001989 case Cf64i:
1990 break;
sewardj8f31b3b2004-10-13 16:59:41 +00001991 case Ld:
1992 ae->u.Ld.addr = subst_AvailExpr_Temp( env, ae->u.Ld.addr );
1993 break;
sewardj4345f7a2004-09-22 19:49:27 +00001994 default:
1995 vpanic("subst_AvailExpr");
1996 }
1997}
1998
1999static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
2000{
2001 AvailExpr* ae;
2002
2003 if (e->tag == Iex_Unop
2004 && e->Iex.Unop.arg->tag == Iex_Tmp) {
2005 ae = LibVEX_Alloc(sizeof(AvailExpr));
2006 ae->tag = Ut;
2007 ae->u.Ut.op = e->Iex.Unop.op;
2008 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.Tmp.tmp;
2009 return ae;
2010 }
2011
2012 if (e->tag == Iex_Binop
2013 && e->Iex.Binop.arg1->tag == Iex_Tmp
2014 && e->Iex.Binop.arg2->tag == Iex_Tmp) {
2015 ae = LibVEX_Alloc(sizeof(AvailExpr));
2016 ae->tag = Btt;
2017 ae->u.Btt.op = e->Iex.Binop.op;
2018 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.Tmp.tmp;
2019 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.Tmp.tmp;
2020 return ae;
2021 }
2022
2023 if (e->tag == Iex_Binop
2024 && e->Iex.Binop.arg1->tag == Iex_Tmp
2025 && e->Iex.Binop.arg2->tag == Iex_Const) {
2026 ae = LibVEX_Alloc(sizeof(AvailExpr));
2027 ae->tag = Btc;
2028 ae->u.Btc.op = e->Iex.Binop.op;
2029 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.Tmp.tmp;
2030 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
2031 return ae;
2032 }
2033
sewardjc0b42282004-10-12 13:44:12 +00002034 if (e->tag == Iex_Const
2035 && e->Iex.Const.con->tag == Ico_F64i) {
2036 ae = LibVEX_Alloc(sizeof(AvailExpr));
2037 ae->tag = Cf64i;
2038 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
2039 return ae;
2040 }
2041
sewardj65599a32004-10-13 17:08:42 +00002042 /* Not right ... need to invalidate Ld-cses at store points and
2043 dirty helper calls. */
2044 if (0 &&
2045 e->tag == Iex_LDle
sewardj8f31b3b2004-10-13 16:59:41 +00002046 && e->Iex.LDle.addr->tag == Iex_Tmp) {
sewardj8f31b3b2004-10-13 16:59:41 +00002047 ae = LibVEX_Alloc(sizeof(AvailExpr));
2048 ae->tag = Ld;
2049 ae->u.Ld.addr = e->Iex.LDle.addr->Iex.Tmp.tmp;
2050 ae->u.Ld.ty = e->Iex.LDle.ty;
2051 return ae;
2052 }
2053
sewardj4345f7a2004-09-22 19:49:27 +00002054 return NULL;
2055}
2056
2057
2058/* The BB is modified in-place. */
2059
2060static void cse_BB ( IRBB* bb )
2061{
2062 Int i, j;
2063 IRTemp t, q;
2064 IRStmt* st;
2065 AvailExpr* eprime;
2066
sewardjea602bc2004-10-14 21:40:12 +00002067 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
2068 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
2069
2070 vassert(sizeof(IRTemp) <= sizeof(HWord));
sewardj4345f7a2004-09-22 19:49:27 +00002071
2072 //ppIRBB(bb);
2073 //vex_printf("\n\n");
2074
2075 /* Iterate forwards over the stmts.
2076 On seeing "t = E", where E is one of the 3 AvailExpr forms:
2077 let E' = apply tenv substitution to E
2078 search aenv for E'
2079 if a mapping E' -> q is found,
2080 replace this stmt by "t = q"
2081 and add binding t -> q to tenv
2082 else
2083 add binding E' -> t to aenv
2084 replace this stmt by "t = E'"
2085 Ignore any other kind of stmt.
2086 */
2087 for (i = 0; i < bb->stmts_used; i++) {
2088 st = bb->stmts[i];
2089
2090 /* ignore not-interestings */
2091 if ((!st) || st->tag != Ist_Tmp)
2092 continue;
2093
2094 t = st->Ist.Tmp.tmp;
sewardj6d076362004-09-23 11:06:17 +00002095 eprime = irExpr_to_AvailExpr(st->Ist.Tmp.data);
sewardj4345f7a2004-09-22 19:49:27 +00002096 /* ignore if not of AvailExpr form */
2097 if (!eprime)
2098 continue;
2099
sewardj0c4763c2004-10-13 17:21:34 +00002100 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
sewardj4345f7a2004-09-22 19:49:27 +00002101
2102 /* apply tenv */
2103 subst_AvailExpr( tenv, eprime );
2104
2105 /* search aenv for eprime, unfortunately the hard way */
2106 for (j = 0; j < aenv->used; j++)
2107 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
2108 break;
2109
2110 if (j < aenv->used) {
2111 /* A binding E' -> q was found. Replace stmt by "t = q" and
2112 note the t->q binding in tenv. */
2113 /* (this is the core of the CSE action) */
2114 q = (IRTemp)aenv->val[j];
2115 bb->stmts[i] = IRStmt_Tmp( t, IRExpr_Tmp(q) );
sewardjea602bc2004-10-14 21:40:12 +00002116 addToHHW( tenv, (HWord)t, (HWord)q );
sewardj4345f7a2004-09-22 19:49:27 +00002117 } else {
2118 /* No binding was found, so instead we add E' -> t to our
2119 collection of available expressions, replace this stmt
2120 with "t = E'", and move on. */
2121 bb->stmts[i] = IRStmt_Tmp( t, availExpr_to_IRExpr(eprime) );
sewardjea602bc2004-10-14 21:40:12 +00002122 addToHHW( aenv, (HWord)eprime, (HWord)t );
sewardj4345f7a2004-09-22 19:49:27 +00002123 }
2124 }
2125
2126 //ppIRBB(bb);
2127 //sanityCheckIRBB(bb, Ity_I32);
2128 //vex_printf("\n\n");
2129
2130}
2131
sewardja5dc3482004-10-03 23:10:48 +00002132
sewardj728c9d52004-09-24 17:19:22 +00002133/*---------------------------------------------------------------*/
2134/*--- Add32/Sub32 chain collapsing ---*/
2135/*---------------------------------------------------------------*/
2136
sewardj0ec972d2004-10-12 09:47:15 +00002137/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2138
sewardja5dc3482004-10-03 23:10:48 +00002139/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
2140 yes, set *tmp and *i32 appropriately. *i32 is set as if the
2141 root node is Add32, not Sub32. */
2142
2143static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2144{
2145 if (e->tag != Iex_Binop)
2146 return False;
2147 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2148 return False;
2149 if (e->Iex.Binop.arg1->tag != Iex_Tmp)
2150 return False;
2151 if (e->Iex.Binop.arg2->tag != Iex_Const)
2152 return False;
2153 *tmp = e->Iex.Binop.arg1->Iex.Tmp.tmp;
2154 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2155 if (e->Iex.Binop.op == Iop_Sub32)
2156 *i32 = -*i32;
2157 return True;
sewardj728c9d52004-09-24 17:19:22 +00002158}
2159
sewardja5dc3482004-10-03 23:10:48 +00002160
2161/* Figure out if tmp can be expressed as tmp3 +32 const, for some
2162 other tmp2. Scan backwards from the specified start point -- an
2163 optimisation. */
2164
2165static Bool collapseChain ( IRBB* bb, Int startHere,
2166 IRTemp tmp,
2167 IRTemp* tmp2, Int* i32 )
2168{
2169 Int j, ii;
2170 IRTemp vv;
2171 IRStmt* st;
2172 IRExpr* e;
2173
2174 /* the (var, con) pair contain the current 'representation' for
2175 'tmp'. We start with 'tmp + 0'. */
2176 IRTemp var = tmp;
2177 Int con = 0;
2178
2179 /* Scan backwards to see if tmp can be replaced by some other tmp
2180 +/- a constant. */
2181 for (j = startHere; j >= 0; j--) {
2182 st = bb->stmts[j];
2183 if (!st || st->tag != Ist_Tmp)
2184 continue;
2185 if (st->Ist.Tmp.tmp != var)
2186 continue;
2187 e = st->Ist.Tmp.data;
2188 if (!isAdd32OrSub32(e, &vv, &ii))
2189 break;
2190 var = vv;
2191 con += ii;
2192 }
2193 if (j == -1)
2194 /* no earlier binding for var .. ill-formed IR */
2195 vpanic("collapseChain");
2196
2197 /* so, did we find anything interesting? */
2198 if (var == tmp)
2199 return False; /* no .. */
2200
2201 *tmp2 = var;
2202 *i32 = con;
2203 return True;
2204}
2205
2206
sewardj0ec972d2004-10-12 09:47:15 +00002207/* ------- Main function for Add32/Sub32 chain collapsing ------ */
sewardja5dc3482004-10-03 23:10:48 +00002208
sewardj0ec972d2004-10-12 09:47:15 +00002209static void collapse_AddSub_chains_BB ( IRBB* bb )
sewardj728c9d52004-09-24 17:19:22 +00002210{
sewardja5dc3482004-10-03 23:10:48 +00002211 IRStmt *st;
2212 IRTemp var, var2;
2213 Int i, con, con2;
2214
sewardj728c9d52004-09-24 17:19:22 +00002215 for (i = bb->stmts_used-1; i >= 0; i--) {
2216 st = bb->stmts[i];
sewardja5dc3482004-10-03 23:10:48 +00002217 if (!st)
2218 continue;
sewardj728c9d52004-09-24 17:19:22 +00002219
sewardja5dc3482004-10-03 23:10:48 +00002220 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
sewardj728c9d52004-09-24 17:19:22 +00002221
sewardja5dc3482004-10-03 23:10:48 +00002222 if (st->tag == Ist_Tmp
2223 && isAdd32OrSub32(st->Ist.Tmp.data, &var, &con)) {
2224
2225 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
2226 Find out if var can be expressed as var2 + con2. */
2227 if (collapseChain(bb, i-1, var, &var2, &con2)) {
2228 if (DEBUG_IROPT) {
2229 vex_printf("replacing1 ");
2230 ppIRStmt(st);
2231 vex_printf(" with ");
2232 }
2233 con2 += con;
2234 bb->stmts[i]
2235 = IRStmt_Tmp(
2236 st->Ist.Tmp.tmp,
2237 (con2 >= 0)
2238 ? IRExpr_Binop(Iop_Add32,
2239 IRExpr_Tmp(var2),
2240 IRExpr_Const(IRConst_U32(con2)))
2241 : IRExpr_Binop(Iop_Sub32,
2242 IRExpr_Tmp(var2),
2243 IRExpr_Const(IRConst_U32(-con2)))
2244 );
2245 if (DEBUG_IROPT) {
2246 ppIRStmt(bb->stmts[i]);
2247 vex_printf("\n");
2248 }
2249 }
2250
2251 continue;
sewardj728c9d52004-09-24 17:19:22 +00002252 }
sewardj728c9d52004-09-24 17:19:22 +00002253
sewardja5dc3482004-10-03 23:10:48 +00002254 /* Try to collapse 't1 = GetI[t2, con]'. */
2255
2256 if (st->tag == Ist_Tmp
2257 && st->Ist.Tmp.data->tag == Iex_GetI
2258 && st->Ist.Tmp.data->Iex.GetI.off->tag == Iex_Tmp
2259 && collapseChain(bb, i-1, st->Ist.Tmp.data->Iex.GetI.off
2260 ->Iex.Tmp.tmp, &var2, &con2)) {
2261 if (DEBUG_IROPT) {
2262 vex_printf("replacing3 ");
2263 ppIRStmt(st);
2264 vex_printf(" with ");
2265 }
2266 con2 += st->Ist.Tmp.data->Iex.GetI.bias;
2267 bb->stmts[i]
2268 = IRStmt_Tmp(
2269 st->Ist.Tmp.tmp,
2270 IRExpr_GetI(st->Ist.Tmp.data->Iex.GetI.descr,
2271 IRExpr_Tmp(var2),
2272 con2));
2273 if (DEBUG_IROPT) {
2274 ppIRStmt(bb->stmts[i]);
2275 vex_printf("\n");
2276 }
2277 continue;
sewardj728c9d52004-09-24 17:19:22 +00002278 }
sewardja5dc3482004-10-03 23:10:48 +00002279
2280 /* Perhaps st is PutI[t, con] ? */
2281
2282 if (st->tag == Ist_PutI
2283 && st->Ist.PutI.off->tag == Iex_Tmp
2284 && collapseChain(bb, i-1, st->Ist.PutI.off->Iex.Tmp.tmp,
2285 &var2, &con2)) {
2286 if (DEBUG_IROPT) {
2287 vex_printf("replacing2 ");
2288 ppIRStmt(st);
2289 vex_printf(" with ");
2290 }
2291 con2 += st->Ist.PutI.bias;
2292 bb->stmts[i]
2293 = IRStmt_PutI(st->Ist.PutI.descr,
2294 IRExpr_Tmp(var2),
2295 con2,
2296 st->Ist.PutI.data);
2297 if (DEBUG_IROPT) {
2298 ppIRStmt(bb->stmts[i]);
2299 vex_printf("\n");
2300 }
2301 continue;
2302 }
2303
2304 } /* for */
sewardj575670c2004-10-12 09:13:19 +00002305}
sewardj728c9d52004-09-24 17:19:22 +00002306
sewardj575670c2004-10-12 09:13:19 +00002307
2308/*---------------------------------------------------------------*/
2309/*--- PutI/GetI transformations ---*/
2310/*---------------------------------------------------------------*/
2311
2312/* ------- Helper functions for PutI/GetI transformations ------ */
2313
2314
2315/* Determine, to the extent possible, the relationship between two
2316 guest state accesses. The possible outcomes are:
2317
2318 * Exact alias. These two accesses denote precisely the same
2319 piece of the guest state.
2320
2321 * Definely no alias. These two accesses are guaranteed not to
2322 overlap any part of the guest state.
2323
2324 * Unknown -- if neither of the above can be established.
2325
2326 If in doubt, return Unknown. */
2327
2328typedef
2329 enum { ExactAlias, NoAlias, UnknownAlias }
2330 GSAliasing;
2331
2332
2333/* Produces the alias relation between an indexed guest
2334 state access and a non-indexed access. */
2335
2336static
2337GSAliasing getAliasingRelation_IC ( IRArray* descr1, IRExpr* ix1,
2338 Int offset2, IRType ty2 )
2339{
2340 UInt minoff1, maxoff1, minoff2, maxoff2;
2341
2342 getArrayBounds( descr1, &minoff1, &maxoff1 );
2343 minoff2 = offset2;
2344 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2345
2346 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2347 return NoAlias;
2348
2349 /* Could probably do better here if required. For the moment
2350 however just claim not to know anything more. */
2351 return UnknownAlias;
2352}
2353
2354
2355/* Produces the alias relation between two indexed guest state
2356 accesses. */
2357
2358static
2359GSAliasing getAliasingRelation_II (
2360 IRArray* descr1, IRExpr* ix1, Int bias1,
2361 IRArray* descr2, IRExpr* ix2, Int bias2
2362 )
2363{
2364 UInt minoff1, maxoff1, minoff2, maxoff2;
2365 Int iters;
2366
2367 /* First try hard to show they don't alias. */
2368 getArrayBounds( descr1, &minoff1, &maxoff1 );
2369 getArrayBounds( descr2, &minoff2, &maxoff2 );
2370 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2371 return NoAlias;
2372
2373 /* So the two arrays at least partially overlap. To get any
2374 further we'll have to be sure that the descriptors are
2375 identical. */
2376 if (!eqIRArray(descr1, descr2))
2377 return UnknownAlias;
2378
2379 /* The descriptors are identical. Now the only difference can be
2380 in the index expressions. If they cannot be shown to be
2381 identical, we have to say we don't know what the aliasing
2382 relation will be. Now, since the IR is flattened, the index
2383 expressions should be atoms -- either consts or tmps. So that
2384 makes the comparison simple. */
2385 vassert(isAtom(ix1));
2386 vassert(isAtom(ix2));
2387 if (ix1->tag != Iex_Tmp || ix2->tag != Iex_Tmp)
2388 return UnknownAlias;
2389
2390 /* Both index expressions are Tmps. If they are not the same tmp,
2391 we're again hosed. */
2392 if (ix1->Iex.Tmp.tmp != ix2->Iex.Tmp.tmp)
2393 return UnknownAlias;
2394
2395 /* Ok, the index expressions are identical. So now the only way
2396 they can be different is in the bias. Normalise this
2397 paranoidly, to reliably establish equality/non-equality. */
2398
2399 /* So now we know that the GetI and PutI index the same array
2400 with the same base. Are the offsets the same, modulo the
2401 array size? Do this paranoidly. */
2402 vassert(descr1->nElems == descr2->nElems);
2403 vassert(descr1->elemTy == descr2->elemTy);
2404 vassert(descr1->base == descr2->base);
2405 iters = 0;
2406 while (bias1 < 0 || bias2 < 0) {
2407 bias1 += descr1->nElems;
2408 bias2 += descr1->nElems;
2409 iters++;
2410 if (iters > 10)
2411 vpanic("getAliasingRelation: iters");
2412 }
2413 vassert(bias1 >= 0 && bias2 >= 0);
2414 bias1 %= descr1->nElems;
2415 bias2 %= descr1->nElems;
2416 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2417 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2418
2419 /* Finally, biasP and biasG are normalised into the range
2420 0 .. descrP/G->nElems - 1. And so we can establish
2421 equality/non-equality. */
2422
2423 return bias1==bias2 ? ExactAlias : NoAlias;
2424}
2425
2426
sewardj0ec972d2004-10-12 09:47:15 +00002427/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
2428 the given starting point to find, if any, a PutI which writes
2429 exactly the same piece of guest state, and so return the expression
2430 that the PutI writes. This is the core of PutI-GetI forwarding. */
2431
sewardj575670c2004-10-12 09:13:19 +00002432static
2433IRExpr* findPutI ( IRBB* bb, Int startHere,
sewardj0ec972d2004-10-12 09:47:15 +00002434 IRArray* descrG, IRExpr* ixG, Int biasG )
sewardj575670c2004-10-12 09:13:19 +00002435{
sewardj0ec972d2004-10-12 09:47:15 +00002436 Int j;
sewardj0ec972d2004-10-12 09:47:15 +00002437 IRStmt* st;
2438 GSAliasing relation;
sewardj575670c2004-10-12 09:13:19 +00002439
2440 if (0) {
2441 vex_printf("\nfindPutI ");
2442 ppIRArray(descrG);
2443 vex_printf(" ");
sewardj0ec972d2004-10-12 09:47:15 +00002444 ppIRExpr(ixG);
sewardj575670c2004-10-12 09:13:19 +00002445 vex_printf(" %d\n", biasG);
2446 }
2447
2448 /* Scan backwards in bb from startHere to find a suitable PutI
sewardjeb309642004-10-12 09:51:56 +00002449 binding for (descrG, ixG, biasG), if any. */
sewardj575670c2004-10-12 09:13:19 +00002450
2451 for (j = startHere; j >= 0; j--) {
2452 st = bb->stmts[j];
2453 if (!st) continue;
2454
2455 if (st->tag == Ist_Put) {
2456 /* Non-indexed Put. This can't give a binding, but we do
2457 need to check it doesn't invalidate the search by
2458 overlapping any part of the indexed guest state. */
sewardj0ec972d2004-10-12 09:47:15 +00002459
2460 relation
2461 = getAliasingRelation_IC(
2462 descrG, ixG,
2463 st->Ist.Put.offset,
2464 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
2465
2466 if (relation == NoAlias) {
sewardj575670c2004-10-12 09:13:19 +00002467 /* we're OK; keep going */
2468 continue;
2469 } else {
sewardj0ec972d2004-10-12 09:47:15 +00002470 /* relation == UnknownAlias || relation == ExactAlias */
2471 /* If this assertion fails, we've found a Put which writes
2472 an area of guest state which is read by a GetI. Which
2473 is unlikely (although not per se wrong). */
2474 vassert(relation != ExactAlias);
sewardj575670c2004-10-12 09:13:19 +00002475 /* This Put potentially writes guest state that the GetI
2476 reads; we must fail. */
sewardj0ec972d2004-10-12 09:47:15 +00002477 return NULL;
sewardj575670c2004-10-12 09:13:19 +00002478 }
2479 }
2480
2481 if (st->tag == Ist_PutI) {
sewardj0ec972d2004-10-12 09:47:15 +00002482
2483 relation = getAliasingRelation_II(
2484 descrG, ixG, biasG,
2485 st->Ist.PutI.descr,
2486 st->Ist.PutI.off,
2487 st->Ist.PutI.bias
2488 );
2489
2490 if (relation == NoAlias) {
sewardj575670c2004-10-12 09:13:19 +00002491 /* This PutI definitely doesn't overlap. Ignore it and
2492 keep going. */
sewardj0ec972d2004-10-12 09:47:15 +00002493 continue; /* the for j loop */
sewardj575670c2004-10-12 09:13:19 +00002494 }
sewardj575670c2004-10-12 09:13:19 +00002495
sewardj0ec972d2004-10-12 09:47:15 +00002496 if (relation == UnknownAlias) {
2497 /* We don't know if this PutI writes to the same guest
2498 state that the GetI, or not. So we have to give up. */
sewardj575670c2004-10-12 09:13:19 +00002499 return NULL;
sewardj575670c2004-10-12 09:13:19 +00002500 }
2501
sewardj0ec972d2004-10-12 09:47:15 +00002502 /* Otherwise, we've found what we're looking for. */
2503 vassert(relation == ExactAlias);
2504 return st->Ist.PutI.data;
2505
sewardj575670c2004-10-12 09:13:19 +00002506 } /* if (st->tag == Ist_PutI) */
2507
sewardjae27ab62004-10-15 21:21:46 +00002508 if (st->tag == Ist_Dirty) {
2509 /* Be conservative. If the dirty call has any guest effects at
2510 all, give up. We could do better -- only give up if there
2511 are any guest writes/modifies. */
2512 if (st->Ist.Dirty.details->nFxState > 0)
2513 return NULL;
2514 }
sewardjeb309642004-10-12 09:51:56 +00002515
sewardj575670c2004-10-12 09:13:19 +00002516 } /* for */
2517
sewardj0ec972d2004-10-12 09:47:15 +00002518 /* No valid replacement was found. */
sewardj575670c2004-10-12 09:13:19 +00002519 return NULL;
2520}
2521
2522
2523
2524/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
2525 that it writes exactly the same piece of guest state) ? Safe
2526 answer: False. */
2527
2528static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
2529{
2530 vassert(pi->tag == Ist_PutI);
2531 if (s2->tag != Ist_PutI)
2532 return False;
2533
2534 return getAliasingRelation_II(
2535 pi->Ist.PutI.descr, pi->Ist.PutI.off, pi->Ist.PutI.bias,
2536 s2->Ist.PutI.descr, s2->Ist.PutI.off, s2->Ist.PutI.bias
2537 )
2538 == ExactAlias;
2539}
2540
2541
2542/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
2543 overlap it? Safe answer: True. Note, we could do a lot better
2544 than this if needed. */
2545
2546static
2547Bool guestAccessWhichMightOverlapPutI (
2548 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
2549 )
2550{
2551 GSAliasing relation;
2552 UInt minoffP, maxoffP;
2553
2554 vassert(pi->tag == Ist_PutI);
2555 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
2556 switch (s2->tag) {
2557
sewardjae27ab62004-10-15 21:21:46 +00002558 case Ist_Dirty:
2559 /* If the dirty call has any guest effects at all, give up.
2560 Probably could do better. */
2561 if (s2->Ist.Dirty.details->nFxState > 0)
2562 return True;
2563 return False;
2564
sewardj575670c2004-10-12 09:13:19 +00002565 case Ist_Put:
2566 vassert(isAtom(s2->Ist.Put.data));
2567 relation
2568 = getAliasingRelation_IC(
2569 pi->Ist.PutI.descr, pi->Ist.PutI.off,
2570 s2->Ist.Put.offset,
2571 typeOfIRExpr(tyenv,s2->Ist.Put.data)
2572 );
2573 goto have_relation;
2574
2575 case Ist_PutI:
2576 vassert(isAtom(s2->Ist.PutI.off));
2577 vassert(isAtom(s2->Ist.PutI.data));
2578 relation
2579 = getAliasingRelation_II(
2580 pi->Ist.PutI.descr, pi->Ist.PutI.off, pi->Ist.PutI.bias,
2581 s2->Ist.PutI.descr, s2->Ist.PutI.off, s2->Ist.PutI.bias
2582 );
2583 goto have_relation;
2584
2585 case Ist_Tmp:
2586 if (s2->Ist.Tmp.data->tag == Iex_GetI) {
2587 relation
2588 = getAliasingRelation_II(
2589 pi->Ist.PutI.descr, pi->Ist.PutI.off,
2590 pi->Ist.PutI.bias,
2591 s2->Ist.Tmp.data->Iex.GetI.descr,
2592 s2->Ist.Tmp.data->Iex.GetI.off,
2593 s2->Ist.Tmp.data->Iex.GetI.bias
2594 );
2595 goto have_relation;
2596 }
2597 if (s2->Ist.Tmp.data->tag == Iex_Get) {
2598 relation
2599 = getAliasingRelation_IC(
2600 pi->Ist.PutI.descr, pi->Ist.PutI.off,
2601 s2->Ist.Tmp.data->Iex.Get.offset,
2602 s2->Ist.Tmp.data->Iex.Get.ty
2603 );
2604 goto have_relation;
2605 }
2606 return False;
2607
2608 case Ist_STle:
2609 vassert(isAtom(s2->Ist.STle.addr));
2610 vassert(isAtom(s2->Ist.STle.data));
2611 return False;
2612
2613 default:
2614 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
2615 vpanic("guestAccessWhichMightOverlapPutI");
2616 }
2617
2618 have_relation:
2619 if (relation == NoAlias)
2620 return False;
2621 else
2622 return True; /* ExactAlias or UnknownAlias */
2623}
2624
2625
2626
2627/* ---------- PutI/GetI transformations main functions --------- */
2628
2629/* Do PutI -> GetI forwarding. bb is modified in-place. */
2630
2631static
2632void do_PutI_GetI_forwarding_BB ( IRBB* bb )
2633{
2634 Int i;
2635 IRStmt* st;
sewardje98dcf22004-10-04 09:15:11 +00002636
2637 for (i = bb->stmts_used-1; i >= 0; i--) {
2638 st = bb->stmts[i];
2639 if (!st)
2640 continue;
2641
2642 if (st->tag == Ist_Tmp
2643 && st->Ist.Tmp.data->tag == Iex_GetI
2644 && st->Ist.Tmp.data->Iex.GetI.off->tag == Iex_Tmp) {
sewardj3e2ba0a2004-10-08 21:30:27 +00002645 IRArray* descr = st->Ist.Tmp.data->Iex.GetI.descr;
sewardj0ec972d2004-10-12 09:47:15 +00002646 IRExpr* ix = st->Ist.Tmp.data->Iex.GetI.off;
sewardj3e2ba0a2004-10-08 21:30:27 +00002647 Int bias = st->Ist.Tmp.data->Iex.GetI.bias;
sewardj0ec972d2004-10-12 09:47:15 +00002648 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
2649 if (replacement
2650 && isAtom(replacement)
2651 /* Make sure we're doing a type-safe transformation! */
2652 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
sewardj3e2ba0a2004-10-08 21:30:27 +00002653 if (DEBUG_IROPT) {
2654 vex_printf("PiGi: ");
2655 ppIRExpr(st->Ist.Tmp.data);
2656 vex_printf(" -> ");
2657 ppIRExpr(replacement);
2658 vex_printf("\n");
2659 }
2660 bb->stmts[i] = IRStmt_Tmp(st->Ist.Tmp.tmp, replacement);
2661 }
sewardje98dcf22004-10-04 09:15:11 +00002662 }
2663 }
sewardj575670c2004-10-12 09:13:19 +00002664
sewardj728c9d52004-09-24 17:19:22 +00002665}
sewardj4345f7a2004-09-22 19:49:27 +00002666
sewardj575670c2004-10-12 09:13:19 +00002667/* Remove redundant PutIs, to the extent which they can be detected.
2668 bb is modified in-place. */
2669
2670static
2671void do_redundant_PutI_elimination ( IRBB* bb )
2672{
2673 Int i, j;
2674 Bool delete;
2675 IRStmt *st, *stj;
2676
2677 for (i = 0; i < bb->stmts_used; i++) {
2678 st = bb->stmts[i];
2679 if (!st || st->tag != Ist_PutI)
2680 continue;
2681 /* Ok, search forwards from here to see if we can find another
2682 PutI which makes this one redundant, and dodging various
2683 hazards. Search forwards:
2684 * If conditional exit, give up (because anything after that
2685 does not postdominate this put).
sewardj0ec972d2004-10-12 09:47:15 +00002686 * If a Get which might overlap, give up (because this PutI
sewardj575670c2004-10-12 09:13:19 +00002687 not necessarily dead).
sewardj0ec972d2004-10-12 09:47:15 +00002688 * If a Put which is identical, stop with success.
2689 * If a Put which might overlap, but is not identical, give up.
2690 * If a dirty helper call which might write guest state, give up.
2691 * If a Put which definitely doesn't overlap, or any other
sewardj575670c2004-10-12 09:13:19 +00002692 kind of stmt, continue.
2693 */
2694 delete = False;
2695 for (j = i+1; j < bb->stmts_used; j++) {
2696 stj = bb->stmts[j];
2697 if (!stj)
2698 continue;
2699 if (identicalPutIs(st, stj)) {
2700 /* success! */
2701 delete = True;
2702 break;
2703 }
2704 if (stj->tag == Ist_Exit)
2705 /* give up */
2706 break;
2707 if (st->tag == Ist_Dirty)
2708 /* give up; could do better here */
2709 break;
2710 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
2711 /* give up */
2712 break;
2713 }
2714
2715 if (delete) {
2716 if (DEBUG_IROPT) {
2717 vex_printf("rPI: ");
2718 ppIRStmt(st);
2719 vex_printf("\n");
2720 }
2721 bb->stmts[i] = NULL;
2722 }
2723
2724 }
2725}
2726
2727
sewardj4345f7a2004-09-22 19:49:27 +00002728/*---------------------------------------------------------------*/
sewardj695cff92004-10-13 14:50:14 +00002729/*--- Loop unrolling ---*/
2730/*---------------------------------------------------------------*/
2731
sewardjf6275b72004-10-13 15:03:18 +00002732/* Adjust all tmp values (names) in e by delta. e is destructively
2733 modified. */
2734
sewardj695cff92004-10-13 14:50:14 +00002735static void deltaIRExpr ( IRExpr* e, Int delta )
2736{
sewardjf6275b72004-10-13 15:03:18 +00002737 Int i;
2738 switch (e->tag) {
2739 case Iex_Tmp:
2740 e->Iex.Tmp.tmp += delta;
2741 break;
2742 case Iex_Get:
2743 case Iex_Const:
2744 break;
2745 case Iex_GetI:
2746 deltaIRExpr(e->Iex.GetI.off, delta);
2747 break;
2748 case Iex_Binop:
2749 deltaIRExpr(e->Iex.Binop.arg1, delta);
2750 deltaIRExpr(e->Iex.Binop.arg2, delta);
2751 break;
2752 case Iex_Unop:
2753 deltaIRExpr(e->Iex.Unop.arg, delta);
2754 break;
2755 case Iex_LDle:
2756 deltaIRExpr(e->Iex.LDle.addr, delta);
2757 break;
2758 case Iex_CCall:
2759 for (i = 0; e->Iex.CCall.args[i]; i++)
2760 deltaIRExpr(e->Iex.CCall.args[i], delta);
2761 break;
2762 case Iex_Mux0X:
2763 deltaIRExpr(e->Iex.Mux0X.cond, delta);
2764 deltaIRExpr(e->Iex.Mux0X.expr0, delta);
2765 deltaIRExpr(e->Iex.Mux0X.exprX, delta);
2766 break;
2767 default:
2768 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2769 vpanic("deltaIRExpr");
2770 }
sewardj695cff92004-10-13 14:50:14 +00002771}
2772
sewardjf6275b72004-10-13 15:03:18 +00002773/* Adjust all tmp values (names) in st by delta. st is destructively
2774 modified. */
2775
sewardj695cff92004-10-13 14:50:14 +00002776static void deltaIRStmt ( IRStmt* st, Int delta )
2777{
sewardj7447b5b2004-10-16 11:30:42 +00002778 Int i;
2779 IRDirty* d;
sewardjf6275b72004-10-13 15:03:18 +00002780 switch (st->tag) {
2781 case Ist_Put:
2782 deltaIRExpr(st->Ist.Put.data, delta);
2783 break;
2784 case Ist_PutI:
2785 deltaIRExpr(st->Ist.PutI.off, delta);
2786 deltaIRExpr(st->Ist.PutI.data, delta);
2787 break;
sewardj7447b5b2004-10-16 11:30:42 +00002788 case Ist_Tmp:
2789 st->Ist.Tmp.tmp += delta;
sewardjf6275b72004-10-13 15:03:18 +00002790 deltaIRExpr(st->Ist.Tmp.data, delta);
2791 break;
2792 case Ist_Exit:
2793 deltaIRExpr(st->Ist.Exit.cond, delta);
2794 break;
2795 case Ist_STle:
2796 deltaIRExpr(st->Ist.STle.addr, delta);
2797 deltaIRExpr(st->Ist.STle.data, delta);
2798 break;
sewardj7447b5b2004-10-16 11:30:42 +00002799 case Ist_Dirty:
2800 d = st->Ist.Dirty.details;
2801 for (i = 0; d->args[i]; i++)
2802 deltaIRExpr(d->args[i], delta);
2803 if (d->tmp != INVALID_IRTEMP)
2804 d->tmp += delta;
2805 if (d->mAddr)
2806 deltaIRExpr(d->mAddr, delta);
2807 break;
sewardjf6275b72004-10-13 15:03:18 +00002808 default:
2809 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
2810 vpanic("deltaIRStmt");
2811 }
sewardj695cff92004-10-13 14:50:14 +00002812}
2813
sewardjf6275b72004-10-13 15:03:18 +00002814
sewardj695cff92004-10-13 14:50:14 +00002815/* If possible, return a loop-unrolled version of bb0. The original
2816 is changed. If not possible, return NULL. */
2817
2818/* The two schemas considered are:
2819
2820 X: BODY; goto X
2821 --> X: BODY;BODY; goto X
2822
2823 and
2824
2825 X: BODY; if (c) goto X; goto Y
2826 which trivially transforms to
2827 X: BODY; if (!c) goto Y; goto X;
2828 so it falls in the scope of the first case.
2829
2830 X and Y must be literal (guest) addresses.
2831*/
2832
sewardj2d04d112004-10-13 20:44:46 +00002833static Int calc_unroll_factor( IRBB* bb )
2834{
2835 Int n_stmts, i;
2836
2837 n_stmts = 0;
2838 for (i = 0; i < bb->stmts_used; i++)
2839 if (bb->stmts[i])
2840 n_stmts++;
2841
2842 if (n_stmts <= UNROLL_TARGET/8) {
sewardj73017432004-10-14 19:33:25 +00002843 if (UNROLL_VERBOSE)
2844 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
2845 n_stmts, 8* n_stmts);
sewardj2d04d112004-10-13 20:44:46 +00002846 return 8;
2847 }
2848 if (n_stmts <= UNROLL_TARGET/4) {
sewardj73017432004-10-14 19:33:25 +00002849 if (UNROLL_VERBOSE)
2850 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
2851 n_stmts, 4* n_stmts);
sewardj2d04d112004-10-13 20:44:46 +00002852 return 4;
2853 }
2854
2855 if (n_stmts <= UNROLL_TARGET/2) {
sewardj73017432004-10-14 19:33:25 +00002856 if (UNROLL_VERBOSE)
2857 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
2858 n_stmts, 2* n_stmts);
sewardj2d04d112004-10-13 20:44:46 +00002859 return 2;
2860 }
2861
sewardj73017432004-10-14 19:33:25 +00002862 if (UNROLL_VERBOSE)
2863 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
2864
sewardj2d04d112004-10-13 20:44:46 +00002865 return 1;
2866}
2867
2868
sewardj695cff92004-10-13 14:50:14 +00002869static IRBB* maybe_loop_unroll_BB ( IRBB* bb0, Addr64 my_addr )
2870{
sewardj2d04d112004-10-13 20:44:46 +00002871 Int i, j, jmax, n_vars;
sewardjf6275b72004-10-13 15:03:18 +00002872 Bool xxx_known;
2873 Addr64 xxx_value, yyy_value;
2874 IRExpr* udst;
2875 IRStmt* st;
2876 IRConst* con;
2877 IRBB *bb1, *bb2;
sewardj2d04d112004-10-13 20:44:46 +00002878 Int unroll_factor;
2879
2880 if (UNROLL_TARGET <= 0)
2881 return NULL;
sewardj695cff92004-10-13 14:50:14 +00002882
2883 /* First off, figure out if we can unroll this loop. Do this
2884 without modifying bb0. */
2885
sewardjf6275b72004-10-13 15:03:18 +00002886 if (bb0->jumpkind != Ijk_Boring)
2887 return NULL;
sewardj695cff92004-10-13 14:50:14 +00002888
sewardjf6275b72004-10-13 15:03:18 +00002889 xxx_known = False;
2890 xxx_value = 0;
sewardj695cff92004-10-13 14:50:14 +00002891
sewardjf6275b72004-10-13 15:03:18 +00002892 /* Extract the next-guest address. If it isn't a literal, we
2893 have to give up. */
sewardj695cff92004-10-13 14:50:14 +00002894
sewardjf6275b72004-10-13 15:03:18 +00002895 udst = bb0->next;
2896 if (udst->tag == Iex_Const
2897 && (udst->Iex.Const.con->tag == Ico_U32
2898 || udst->Iex.Const.con->tag == Ico_U64)) {
2899 /* The BB ends in a jump to a literal location. */
2900 xxx_known = True;
2901 xxx_value = udst->Iex.Const.con->tag == Ico_U64
2902 ? udst->Iex.Const.con->Ico.U64
2903 : (Addr64)(udst->Iex.Const.con->Ico.U32);
2904 }
sewardj695cff92004-10-13 14:50:14 +00002905
sewardjf6275b72004-10-13 15:03:18 +00002906 if (!xxx_known)
2907 return NULL;
sewardj695cff92004-10-13 14:50:14 +00002908
sewardjf6275b72004-10-13 15:03:18 +00002909 /* Now we know the BB ends to a jump to a literal location. If
2910 it's a jump to itself (viz, idiom #1), move directly to the
2911 unrolling stage, first cloning the bb so the original isn't
2912 modified. */
2913 if (xxx_value == my_addr) {
sewardj2d04d112004-10-13 20:44:46 +00002914 unroll_factor = calc_unroll_factor( bb0 );
2915 if (unroll_factor < 2)
2916 return NULL;
sewardjf6275b72004-10-13 15:03:18 +00002917 bb1 = dopyIRBB( bb0 );
2918 bb0 = NULL;
2919 udst = NULL; /* is now invalid */
2920 goto do_unroll;
2921 }
sewardj695cff92004-10-13 14:50:14 +00002922
sewardjf6275b72004-10-13 15:03:18 +00002923 /* Search for the second idiomatic form:
sewardj695cff92004-10-13 14:50:14 +00002924 X: BODY; if (c) goto X; goto Y
sewardjf6275b72004-10-13 15:03:18 +00002925 We know Y, but need to establish that the last stmt
2926 is 'if (c) goto X'.
2927 */
2928 yyy_value = xxx_value;
2929 for (i = bb0->stmts_used-1; i >= 0; i--)
2930 if (bb0->stmts[i])
2931 break;
sewardj695cff92004-10-13 14:50:14 +00002932
sewardjf6275b72004-10-13 15:03:18 +00002933 if (i < 0)
2934 return NULL; /* block with no stmts. Strange. */
sewardj695cff92004-10-13 14:50:14 +00002935
sewardjf6275b72004-10-13 15:03:18 +00002936 st = bb0->stmts[i];
2937 if (st->tag != Ist_Exit)
2938 return NULL;
sewardj695cff92004-10-13 14:50:14 +00002939
sewardjf6275b72004-10-13 15:03:18 +00002940 con = st->Ist.Exit.dst;
2941 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
sewardj695cff92004-10-13 14:50:14 +00002942
sewardjf6275b72004-10-13 15:03:18 +00002943 xxx_value = con->tag == Ico_U64
2944 ? st->Ist.Exit.dst->Ico.U64
2945 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
sewardj695cff92004-10-13 14:50:14 +00002946
sewardjf6275b72004-10-13 15:03:18 +00002947 /* If this assertion fails, we have some kind of type error. */
2948 vassert(con->tag == udst->Iex.Const.con->tag);
2949
2950 if (xxx_value != my_addr)
2951 /* We didn't find either idiom. Give up. */
2952 return NULL;
2953
2954 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
2955 yyy values (which makes it look like idiom #1), and go into
2956 unrolling proper. This means finding (again) the last stmt, in
2957 the copied BB. */
2958
sewardj2d04d112004-10-13 20:44:46 +00002959 unroll_factor = calc_unroll_factor( bb0 );
2960 if (unroll_factor < 2)
2961 return NULL;
2962
sewardjf6275b72004-10-13 15:03:18 +00002963 bb1 = dopyIRBB( bb0 );
2964 bb0 = NULL;
2965 udst = NULL; /* is now invalid */
2966 for (i = bb1->stmts_used-1; i >= 0; i--)
2967 if (bb1->stmts[i])
2968 break;
2969
2970 /* The next bunch of assertions should be true since we already
2971 found and checked the last stmt in the original bb. */
2972
2973 vassert(i >= 0);
2974
2975 st = bb1->stmts[i];
2976 vassert(st->tag == Ist_Exit);
2977
2978 con = st->Ist.Exit.dst;
2979 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
2980
2981 udst = bb1->next;
2982 vassert(udst->tag == Iex_Const);
2983 vassert(udst->Iex.Const.con->tag == Ico_U32
sewardj695cff92004-10-13 14:50:14 +00002984 || udst->Iex.Const.con->tag == Ico_U64);
sewardjf6275b72004-10-13 15:03:18 +00002985 vassert(con->tag == udst->Iex.Const.con->tag);
2986
2987 /* switch the xxx and yyy fields around */
2988 if (con->tag == Ico_U64) {
2989 udst->Iex.Const.con->Ico.U64 = xxx_value;
2990 con->Ico.U64 = yyy_value;
2991 } else {
2992 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
2993 con->Ico.U64 = (UInt)yyy_value;
2994 }
2995
sewardj6e797c52004-10-13 15:20:17 +00002996 /* negate the test condition */
2997 st->Ist.Exit.cond = IRExpr_Unop(Iop_Not1,dopyIRExpr(st->Ist.Exit.cond));
sewardj695cff92004-10-13 14:50:14 +00002998
sewardjf6275b72004-10-13 15:03:18 +00002999 /* --- The unroller proper. Both idioms are by now --- */
3000 /* --- now converted to idiom 1. --- */
sewardj695cff92004-10-13 14:50:14 +00003001
sewardjf6275b72004-10-13 15:03:18 +00003002 do_unroll:
sewardj695cff92004-10-13 14:50:14 +00003003
sewardj2d04d112004-10-13 20:44:46 +00003004 vassert(unroll_factor == 2
3005 || unroll_factor == 4
3006 || unroll_factor == 8);
sewardj695cff92004-10-13 14:50:14 +00003007
sewardj2d04d112004-10-13 20:44:46 +00003008 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
3009 for (j = 1; j <= jmax; j++) {
sewardjf6275b72004-10-13 15:03:18 +00003010
sewardj2d04d112004-10-13 20:44:46 +00003011 n_vars = bb1->tyenv->types_used;
3012
3013 bb2 = dopyIRBB(bb1);
3014 for (i = 0; i < n_vars; i++)
3015 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
3016
3017 for (i = 0; i < bb2->stmts_used; i++) {
3018 if (bb2->stmts[i] == NULL)
3019 continue;
3020 /* deltaIRStmt destructively modifies the stmt, but
3021 that's OK since bb2 is a complete fresh copy of bb1. */
3022 deltaIRStmt(bb2->stmts[i], n_vars);
3023 addStmtToIRBB(bb1, bb2->stmts[i]);
3024 }
sewardj695cff92004-10-13 14:50:14 +00003025 }
3026
sewardjf6275b72004-10-13 15:03:18 +00003027 if (DEBUG_IROPT) {
3028 vex_printf("\nUNROLLED (%llx)\n", my_addr);
3029 ppIRBB(bb1);
3030 vex_printf("\n");
sewardj695cff92004-10-13 14:50:14 +00003031 }
3032
sewardjf6275b72004-10-13 15:03:18 +00003033 /* Flattening; sigh. The unroller succeeds in breaking flatness
3034 by negating the test condition. This should be fixed properly.
3035 For the moment use this shotgun approach. */
3036 return flatten_BB(bb1);
sewardj695cff92004-10-13 14:50:14 +00003037}
3038
3039/*---------------------------------------------------------------*/
sewardjedf4d692004-08-17 13:52:58 +00003040/*--- iropt main ---*/
3041/*---------------------------------------------------------------*/
3042
sewardj695cff92004-10-13 14:50:14 +00003043static Bool iropt_verbose = False; //True;
sewardj4345f7a2004-09-22 19:49:27 +00003044
3045
sewardj4345f7a2004-09-22 19:49:27 +00003046/* Do a simple cleanup pass on bb. This is: redundant Get removal,
3047 redundant Put removal, constant propagation, dead code removal,
3048 clean helper specialisation, and dead code removal (again).
sewardj695cff92004-10-13 14:50:14 +00003049
3050 Note, spec_helpers_BB destroys the 'flat' property, as the
3051 expressions which replace clean helper calls can be arbitrarily
3052 deep. This should be fixed. */
sewardj4345f7a2004-09-22 19:49:27 +00003053
3054static
sewardj695cff92004-10-13 14:50:14 +00003055IRBB* cheap_transformations (
3056 IRBB* bb,
3057 IRExpr* (*specHelper) ( Char*, IRExpr**) )
sewardj4345f7a2004-09-22 19:49:27 +00003058{
3059 redundant_get_removal_BB ( bb );
3060 if (iropt_verbose) {
3061 vex_printf("\n========= REDUNDANT GET\n\n" );
3062 ppIRBB(bb);
3063 }
sewardj044a2152004-10-21 10:22:10 +00003064
sewardj4345f7a2004-09-22 19:49:27 +00003065 redundant_put_removal_BB ( bb );
3066 if (iropt_verbose) {
3067 vex_printf("\n========= REDUNDANT PUT\n\n" );
3068 ppIRBB(bb);
3069 }
sewardj044a2152004-10-21 10:22:10 +00003070
sewardj4345f7a2004-09-22 19:49:27 +00003071 bb = cprop_BB ( bb );
3072 if (iropt_verbose) {
3073 vex_printf("\n========= CPROPD\n\n" );
3074 ppIRBB(bb);
3075 }
3076
3077 dead_BB ( bb );
3078 if (iropt_verbose) {
3079 vex_printf("\n========= DEAD\n\n" );
3080 ppIRBB(bb);
3081 }
3082
3083 spec_helpers_BB ( bb, specHelper );
3084 dead_BB ( bb );
3085 if (iropt_verbose) {
3086 vex_printf("\n========= SPECd \n\n" );
3087 ppIRBB(bb);
3088 }
3089
3090 return bb;
3091}
3092
sewardj695cff92004-10-13 14:50:14 +00003093
3094/* Do some more expensive transformations on bb, which are aimed at
3095 optimising as much as possible in the presence of GetI and PutI. */
3096
3097static
3098IRBB* expensive_transformations( IRBB* bb )
3099{
3100 cse_BB( bb );
3101 collapse_AddSub_chains_BB( bb );
3102 do_PutI_GetI_forwarding_BB( bb );
3103 do_redundant_PutI_elimination( bb );
3104 dead_BB( bb );
3105 return flatten_BB( bb );
3106}
3107
3108
sewardj4345f7a2004-09-22 19:49:27 +00003109/* Scan a flattened BB to see if it has any GetI or PutIs in it. Used
3110 as a heuristic hack to see if iropt needs to do expensive
sewardj695cff92004-10-13 14:50:14 +00003111 optimisations (CSE, PutI -> GetI forwarding, redundant PutI
3112 elimination) to improve code containing GetI or PutI. */
3113
sewardj4345f7a2004-09-22 19:49:27 +00003114static Bool hasGetIorPutI ( IRBB* bb )
3115{
3116 Int i, j;
3117 IRStmt* st;
3118 IRDirty* d;
3119
3120 for (i = 0; i < bb->stmts_used; i++) {
3121 st = bb->stmts[i];
3122 if (!st)
3123 continue;
3124
3125 switch (st->tag) {
3126 case Ist_PutI:
3127 return True;
3128 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +00003129 if (st->Ist.Tmp.data->tag == Iex_GetI)
sewardj4345f7a2004-09-22 19:49:27 +00003130 return True;
3131 break;
3132 case Ist_Put:
sewardj6d076362004-09-23 11:06:17 +00003133 vassert(isAtom(st->Ist.Put.data));
sewardj4345f7a2004-09-22 19:49:27 +00003134 break;
3135 case Ist_STle:
3136 vassert(isAtom(st->Ist.STle.addr));
3137 vassert(isAtom(st->Ist.STle.data));
3138 break;
3139 case Ist_Exit:
3140 vassert(isAtom(st->Ist.Exit.cond));
3141 break;
3142 case Ist_Dirty:
3143 d = st->Ist.Dirty.details;
3144 for (j = 0; d->args[j]; j++)
3145 vassert(isAtom(d->args[j]));
3146 if (d->mFx != Ifx_None)
3147 vassert(isAtom(d->mAddr));
3148 break;
3149 default:
3150 ppIRStmt(st);
3151 vpanic("hasGetIorPutI");
3152 }
3153
3154 }
3155 return False;
3156
3157}
3158
3159
sewardj695cff92004-10-13 14:50:14 +00003160/* ---------------- The main iropt entry point. ---------------- */
3161
sewardjedf4d692004-08-17 13:52:58 +00003162/* exported from this file */
sewardj695cff92004-10-13 14:50:14 +00003163/* Rules of the game:
3164
3165 - IRExpr/IRStmt trees should be treated as immutable, as they
3166 may get shared. So never change a field of such a tree node;
3167 instead construct and return a new one if needed.
3168*/
3169
sewardj4345f7a2004-09-22 19:49:27 +00003170
sewardj84ff0652004-08-23 16:16:08 +00003171IRBB* do_iropt_BB ( IRBB* bb0,
sewardj695cff92004-10-13 14:50:14 +00003172 IRExpr* (*specHelper) ( Char*, IRExpr**),
3173 Addr64 guest_addr )
sewardjedf4d692004-08-17 13:52:58 +00003174{
sewardj4345f7a2004-09-22 19:49:27 +00003175 static UInt n_total = 0;
3176 static UInt n_expensive = 0;
sewardj29632392004-08-22 02:38:11 +00003177
sewardj4345f7a2004-09-22 19:49:27 +00003178 Bool show_res = False;
sewardj695cff92004-10-13 14:50:14 +00003179 Bool do_expensive;
sewardj695cff92004-10-13 14:50:14 +00003180 IRBB *bb, *bb2;
sewardj8c2c10b2004-10-16 20:51:52 +00003181
sewardjd0863ff2004-10-23 00:22:32 +00003182 /* Completely disable iropt? */
sewardj39555aa2004-10-24 22:29:19 +00003183 if (IROPT_LEVEL <= 0) return bb0;
sewardjd0863ff2004-10-23 00:22:32 +00003184
sewardj4345f7a2004-09-22 19:49:27 +00003185 n_total++;
3186
3187 /* First flatten the block out, since all other
3188 phases assume flat code. */
3189
3190 bb = flatten_BB ( bb0 );
3191
3192 if (iropt_verbose) {
3193 vex_printf("\n========= FLAT\n\n" );
3194 ppIRBB(bb);
sewardj84be7372004-08-18 13:59:33 +00003195 }
sewardjd7217032004-08-19 10:49:10 +00003196
sewardj695cff92004-10-13 14:50:14 +00003197 /* Now do a preliminary cleanup pass, and figure out if we also
3198 need to do 'expensive' optimisations. Expensive optimisations
3199 are deemed necessary if the block contains any GetIs or PutIs.
3200 If needed, do expensive transformations and then another cheap
3201 cleanup pass. */
sewardj4345f7a2004-09-22 19:49:27 +00003202
sewardj695cff92004-10-13 14:50:14 +00003203 bb = cheap_transformations( bb, specHelper );
sewardj695cff92004-10-13 14:50:14 +00003204
sewardj39555aa2004-10-24 22:29:19 +00003205 if (IROPT_LEVEL >= 1) {
3206 do_expensive = hasGetIorPutI(bb);
sewardj695cff92004-10-13 14:50:14 +00003207 if (do_expensive) {
sewardj39555aa2004-10-24 22:29:19 +00003208 n_expensive++;
3209 //show_res = True;
3210 if (DEBUG_IROPT)
3211 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
sewardj695cff92004-10-13 14:50:14 +00003212 bb = expensive_transformations( bb );
3213 bb = cheap_transformations( bb, specHelper );
3214 }
sewardj39555aa2004-10-24 22:29:19 +00003215
3216 /* Now have a go at unrolling simple (single-BB) loops. If
3217 successful, clean up the results as much as possible. */
3218
3219 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
3220 if (bb2) {
3221 show_res = False; //True;
3222 bb = cheap_transformations( bb2, specHelper );
3223 if (do_expensive) {
3224 bb = expensive_transformations( bb );
3225 bb = cheap_transformations( bb, specHelper );
3226 } else {
3227 /* at least do CSE and dead code removal */
3228 cse_BB( bb );
3229 dead_BB( bb );
3230 }
3231 if (0) vex_printf("vex iropt: unrolled a loop\n");
3232 }
3233
sewardjd7217032004-08-19 10:49:10 +00003234 }
3235
sewardj4345f7a2004-09-22 19:49:27 +00003236 /* Finally, rebuild trees, for the benefit of instruction
3237 selection. */
sewardj11f429c2004-10-20 23:14:42 +00003238 dead_BB(bb);
sewardj695cff92004-10-13 14:50:14 +00003239 treebuild_BB( bb );
sewardj4345f7a2004-09-22 19:49:27 +00003240 if (show_res || iropt_verbose) {
sewardj29632392004-08-22 02:38:11 +00003241 vex_printf("\n========= TREEd \n\n" );
sewardj4345f7a2004-09-22 19:49:27 +00003242 ppIRBB(bb);
sewardj29632392004-08-22 02:38:11 +00003243 }
sewardj29632392004-08-22 02:38:11 +00003244
sewardj4345f7a2004-09-22 19:49:27 +00003245 return bb;
sewardjedf4d692004-08-17 13:52:58 +00003246}
3247
3248
sewardja1a370f2004-08-17 13:31:55 +00003249/*---------------------------------------------------------------*/
3250/*--- end ir/iropt.c ---*/
3251/*---------------------------------------------------------------*/