blob: 407e4e0836d2a7e4d6c9914c8755a99fdf139a2a [file] [log] [blame]
sewardja1a370f2004-08-17 13:31:55 +00001
2/*---------------------------------------------------------------*/
3/*--- ---*/
4/*--- This file (ir/iropt.c) is ---*/
sewardj496a58d2005-03-20 18:44:44 +00005/*--- Copyright (c) OpenWorks LLP. All rights reserved. ---*/
sewardja1a370f2004-08-17 13:31:55 +00006/*--- ---*/
7/*---------------------------------------------------------------*/
8
sewardjf8ed9d82004-11-12 17:40:23 +00009/*
10 This file is part of LibVEX, a library for dynamic binary
11 instrumentation and translation.
12
sewardj496a58d2005-03-20 18:44:44 +000013 Copyright (C) 2004-2005 OpenWorks LLP.
sewardjf8ed9d82004-11-12 17:40:23 +000014
15 This program is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; Version 2 dated June 1991 of the
18 license.
19
20 This program is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or liability
23 for damages. See the GNU General Public License for more details.
24
25 Neither the names of the U.S. Department of Energy nor the
26 University of California nor the names of its contributors may be
27 used to endorse or promote products derived from this software
28 without prior written permission.
29
30 You should have received a copy of the GNU General Public License
31 along with this program; if not, write to the Free Software
32 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
33 USA.
34*/
35
sewardja1a370f2004-08-17 13:31:55 +000036#include "libvex_basictypes.h"
sewardjedf4d692004-08-17 13:52:58 +000037#include "libvex_ir.h"
sewardja1a370f2004-08-17 13:31:55 +000038#include "libvex.h"
39
40#include "main/vex_util.h"
sewardj08613742004-10-25 13:01:45 +000041#include "main/vex_globals.h"
sewardjedf4d692004-08-17 13:52:58 +000042#include "ir/iropt.h"
sewardja1a370f2004-08-17 13:31:55 +000043
sewardjd0863ff2004-10-23 00:22:32 +000044
sewardj088bcb42004-08-19 17:16:52 +000045/* Set to 1 for lots of debugging output. */
46#define DEBUG_IROPT 0
47
sewardja1a370f2004-08-17 13:31:55 +000048
sewardj08210532004-12-29 17:09:11 +000049/* What iropt does, 29 Dec 04.
50
51 It takes an IRBB and produces a new one with the same meaning,
52 defined thus:
53
54 After execution of the new BB, all guest state and guest memory is
55 the same as after execution of the original. This is true
56 regardless of how the block was exited (at the end vs side exit).
57
58 In addition, parts of the guest state will be identical to that
59 created by execution of the original at the following observation
60 points:
61
62 * In a dirty helper call, any parts of the guest state that the
63 helper states that it reads or modifies will be up to date.
64 Also, guest memory will be up to date. Parts of the guest state
65 not marked as being read or modified by the helper cannot be
66 assumed to be up-to-date at the point where the helper is called.
67
68 * Immediately prior to any load or store, those parts of the guest
69 state marked as requiring precise exceptions will be up to date.
70 Also, guest memory will be up to date. Parts of the guest state
71 not marked as requiring precise exceptions cannot be assumed to
72 be up-to-date at the point of the load/store.
73
74 The relative order of loads and stores (including loads/stores of
75 guest memory done by dirty helpers annotated as such) is not
76 changed. However, the relative order of loads with no intervening
77 stores/modifies may be changed.
78
79 Transformation order
80 ~~~~~~~~~~~~~~~~~~~~
81
82 There are three levels of optimisation, controlled by
83 vex_control.iropt_level. Define first:
84
85 "Cheap transformations" are the following sequence:
86 * Redundant-Get removal
87 * Redundant-Put removal
88 * Constant propagation/folding
89 * Dead code removal
90 * Specialisation of clean helper functions
91 * Dead code removal
92
93 "Expensive transformations" are the following sequence:
94 * CSE
95 * Folding of add/sub chains
96 * Redundant-GetI removal
97 * Redundant-PutI removal
98 * Dead code removal
sewardj08210532004-12-29 17:09:11 +000099
100 Then the transformations are as follows, as defined by
101 vex_control.iropt_level:
102
103 Level 0:
104 * Flatten into atomic form.
105
106 Level 1: the following sequence:
107 * Flatten into atomic form.
108 * Cheap transformations.
sewardj08210532004-12-29 17:09:11 +0000109
110 Level 2: the following sequence
111 * Flatten into atomic form.
112 * Cheap transformations.
113 * If block contains GetI or PutI, Expensive transformations.
114 * Try unrolling loops. Three possible outcomes:
115 - No effect: do nothing more.
116 - Unrolled a loop, and block does not contain GetI or PutI:
117 Do: * CSE
118 * Dead code removal
sewardj08210532004-12-29 17:09:11 +0000119 - Unrolled a loop, and block contains GetI or PutI:
120 Do: * Expensive transformations
121 * Cheap transformations
sewardj08210532004-12-29 17:09:11 +0000122*/
123
sewardj98430292004-12-29 17:34:50 +0000124/* Implementation notes, 29 Dec 04.
125
126 TODO (important): I think rPutI removal ignores precise exceptions
127 and is therefore in a sense, wrong. In the sense that PutIs are
128 assumed not to write parts of the guest state that we need to have
129 up-to-date at loads/stores. So far on x86 guest that has not
130 mattered since indeed only the x87 FP registers and tags are
131 accessed using GetI/PutI, and there is no need so far for them to
132 be up to date at mem exception points. The rPutI pass should be
133 fixed.
sewardjfb44d552004-10-25 09:48:47 +0000134
sewardj4c5f6d52004-10-26 13:25:33 +0000135 TODO: improve pessimistic handling of precise exceptions
136 in the tree builder.
137
sewardjfb44d552004-10-25 09:48:47 +0000138 TODO: check interaction of rGetI and dirty helpers.
sewardjc0b42282004-10-12 13:44:12 +0000139
140 F64i constants are treated differently from other constants.
141 They are not regarded as atoms, and instead lifted off and
142 bound to temps. This allows them to participate in CSE, which
143 is important for getting good performance for x86 guest code.
sewardj695cff92004-10-13 14:50:14 +0000144
sewardja5aa9cf2004-10-15 22:56:38 +0000145 CSE up F64 literals (already doing F64is)
sewardj4c5f6d52004-10-26 13:25:33 +0000146
147 CSE: consider carefully the requirement for precise exns
sewardj98430292004-12-29 17:34:50 +0000148 prior to making CSE any more aggressive. */
sewardjc0b42282004-10-12 13:44:12 +0000149
150
sewardja1a370f2004-08-17 13:31:55 +0000151/*---------------------------------------------------------------*/
152/*--- Finite mappery, of a sort ---*/
153/*---------------------------------------------------------------*/
154
sewardj08210532004-12-29 17:09:11 +0000155/* General map from HWord-sized thing HWord-sized thing. Could be by
156 hashing, but it's not clear whether or not this would really be any
157 faster. */
sewardja1a370f2004-08-17 13:31:55 +0000158
159typedef
160 struct {
161 Bool* inuse;
sewardjea602bc2004-10-14 21:40:12 +0000162 HWord* key;
163 HWord* val;
sewardja1a370f2004-08-17 13:31:55 +0000164 Int size;
165 Int used;
166 }
sewardjea602bc2004-10-14 21:40:12 +0000167 HashHW;
sewardja1a370f2004-08-17 13:31:55 +0000168
sewardjea602bc2004-10-14 21:40:12 +0000169static HashHW* newHHW ( void )
sewardja1a370f2004-08-17 13:31:55 +0000170{
sewardjea602bc2004-10-14 21:40:12 +0000171 HashHW* h = LibVEX_Alloc(sizeof(HashHW));
sewardj29632392004-08-22 02:38:11 +0000172 h->size = 8;
sewardja1a370f2004-08-17 13:31:55 +0000173 h->used = 0;
174 h->inuse = LibVEX_Alloc(h->size * sizeof(Bool));
sewardjea602bc2004-10-14 21:40:12 +0000175 h->key = LibVEX_Alloc(h->size * sizeof(HWord));
176 h->val = LibVEX_Alloc(h->size * sizeof(HWord));
sewardja1a370f2004-08-17 13:31:55 +0000177 return h;
178}
179
180
sewardj84be7372004-08-18 13:59:33 +0000181/* Look up key in the map. */
sewardja1a370f2004-08-17 13:31:55 +0000182
sewardjea602bc2004-10-14 21:40:12 +0000183static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
sewardja1a370f2004-08-17 13:31:55 +0000184{
185 Int i;
sewardj08210532004-12-29 17:09:11 +0000186 /* vex_printf("lookupHHW(%llx)\n", key ); */
sewardja1a370f2004-08-17 13:31:55 +0000187 for (i = 0; i < h->used; i++) {
188 if (h->inuse[i] && h->key[i] == key) {
sewardj39e3f242004-08-18 16:54:52 +0000189 if (val)
190 *val = h->val[i];
sewardja1a370f2004-08-17 13:31:55 +0000191 return True;
192 }
193 }
194 return False;
195}
196
197
sewardja1a370f2004-08-17 13:31:55 +0000198/* Add key->val to the map. Replaces any existing binding for key. */
199
sewardjea602bc2004-10-14 21:40:12 +0000200static void addToHHW ( HashHW* h, HWord key, HWord val )
sewardja1a370f2004-08-17 13:31:55 +0000201{
202 Int i, j;
sewardj08210532004-12-29 17:09:11 +0000203 /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */
sewardja1a370f2004-08-17 13:31:55 +0000204
205 /* Find and replace existing binding, if any. */
206 for (i = 0; i < h->used; i++) {
207 if (h->inuse[i] && h->key[i] == key) {
208 h->val[i] = val;
209 return;
210 }
211 }
212
213 /* Ensure a space is available. */
214 if (h->used == h->size) {
215 /* Copy into arrays twice the size. */
216 Bool* inuse2 = LibVEX_Alloc(2 * h->size * sizeof(Bool));
sewardjea602bc2004-10-14 21:40:12 +0000217 HWord* key2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
218 HWord* val2 = LibVEX_Alloc(2 * h->size * sizeof(HWord));
sewardja1a370f2004-08-17 13:31:55 +0000219 for (i = j = 0; i < h->size; i++) {
220 if (!h->inuse[i]) continue;
221 inuse2[j] = True;
222 key2[j] = h->key[i];
223 val2[j] = h->val[i];
224 j++;
225 }
226 h->used = j;
227 h->size *= 2;
228 h->inuse = inuse2;
229 h->key = key2;
230 h->val = val2;
231 }
232
233 /* Finally, add it. */
234 vassert(h->used < h->size);
235 h->inuse[h->used] = True;
236 h->key[h->used] = key;
sewardj84be7372004-08-18 13:59:33 +0000237 h->val[h->used] = val;
sewardja1a370f2004-08-17 13:31:55 +0000238 h->used++;
239}
240
sewardj84be7372004-08-18 13:59:33 +0000241
sewardjd7cb8532004-08-17 23:59:23 +0000242/*---------------------------------------------------------------*/
sewardj08210532004-12-29 17:09:11 +0000243/*--- Flattening out a BB into atomic SSA form ---*/
sewardjd7cb8532004-08-17 23:59:23 +0000244/*---------------------------------------------------------------*/
245
sewardje80679a2004-09-21 23:00:11 +0000246/* Non-critical helper, heuristic for reducing the number of tmp-tmp
247 copies made by flattening. If in doubt return False. */
248
249static Bool isFlat ( IRExpr* e )
250{
sewardj695cff92004-10-13 14:50:14 +0000251 if (e->tag == Iex_Get)
252 return True;
sewardje80679a2004-09-21 23:00:11 +0000253 if (e->tag == Iex_Binop)
sewardj496a58d2005-03-20 18:44:44 +0000254 return toBool( isIRAtom(e->Iex.Binop.arg1)
255 && isIRAtom(e->Iex.Binop.arg2) );
sewardje80679a2004-09-21 23:00:11 +0000256 if (e->tag == Iex_LDle)
sewardj496a58d2005-03-20 18:44:44 +0000257 return isIRAtom(e->Iex.LDle.addr);
sewardje80679a2004-09-21 23:00:11 +0000258 return False;
259}
260
sewardjd7cb8532004-08-17 23:59:23 +0000261/* Flatten out 'ex' so it is atomic, returning a new expression with
262 the same value, after having appended extra IRTemp assignments to
263 the end of 'bb'. */
264
265static IRExpr* flatten_Expr ( IRBB* bb, IRExpr* ex )
266{
267 Int i;
268 IRExpr** newargs;
269 IRType ty = typeOfIRExpr(bb->tyenv, ex);
270 IRTemp t1;
271
272 switch (ex->tag) {
273
sewardjd7217032004-08-19 10:49:10 +0000274 case Iex_GetI:
275 t1 = newIRTemp(bb->tyenv, ty);
276 addStmtToIRBB(bb, IRStmt_Tmp(t1,
sewardj2d3f77c2004-09-22 23:49:09 +0000277 IRExpr_GetI(ex->Iex.GetI.descr,
sewardjeeac8412004-11-02 00:26:55 +0000278 flatten_Expr(bb, ex->Iex.GetI.ix),
sewardj2d3f77c2004-09-22 23:49:09 +0000279 ex->Iex.GetI.bias)));
sewardjd7217032004-08-19 10:49:10 +0000280 return IRExpr_Tmp(t1);
281
sewardjd7cb8532004-08-17 23:59:23 +0000282 case Iex_Get:
283 t1 = newIRTemp(bb->tyenv, ty);
284 addStmtToIRBB(bb,
285 IRStmt_Tmp(t1, ex));
286 return IRExpr_Tmp(t1);
287
288 case Iex_Binop:
289 t1 = newIRTemp(bb->tyenv, ty);
290 addStmtToIRBB(bb, IRStmt_Tmp(t1,
291 IRExpr_Binop(ex->Iex.Binop.op,
292 flatten_Expr(bb, ex->Iex.Binop.arg1),
293 flatten_Expr(bb, ex->Iex.Binop.arg2))));
294 return IRExpr_Tmp(t1);
295
296 case Iex_Unop:
297 t1 = newIRTemp(bb->tyenv, ty);
298 addStmtToIRBB(bb, IRStmt_Tmp(t1,
299 IRExpr_Unop(ex->Iex.Unop.op,
300 flatten_Expr(bb, ex->Iex.Unop.arg))));
301 return IRExpr_Tmp(t1);
302
303 case Iex_LDle:
304 t1 = newIRTemp(bb->tyenv, ty);
305 addStmtToIRBB(bb, IRStmt_Tmp(t1,
306 IRExpr_LDle(ex->Iex.LDle.ty,
307 flatten_Expr(bb, ex->Iex.LDle.addr))));
308 return IRExpr_Tmp(t1);
309
310 case Iex_CCall:
sewardj695cff92004-10-13 14:50:14 +0000311 newargs = sopyIRExprVec(ex->Iex.CCall.args);
sewardjd7cb8532004-08-17 23:59:23 +0000312 for (i = 0; newargs[i]; i++)
313 newargs[i] = flatten_Expr(bb, newargs[i]);
314 t1 = newIRTemp(bb->tyenv, ty);
315 addStmtToIRBB(bb, IRStmt_Tmp(t1,
sewardj8ea867b2004-10-30 19:03:02 +0000316 IRExpr_CCall(ex->Iex.CCall.cee,
sewardjd7cb8532004-08-17 23:59:23 +0000317 ex->Iex.CCall.retty,
318 newargs)));
319 return IRExpr_Tmp(t1);
320
321 case Iex_Mux0X:
322 t1 = newIRTemp(bb->tyenv, ty);
323 addStmtToIRBB(bb, IRStmt_Tmp(t1,
324 IRExpr_Mux0X(flatten_Expr(bb, ex->Iex.Mux0X.cond),
325 flatten_Expr(bb, ex->Iex.Mux0X.expr0),
326 flatten_Expr(bb, ex->Iex.Mux0X.exprX))));
327 return IRExpr_Tmp(t1);
328
329 case Iex_Const:
sewardjc0b42282004-10-12 13:44:12 +0000330 /* Lift F64i constants out onto temps so they can be CSEd
331 later. */
332 if (ex->Iex.Const.con->tag == Ico_F64i) {
333 t1 = newIRTemp(bb->tyenv, ty);
334 addStmtToIRBB(bb, IRStmt_Tmp(t1,
335 IRExpr_Const(ex->Iex.Const.con)));
336 return IRExpr_Tmp(t1);
337 } else {
338 /* Leave all other constants alone. */
339 return ex;
340 }
341
sewardjd7cb8532004-08-17 23:59:23 +0000342 case Iex_Tmp:
343 return ex;
344
345 default:
346 vex_printf("\n");
347 ppIRExpr(ex);
348 vex_printf("\n");
349 vpanic("flatten_Expr");
350 }
351}
352
353
354/* Append a completely flattened form of 'st' to the end of 'bb'. */
355
356static void flatten_Stmt ( IRBB* bb, IRStmt* st )
357{
sewardj17442fe2004-09-20 14:54:28 +0000358 Int i;
359 IRExpr *e1, *e2;
360 IRDirty *d, *d2;
sewardjd7cb8532004-08-17 23:59:23 +0000361 switch (st->tag) {
362 case Ist_Put:
sewardj496a58d2005-03-20 18:44:44 +0000363 if (isIRAtom(st->Ist.Put.data)) {
sewardj49651f42004-10-28 22:11:04 +0000364 /* optimisation to reduce the amount of heap wasted
365 by the flattener */
366 addStmtToIRBB(bb, st);
367 } else {
368 /* general case, always correct */
369 e1 = flatten_Expr(bb, st->Ist.Put.data);
370 addStmtToIRBB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
371 }
sewardjd7cb8532004-08-17 23:59:23 +0000372 break;
sewardjd7cb8532004-08-17 23:59:23 +0000373 case Ist_PutI:
sewardjeeac8412004-11-02 00:26:55 +0000374 e1 = flatten_Expr(bb, st->Ist.PutI.ix);
sewardj2d3f77c2004-09-22 23:49:09 +0000375 e2 = flatten_Expr(bb, st->Ist.PutI.data);
376 addStmtToIRBB(bb, IRStmt_PutI(st->Ist.PutI.descr,
377 e1,
378 st->Ist.PutI.bias,
379 e2));
sewardjd7217032004-08-19 10:49:10 +0000380 break;
sewardjd7cb8532004-08-17 23:59:23 +0000381 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +0000382 if (isFlat(st->Ist.Tmp.data)) {
sewardje80679a2004-09-21 23:00:11 +0000383 /* optimisation, to reduce the number of tmp-tmp
384 copies generated */
385 addStmtToIRBB(bb, st);
386 } else {
387 /* general case, always correct */
sewardj6d076362004-09-23 11:06:17 +0000388 e1 = flatten_Expr(bb, st->Ist.Tmp.data);
sewardje80679a2004-09-21 23:00:11 +0000389 addStmtToIRBB(bb, IRStmt_Tmp(st->Ist.Tmp.tmp, e1));
390 }
sewardjd7cb8532004-08-17 23:59:23 +0000391 break;
392 case Ist_STle:
393 e1 = flatten_Expr(bb, st->Ist.STle.addr);
394 e2 = flatten_Expr(bb, st->Ist.STle.data);
395 addStmtToIRBB(bb, IRStmt_STle(e1,e2));
396 break;
sewardj17442fe2004-09-20 14:54:28 +0000397 case Ist_Dirty:
398 d = st->Ist.Dirty.details;
399 d2 = emptyIRDirty();
400 *d2 = *d;
sewardj695cff92004-10-13 14:50:14 +0000401 d2->args = sopyIRExprVec(d2->args);
sewardj17442fe2004-09-20 14:54:28 +0000402 if (d2->mFx != Ifx_None) {
403 d2->mAddr = flatten_Expr(bb, d2->mAddr);
404 } else {
405 vassert(d2->mAddr == NULL);
406 }
sewardjb8385d82004-11-02 01:34:15 +0000407 d2->guard = flatten_Expr(bb, d2->guard);
sewardj17442fe2004-09-20 14:54:28 +0000408 for (i = 0; d2->args[i]; i++)
409 d2->args[i] = flatten_Expr(bb, d2->args[i]);
410 addStmtToIRBB(bb, IRStmt_Dirty(d2));
411 break;
sewardjd2445f62005-03-21 00:15:53 +0000412 case Ist_NoOp:
sewardj3e838932005-01-07 12:09:15 +0000413 case Ist_MFence:
sewardjd2445f62005-03-21 00:15:53 +0000414 case Ist_IMark:
sewardj3e838932005-01-07 12:09:15 +0000415 addStmtToIRBB(bb, st);
416 break;
sewardjd7cb8532004-08-17 23:59:23 +0000417 case Ist_Exit:
sewardj0276d4b2004-11-15 15:30:21 +0000418 e1 = flatten_Expr(bb, st->Ist.Exit.guard);
sewardj893aada2004-11-29 19:57:54 +0000419 addStmtToIRBB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
420 st->Ist.Exit.dst));
sewardjd7cb8532004-08-17 23:59:23 +0000421 break;
422 default:
423 vex_printf("\n");
424 ppIRStmt(st);
425 vex_printf("\n");
426 vpanic("flatten_Stmt");
427 }
428}
429
sewardj08210532004-12-29 17:09:11 +0000430
sewardjd7cb8532004-08-17 23:59:23 +0000431static IRBB* flatten_BB ( IRBB* in )
432{
433 Int i;
434 IRBB* out;
435 out = emptyIRBB();
sewardj695cff92004-10-13 14:50:14 +0000436 out->tyenv = dopyIRTypeEnv( in->tyenv );
sewardjd7cb8532004-08-17 23:59:23 +0000437 for (i = 0; i < in->stmts_used; i++)
sewardj4345f7a2004-09-22 19:49:27 +0000438 if (in->stmts[i])
439 flatten_Stmt( out, in->stmts[i] );
sewardjd7cb8532004-08-17 23:59:23 +0000440 out->next = flatten_Expr( out, in->next );
441 out->jumpkind = in->jumpkind;
442 return out;
443}
444
sewardjedf4d692004-08-17 13:52:58 +0000445
sewardj08210532004-12-29 17:09:11 +0000446/*---------------------------------------------------------------*/
447/*--- In-place removal of redundant GETs ---*/
448/*---------------------------------------------------------------*/
449
450/* Scan forwards, building up an environment binding (min offset, max
451 offset) pairs to values, which will either be temps or constants.
452
453 On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
454 env and if it matches, replace the Get with the stored value. If
455 there is no match, add a (minoff,maxoff) :-> t binding.
456
457 On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
458 any binding which fully or partially overlaps with (minoff,maxoff).
459 Then add a new (minoff,maxoff) :-> t or c binding. */
460
461/* Extract the min/max offsets from a guest state array descriptor. */
462
463inline
464static void getArrayBounds ( IRArray* descr, UInt* minoff, UInt* maxoff )
465{
466 *minoff = descr->base;
467 *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
468 vassert((*minoff & ~0xFFFF) == 0);
469 vassert((*maxoff & ~0xFFFF) == 0);
470 vassert(*minoff <= *maxoff);
471}
472
473/* Create keys, of the form ((minoffset << 16) | maxoffset). */
474
475static UInt mk_key_GetPut ( Int offset, IRType ty )
476{
477 /* offset should fit in 16 bits. */
478 UInt minoff = offset;
479 UInt maxoff = minoff + sizeofIRType(ty) - 1;
480 vassert((minoff & ~0xFFFF) == 0);
481 vassert((maxoff & ~0xFFFF) == 0);
482 return (minoff << 16) | maxoff;
483}
484
485static UInt mk_key_GetIPutI ( IRArray* descr )
486{
487 UInt minoff, maxoff;
488 getArrayBounds( descr, &minoff, &maxoff );
489 vassert((minoff & ~0xFFFF) == 0);
490 vassert((maxoff & ~0xFFFF) == 0);
491 return (minoff << 16) | maxoff;
492}
493
494/* Supposing h has keys of the form generated by mk_key_GetPut and
495 mk_key_GetIPutI, invalidate any key which overlaps (k_lo
496 .. k_hi).
497*/
498static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
499{
500 Int j;
501 UInt e_lo, e_hi;
502 vassert(k_lo <= k_hi);
503 /* invalidate any env entries which in any way overlap (k_lo
504 .. k_hi) */
505 /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */
506
507 for (j = 0; j < h->used; j++) {
508 if (!h->inuse[j])
509 continue;
510 e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
511 e_hi = ((UInt)h->key[j]) & 0xFFFF;
512 vassert(e_lo <= e_hi);
513 if (e_hi < k_lo || k_hi < e_lo)
514 continue; /* no overlap possible */
515 else
516 /* overlap; invalidate */
517 h->inuse[j] = False;
518 }
519}
520
521
522static void redundant_get_removal_BB ( IRBB* bb )
523{
524 HashHW* env = newHHW();
525 UInt key = 0; /* keep gcc -O happy */
526 Int i, j;
527 HWord val;
528
529 for (i = 0; i < bb->stmts_used; i++) {
530 IRStmt* st = bb->stmts[i];
531
532 if (!st)
533 continue;
534
535 /* Deal with Gets */
536 if (st->tag == Ist_Tmp
537 && st->Ist.Tmp.data->tag == Iex_Get) {
538 /* st is 't = Get(...)'. Look up in the environment and see
539 if the Get can be replaced. */
540 IRExpr* get = st->Ist.Tmp.data;
541 key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
542 get->Iex.Get.ty );
543 if (lookupHHW(env, &val, (HWord)key)) {
544 /* found it */
545 /* Note, we could do better here. If the types are
546 different we don't do the substitution, since doing so
547 could lead to invalidly-typed IR. An improvement would
548 be to stick in a reinterpret-style cast, although that
549 would make maintaining flatness more difficult. */
550 IRExpr* valE = (IRExpr*)val;
sewardj9d2e7692005-02-07 01:11:31 +0000551 Bool typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
552 == st->Ist.Tmp.data->Iex.Get.ty );
sewardj08210532004-12-29 17:09:11 +0000553 if (typesOK && DEBUG_IROPT) {
554 vex_printf("rGET: "); ppIRExpr(get);
555 vex_printf(" -> "); ppIRExpr(valE);
556 vex_printf("\n");
557 }
558 if (typesOK)
559 bb->stmts[i] = IRStmt_Tmp(st->Ist.Tmp.tmp, valE);
560 } else {
561 /* Not found, but at least we know that t and the Get(...)
562 are now associated. So add a binding to reflect that
563 fact. */
564 addToHHW( env, (HWord)key,
sewardj59c07782005-01-21 21:23:07 +0000565 (HWord)(void*)(IRExpr_Tmp(st->Ist.Tmp.tmp)) );
sewardj08210532004-12-29 17:09:11 +0000566 }
567 }
568
569 /* Deal with Puts: invalidate any env entries overlapped by this
570 Put */
571 if (st->tag == Ist_Put || st->tag == Ist_PutI) {
572 UInt k_lo, k_hi;
573 if (st->tag == Ist_Put) {
574 key = mk_key_GetPut( st->Ist.Put.offset,
575 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
576 } else {
577 vassert(st->tag == Ist_PutI);
578 key = mk_key_GetIPutI( st->Ist.PutI.descr );
579 }
580
581 k_lo = (key >> 16) & 0xFFFF;
582 k_hi = key & 0xFFFF;
583 invalidateOverlaps(env, k_lo, k_hi);
584 }
585 else
586 if (st->tag == Ist_Dirty) {
587 /* Deal with dirty helpers which write or modify guest state.
588 Invalidate the entire env. We could do a lot better
589 here. */
590 IRDirty* d = st->Ist.Dirty.details;
591 Bool writes = False;
592 for (j = 0; j < d->nFxState; j++) {
593 if (d->fxState[j].fx == Ifx_Modify
594 || d->fxState[j].fx == Ifx_Write)
595 writes = True;
596 }
597 if (writes) {
598 /* dump the entire env (not clever, but correct ...) */
599 for (j = 0; j < env->used; j++)
600 env->inuse[j] = False;
601 if (0) vex_printf("rGET: trash env due to dirty helper\n");
602 }
603 }
604
605 /* add this one to the env, if appropriate */
606 if (st->tag == Ist_Put) {
sewardj496a58d2005-03-20 18:44:44 +0000607 vassert(isIRAtom(st->Ist.Put.data));
sewardj08210532004-12-29 17:09:11 +0000608 addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
609 }
610
611 } /* for (i = 0; i < bb->stmts_used; i++) */
612
613}
614
615
616/*---------------------------------------------------------------*/
617/*--- In-place removal of redundant PUTs ---*/
618/*---------------------------------------------------------------*/
619
620/* Find any Get uses in st and invalidate any partially or fully
621 overlapping ranges listed in env. Due to the flattening phase, the
622 only stmt kind we expect to find a Get on is IRStmt_Tmp. */
623
624static void handle_gets_Stmt (
625 HashHW* env,
626 IRStmt* st,
627 Bool (*preciseMemExnsFn)(Int,Int)
628 )
629{
630 Int j;
631 UInt key = 0; /* keep gcc -O happy */
632 Bool isGet;
633 Bool memRW = False;
634 IRExpr* e;
635
636 switch (st->tag) {
637
638 /* This is the only interesting case. Deal with Gets in the RHS
639 expression. */
640 case Ist_Tmp:
641 e = st->Ist.Tmp.data;
642 switch (e->tag) {
643 case Iex_Get:
644 isGet = True;
645 key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
646 break;
647 case Iex_GetI:
648 isGet = True;
649 key = mk_key_GetIPutI ( e->Iex.GetI.descr );
650 break;
651 case Iex_LDle:
652 isGet = False;
653 memRW = True;
654 break;
655 default:
656 isGet = False;
657 }
658 if (isGet) {
659 UInt k_lo, k_hi;
660 k_lo = (key >> 16) & 0xFFFF;
661 k_hi = key & 0xFFFF;
662 invalidateOverlaps(env, k_lo, k_hi);
663 }
664 break;
665
666 /* Be very conservative for dirty helper calls; dump the entire
667 environment. The helper might read guest state, in which
668 case it needs to be flushed first. Also, the helper might
669 access guest memory, in which case all parts of the guest
670 state requiring precise exceptions needs to be flushed. The
671 crude solution is just to flush everything; we could easily
672 enough do a lot better if needed. */
sewardj3e838932005-01-07 12:09:15 +0000673 /* Probably also overly-conservative, but also dump everything
674 if we hit a memory fence. */
675 case Ist_MFence:
sewardj08210532004-12-29 17:09:11 +0000676 case Ist_Dirty:
677 for (j = 0; j < env->used; j++)
678 env->inuse[j] = False;
679 break;
680
681 /* all other cases are boring. */
682 case Ist_STle:
sewardj496a58d2005-03-20 18:44:44 +0000683 vassert(isIRAtom(st->Ist.STle.addr));
684 vassert(isIRAtom(st->Ist.STle.data));
sewardj08210532004-12-29 17:09:11 +0000685 memRW = True;
686 break;
687
688 case Ist_Exit:
sewardj496a58d2005-03-20 18:44:44 +0000689 vassert(isIRAtom(st->Ist.Exit.guard));
sewardj08210532004-12-29 17:09:11 +0000690 break;
691
692 case Ist_PutI:
sewardj496a58d2005-03-20 18:44:44 +0000693 vassert(isIRAtom(st->Ist.PutI.ix));
694 vassert(isIRAtom(st->Ist.PutI.data));
sewardj08210532004-12-29 17:09:11 +0000695 break;
696
sewardjd2445f62005-03-21 00:15:53 +0000697 case Ist_NoOp:
sewardjf1689312005-03-16 18:19:10 +0000698 case Ist_IMark:
699 break;
700
sewardj08210532004-12-29 17:09:11 +0000701 default:
702 vex_printf("\n");
703 ppIRStmt(st);
704 vex_printf("\n");
705 vpanic("handle_gets_Stmt");
706 }
707
708 if (memRW) {
709 /* This statement accesses memory. So we need to dump all parts
710 of the environment corresponding to guest state that may not
711 be reordered with respect to memory references. That means
712 at least the stack pointer. */
713 for (j = 0; j < env->used; j++) {
714 if (!env->inuse[j])
715 continue;
716 if (vex_control.iropt_precise_memory_exns) {
717 /* Precise exceptions required. Flush all guest state. */
718 env->inuse[j] = False;
719 } else {
720 /* Just flush the minimal amount required, as computed by
721 preciseMemExnsFn. */
722 HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
723 HWord k_hi = env->key[j] & 0xFFFF;
724 if (preciseMemExnsFn( k_lo, k_hi ))
725 env->inuse[j] = False;
726 }
727 }
728 } /* if (memRW) */
729
730}
731
732
733/* Scan backwards, building up a set of (min offset, max
734 offset) pairs, indicating those parts of the guest state
735 for which the next event is a write.
736
737 On seeing a conditional exit, empty the set.
738
739 On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
740 completely within the set, remove the Put. Otherwise, add
741 (minoff,maxoff) to the set.
742
743 On seeing 'Get (minoff,maxoff)', remove any part of the set
sewardj98430292004-12-29 17:34:50 +0000744 overlapping (minoff,maxoff). The same has to happen for any events
745 which implicitly read parts of the guest state: dirty helper calls
746 and loads/stores.
sewardj08210532004-12-29 17:09:11 +0000747*/
748
749static void redundant_put_removal_BB (
750 IRBB* bb,
751 Bool (*preciseMemExnsFn)(Int,Int)
752 )
753{
754 Int i, j;
755 Bool isPut;
756 IRStmt* st;
757 UInt key = 0; /* keep gcc -O happy */
758
759 HashHW* env = newHHW();
760 for (i = bb->stmts_used-1; i >= 0; i--) {
761 st = bb->stmts[i];
762 if (!st)
763 continue;
764
765 /* Deal with conditional exits. */
766 if (st->tag == Ist_Exit) {
767 /* Since control may not get beyond this point, we must empty
768 out the set, since we can no longer claim that the next
769 event for any part of the guest state is definitely a
770 write. */
sewardj496a58d2005-03-20 18:44:44 +0000771 vassert(isIRAtom(st->Ist.Exit.guard));
sewardj08210532004-12-29 17:09:11 +0000772 for (j = 0; j < env->used; j++)
773 env->inuse[j] = False;
774 continue;
775 }
776
777 /* Deal with Puts */
778 switch (st->tag) {
779 case Ist_Put:
780 isPut = True;
781 key = mk_key_GetPut( st->Ist.Put.offset,
782 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
sewardj496a58d2005-03-20 18:44:44 +0000783 vassert(isIRAtom(st->Ist.Put.data));
sewardj08210532004-12-29 17:09:11 +0000784 break;
785 case Ist_PutI:
786 isPut = True;
787 key = mk_key_GetIPutI( st->Ist.PutI.descr );
sewardj496a58d2005-03-20 18:44:44 +0000788 vassert(isIRAtom(st->Ist.PutI.ix));
789 vassert(isIRAtom(st->Ist.PutI.data));
sewardj08210532004-12-29 17:09:11 +0000790 break;
791 default:
792 isPut = False;
793 }
794 if (isPut && st->tag != Ist_PutI) {
795 /* See if any single entry in env overlaps this Put. This is
796 simplistic in that the transformation is valid if, say, two
797 or more entries in the env overlap this Put, but the use of
798 lookupHHW will only find a single entry which exactly
799 overlaps this Put. This is suboptimal but safe. */
800 if (lookupHHW(env, NULL, (HWord)key)) {
801 /* This Put is redundant because a later one will overwrite
802 it. So NULL (nop) it out. */
803 if (DEBUG_IROPT) {
804 vex_printf("rPUT: "); ppIRStmt(st);
805 vex_printf("\n");
806 }
sewardjd2445f62005-03-21 00:15:53 +0000807 bb->stmts[i] = IRStmt_NoOp();
sewardj08210532004-12-29 17:09:11 +0000808 } else {
809 /* We can't demonstrate that this Put is redundant, so add it
810 to the running collection. */
811 addToHHW(env, (HWord)key, 0);
812 }
813 continue;
814 }
815
816 /* Deal with Gets. These remove bits of the environment since
817 appearance of a Get means that the next event for that slice
sewardj98430292004-12-29 17:34:50 +0000818 of the guest state is no longer a write, but a read. Also
819 deals with implicit reads of guest state needed to maintain
820 precise exceptions. */
sewardj08210532004-12-29 17:09:11 +0000821 handle_gets_Stmt( env, st, preciseMemExnsFn );
822 }
823}
824
sewardj84be7372004-08-18 13:59:33 +0000825
826/*---------------------------------------------------------------*/
827/*--- Constant propagation and folding ---*/
828/*---------------------------------------------------------------*/
829
sewardj62617ef2004-10-13 23:29:22 +0000830/* The env in this section is a map from IRTemp to IRExpr*,
831 that is, an array indexed by IRTemp. */
sewardjf6501992004-08-27 11:58:24 +0000832
sewardjf6729012004-08-25 12:45:13 +0000833/* Are both expressions simply the same IRTemp ? */
834static Bool sameIRTemps ( IRExpr* e1, IRExpr* e2 )
835{
sewardj9d2e7692005-02-07 01:11:31 +0000836 return toBool( e1->tag == Iex_Tmp
837 && e2->tag == Iex_Tmp
838 && e1->Iex.Tmp.tmp == e2->Iex.Tmp.tmp );
sewardjf6729012004-08-25 12:45:13 +0000839}
840
sewardje1d45da2004-11-12 00:13:21 +0000841static Bool notBool ( Bool b )
842{
843 if (b == True) return False;
844 if (b == False) return True;
845 vpanic("notBool");
846}
847
sewardj84be7372004-08-18 13:59:33 +0000848static IRExpr* fold_Expr ( IRExpr* e )
849{
sewardj278c44c2004-08-20 00:28:13 +0000850 Int shift;
sewardj84be7372004-08-18 13:59:33 +0000851 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
852
853 /* UNARY ops */
854 if (e->tag == Iex_Unop
855 && e->Iex.Unop.arg->tag == Iex_Const) {
856 switch (e->Iex.Unop.op) {
sewardjae27ab62004-10-15 21:21:46 +0000857 case Iop_1Uto8:
sewardj9d2e7692005-02-07 01:11:31 +0000858 e2 = IRExpr_Const(IRConst_U8(toUChar(
sewardjba999312004-11-15 15:21:17 +0000859 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
sewardj9d2e7692005-02-07 01:11:31 +0000860 ? 1 : 0)));
sewardjae27ab62004-10-15 21:21:46 +0000861 break;
sewardjf4a821d2004-10-09 00:58:19 +0000862 case Iop_1Uto32:
863 e2 = IRExpr_Const(IRConst_U32(
sewardjba999312004-11-15 15:21:17 +0000864 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
sewardjf4a821d2004-10-09 00:58:19 +0000865 ? 1 : 0));
866 break;
sewardje6b39932004-11-06 17:01:15 +0000867
sewardjd9997882004-11-04 19:42:59 +0000868 case Iop_1Sto32:
869 e2 = IRExpr_Const(IRConst_U32(
sewardjba999312004-11-15 15:21:17 +0000870 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
sewardjd9997882004-11-04 19:42:59 +0000871 ? 0xFFFFFFFF : 0));
872 break;
sewardje6b39932004-11-06 17:01:15 +0000873 case Iop_1Sto64:
874 e2 = IRExpr_Const(IRConst_U64(
sewardjba999312004-11-15 15:21:17 +0000875 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
sewardje6b39932004-11-06 17:01:15 +0000876 ? 0xFFFFFFFFFFFFFFFFULL : 0));
877 break;
878
sewardj883b00b2004-09-11 09:30:24 +0000879 case Iop_8Sto32: {
880 /* signed */ Int s32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
881 s32 <<= 24;
882 s32 >>= 24;
883 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
884 break;
885 }
sewardj84be7372004-08-18 13:59:33 +0000886 case Iop_8Uto32:
887 e2 = IRExpr_Const(IRConst_U32(
888 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
889 break;
890 case Iop_16Uto32:
891 e2 = IRExpr_Const(IRConst_U32(
892 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
893 break;
sewardj73017432004-10-14 19:33:25 +0000894 case Iop_32to16:
sewardj9d2e7692005-02-07 01:11:31 +0000895 e2 = IRExpr_Const(IRConst_U16(toUShort(
896 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
sewardj73017432004-10-14 19:33:25 +0000897 break;
sewardj4345f7a2004-09-22 19:49:27 +0000898 case Iop_32to8:
sewardj9d2e7692005-02-07 01:11:31 +0000899 e2 = IRExpr_Const(IRConst_U8(toUChar(
900 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
sewardj4345f7a2004-09-22 19:49:27 +0000901 break;
sewardj7447b5b2004-10-16 11:30:42 +0000902 case Iop_32to1:
sewardj9d2e7692005-02-07 01:11:31 +0000903 e2 = IRExpr_Const(IRConst_U1(toBool(
904 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
905 )));
sewardj7447b5b2004-10-16 11:30:42 +0000906 break;
sewardje6b39932004-11-06 17:01:15 +0000907
sewardjf057afb2005-02-27 13:35:41 +0000908 case Iop_Not64:
909 e2 = IRExpr_Const(IRConst_U64(
910 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
911 break;
sewardj883b00b2004-09-11 09:30:24 +0000912 case Iop_Not32:
913 e2 = IRExpr_Const(IRConst_U32(
914 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
915 break;
sewardje6b39932004-11-06 17:01:15 +0000916 case Iop_Not16:
sewardj9d2e7692005-02-07 01:11:31 +0000917 e2 = IRExpr_Const(IRConst_U16(toUShort(
918 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
sewardje6b39932004-11-06 17:01:15 +0000919 break;
sewardjd9997882004-11-04 19:42:59 +0000920 case Iop_Not8:
sewardj9d2e7692005-02-07 01:11:31 +0000921 e2 = IRExpr_Const(IRConst_U8(toUChar(
922 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
sewardjd9997882004-11-04 19:42:59 +0000923 break;
sewardje6b39932004-11-06 17:01:15 +0000924
sewardje1d45da2004-11-12 00:13:21 +0000925 case Iop_Not1:
sewardjba999312004-11-15 15:21:17 +0000926 e2 = IRExpr_Const(IRConst_U1(
927 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
sewardje1d45da2004-11-12 00:13:21 +0000928 break;
929
sewardj1d8ce202004-12-13 14:14:16 +0000930 case Iop_64to32: {
931 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
932 w64 &= 0x00000000FFFFFFFFULL;
933 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
sewardj37010592004-12-13 10:47:15 +0000934 break;
sewardj1d8ce202004-12-13 14:14:16 +0000935 }
sewardj1d8ce202004-12-13 14:14:16 +0000936 case Iop_64HIto32: {
937 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
938 w64 >>= 32;
939 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
940 break;
941 }
sewardjb5710b82005-01-27 16:05:09 +0000942 case Iop_32Uto64:
943 e2 = IRExpr_Const(IRConst_U64(
944 0xFFFFFFFFULL
945 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
946 break;
sewardj37010592004-12-13 10:47:15 +0000947
sewardj84be7372004-08-18 13:59:33 +0000948 default:
949 goto unhandled;
950 }
951 }
952
953 /* BINARY ops */
954 if (e->tag == Iex_Binop) {
955 if (e->Iex.Binop.arg1->tag == Iex_Const
956 && e->Iex.Binop.arg2->tag == Iex_Const) {
957 /* cases where both args are consts */
958 switch (e->Iex.Binop.op) {
sewardje6b39932004-11-06 17:01:15 +0000959
sewardj855dc722005-02-17 09:26:05 +0000960 /* -- Or -- */
sewardjd9997882004-11-04 19:42:59 +0000961 case Iop_Or8:
sewardj9d2e7692005-02-07 01:11:31 +0000962 e2 = IRExpr_Const(IRConst_U8(toUChar(
sewardjd9997882004-11-04 19:42:59 +0000963 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
sewardj9d2e7692005-02-07 01:11:31 +0000964 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
sewardjd9997882004-11-04 19:42:59 +0000965 break;
sewardje6b39932004-11-06 17:01:15 +0000966 case Iop_Or16:
sewardj9d2e7692005-02-07 01:11:31 +0000967 e2 = IRExpr_Const(IRConst_U16(toUShort(
sewardje6b39932004-11-06 17:01:15 +0000968 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
sewardj9d2e7692005-02-07 01:11:31 +0000969 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
sewardje6b39932004-11-06 17:01:15 +0000970 break;
971 case Iop_Or32:
972 e2 = IRExpr_Const(IRConst_U32(
973 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
974 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
975 break;
sewardjf057afb2005-02-27 13:35:41 +0000976 case Iop_Or64:
977 e2 = IRExpr_Const(IRConst_U64(
978 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
979 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
980 break;
sewardje6b39932004-11-06 17:01:15 +0000981
sewardj855dc722005-02-17 09:26:05 +0000982 /* -- Xor -- */
sewardj883b00b2004-09-11 09:30:24 +0000983 case Iop_Xor8:
sewardj9d2e7692005-02-07 01:11:31 +0000984 e2 = IRExpr_Const(IRConst_U8(toUChar(
sewardj883b00b2004-09-11 09:30:24 +0000985 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
sewardj9d2e7692005-02-07 01:11:31 +0000986 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
sewardj883b00b2004-09-11 09:30:24 +0000987 break;
sewardj82c9f2f2005-03-02 16:05:13 +0000988 case Iop_Xor16:
sewardjc7c098f2005-03-21 01:06:20 +0000989 e2 = IRExpr_Const(IRConst_U16(toUShort(
sewardj82c9f2f2005-03-02 16:05:13 +0000990 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
sewardjc7c098f2005-03-21 01:06:20 +0000991 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
sewardj82c9f2f2005-03-02 16:05:13 +0000992 break;
sewardj855dc722005-02-17 09:26:05 +0000993 case Iop_Xor32:
994 e2 = IRExpr_Const(IRConst_U32(
995 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
996 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
997 break;
sewardjf057afb2005-02-27 13:35:41 +0000998 case Iop_Xor64:
999 e2 = IRExpr_Const(IRConst_U64(
1000 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1001 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1002 break;
sewardj855dc722005-02-17 09:26:05 +00001003
1004 /* -- And -- */
sewardj84be7372004-08-18 13:59:33 +00001005 case Iop_And8:
sewardj9d2e7692005-02-07 01:11:31 +00001006 e2 = IRExpr_Const(IRConst_U8(toUChar(
sewardj84be7372004-08-18 13:59:33 +00001007 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
sewardj9d2e7692005-02-07 01:11:31 +00001008 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
sewardj84be7372004-08-18 13:59:33 +00001009 break;
sewardj855dc722005-02-17 09:26:05 +00001010 case Iop_And32:
1011 e2 = IRExpr_Const(IRConst_U32(
1012 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1013 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1014 break;
1015 case Iop_And64:
1016 e2 = IRExpr_Const(IRConst_U64(
1017 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1018 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1019 break;
1020
1021 /* -- Add -- */
sewardj4345f7a2004-09-22 19:49:27 +00001022 case Iop_Add8:
sewardj9d2e7692005-02-07 01:11:31 +00001023 e2 = IRExpr_Const(IRConst_U8(toUChar(
sewardj4345f7a2004-09-22 19:49:27 +00001024 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
sewardj9d2e7692005-02-07 01:11:31 +00001025 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
sewardj4345f7a2004-09-22 19:49:27 +00001026 break;
sewardj855dc722005-02-17 09:26:05 +00001027 case Iop_Add32:
1028 e2 = IRExpr_Const(IRConst_U32(
1029 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1030 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1031 break;
1032 case Iop_Add64:
1033 e2 = IRExpr_Const(IRConst_U64(
1034 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1035 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1036 break;
1037
1038 /* -- Sub -- */
sewardj84be7372004-08-18 13:59:33 +00001039 case Iop_Sub8:
sewardj9d2e7692005-02-07 01:11:31 +00001040 e2 = IRExpr_Const(IRConst_U8(toUChar(
sewardj84be7372004-08-18 13:59:33 +00001041 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
sewardj9d2e7692005-02-07 01:11:31 +00001042 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
sewardj84be7372004-08-18 13:59:33 +00001043 break;
sewardjd7217032004-08-19 10:49:10 +00001044 case Iop_Sub32:
1045 e2 = IRExpr_Const(IRConst_U32(
1046 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1047 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1048 break;
sewardj70a8ddf2005-02-13 02:24:26 +00001049 case Iop_Sub64:
1050 e2 = IRExpr_Const(IRConst_U64(
1051 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1052 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1053 break;
sewardjc2bcb6f2005-02-07 00:17:12 +00001054
sewardj855dc722005-02-17 09:26:05 +00001055 /* -- Mul -- */
sewardjb9c5cf62004-08-24 15:10:38 +00001056 case Iop_Mul32:
1057 e2 = IRExpr_Const(IRConst_U32(
1058 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1059 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1060 break;
sewardjea6bccb2005-03-01 10:19:23 +00001061 case Iop_MullS32: {
1062 /* very paranoid */
1063 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1064 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1065 Int s32a = (Int)u32a;
1066 Int s32b = (Int)u32b;
1067 Long s64a = (Long)s32a;
1068 Long s64b = (Long)s32b;
1069 Long sres = s64a * s64b;
1070 ULong ures = (ULong)sres;
1071 e2 = IRExpr_Const(IRConst_U64(ures));
1072 break;
1073 }
sewardjb095fba2005-02-13 14:13:04 +00001074
sewardj855dc722005-02-17 09:26:05 +00001075 /* -- Shl -- */
sewardjd7217032004-08-19 10:49:10 +00001076 case Iop_Shl32:
sewardj61348472004-08-20 01:01:04 +00001077 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1078 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
sewardj29632392004-08-22 02:38:11 +00001079 if (shift >= 0 && shift <= 31)
sewardj278c44c2004-08-20 00:28:13 +00001080 e2 = IRExpr_Const(IRConst_U32(
1081 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1082 << shift)));
sewardjd7217032004-08-19 10:49:10 +00001083 break;
sewardjb095fba2005-02-13 14:13:04 +00001084 case Iop_Shl64:
1085 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1086 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1087 if (shift >= 0 && shift <= 63)
1088 e2 = IRExpr_Const(IRConst_U64(
1089 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1090 << shift)));
1091 break;
1092
sewardj855dc722005-02-17 09:26:05 +00001093 /* -- Sar -- */
sewardj278c44c2004-08-20 00:28:13 +00001094 case Iop_Sar32: {
1095 /* paranoid ... */
1096 /*signed*/ Int s32;
sewardj61348472004-08-20 01:01:04 +00001097 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
sewardj278c44c2004-08-20 00:28:13 +00001098 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
sewardj61348472004-08-20 01:01:04 +00001099 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
sewardj278c44c2004-08-20 00:28:13 +00001100 if (shift >= 0 && shift <= 31) {
1101 s32 >>=/*signed*/ shift;
1102 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1103 }
1104 break;
1105 }
sewardj855dc722005-02-17 09:26:05 +00001106 case Iop_Sar64: {
1107 /* paranoid ... */
1108 /*signed*/ Long s64;
1109 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1110 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1111 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1112 if (shift >= 0 && shift <= 63) {
1113 s64 >>=/*signed*/ shift;
1114 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1115 }
1116 break;
1117 }
1118
1119 /* -- Shr -- */
sewardj61348472004-08-20 01:01:04 +00001120 case Iop_Shr32: {
1121 /* paranoid ... */
sewardj4add7142005-02-21 08:20:22 +00001122 /*unsigned*/ UInt u32;
sewardj61348472004-08-20 01:01:04 +00001123 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
sewardj4add7142005-02-21 08:20:22 +00001124 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
sewardj61348472004-08-20 01:01:04 +00001125 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1126 if (shift >= 0 && shift <= 31) {
sewardj4add7142005-02-21 08:20:22 +00001127 u32 >>=/*unsigned*/ shift;
1128 e2 = IRExpr_Const(IRConst_U32(u32));
1129 }
1130 break;
1131 }
1132 case Iop_Shr64: {
1133 /* paranoid ... */
1134 /*unsigned*/ ULong u64;
1135 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1136 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1137 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1138 if (shift >= 0 && shift <= 63) {
1139 u64 >>=/*unsigned*/ shift;
1140 e2 = IRExpr_Const(IRConst_U64(u64));
sewardj61348472004-08-20 01:01:04 +00001141 }
1142 break;
1143 }
sewardj855dc722005-02-17 09:26:05 +00001144
1145 /* -- CmpEQ -- */
sewardjb8e75862004-08-19 17:58:45 +00001146 case Iop_CmpEQ32:
sewardj9d2e7692005-02-07 01:11:31 +00001147 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardjb8e75862004-08-19 17:58:45 +00001148 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
sewardj9d2e7692005-02-07 01:11:31 +00001149 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
sewardjb8e75862004-08-19 17:58:45 +00001150 break;
sewardj855dc722005-02-17 09:26:05 +00001151 case Iop_CmpEQ64:
1152 e2 = IRExpr_Const(IRConst_U1(toBool(
1153 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1154 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
1155 break;
1156
1157 /* -- CmpNE -- */
1158 case Iop_CmpNE8:
1159 e2 = IRExpr_Const(IRConst_U1(toBool(
1160 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
1161 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
1162 break;
sewardjae27ab62004-10-15 21:21:46 +00001163 case Iop_CmpNE32:
sewardj9d2e7692005-02-07 01:11:31 +00001164 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardjae27ab62004-10-15 21:21:46 +00001165 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
sewardj9d2e7692005-02-07 01:11:31 +00001166 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
sewardjae27ab62004-10-15 21:21:46 +00001167 break;
sewardje6b39932004-11-06 17:01:15 +00001168 case Iop_CmpNE64:
sewardj9d2e7692005-02-07 01:11:31 +00001169 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardje6b39932004-11-06 17:01:15 +00001170 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
sewardj9d2e7692005-02-07 01:11:31 +00001171 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
sewardje6b39932004-11-06 17:01:15 +00001172 break;
1173
sewardj855dc722005-02-17 09:26:05 +00001174 /* -- CmpLEU -- */
sewardj7447b5b2004-10-16 11:30:42 +00001175 case Iop_CmpLE32U:
sewardj9d2e7692005-02-07 01:11:31 +00001176 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardj7447b5b2004-10-16 11:30:42 +00001177 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
sewardj9d2e7692005-02-07 01:11:31 +00001178 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
sewardj7447b5b2004-10-16 11:30:42 +00001179 break;
sewardj855dc722005-02-17 09:26:05 +00001180
1181 /* -- CmpLES -- */
sewardj088e4f72004-10-19 01:25:02 +00001182 case Iop_CmpLE32S:
sewardj9d2e7692005-02-07 01:11:31 +00001183 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardj088e4f72004-10-19 01:25:02 +00001184 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
sewardj9d2e7692005-02-07 01:11:31 +00001185 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
sewardj088e4f72004-10-19 01:25:02 +00001186 break;
sewardje1d45da2004-11-12 00:13:21 +00001187
sewardj855dc722005-02-17 09:26:05 +00001188 /* -- CmpLTS -- */
sewardj9bdd2652004-10-19 12:56:33 +00001189 case Iop_CmpLT32S:
sewardj9d2e7692005-02-07 01:11:31 +00001190 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardj9bdd2652004-10-19 12:56:33 +00001191 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
sewardj9d2e7692005-02-07 01:11:31 +00001192 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
sewardj9bdd2652004-10-19 12:56:33 +00001193 break;
sewardj855dc722005-02-17 09:26:05 +00001194
1195 /* -- CmpLTU -- */
sewardje1d45da2004-11-12 00:13:21 +00001196 case Iop_CmpLT32U:
sewardj9d2e7692005-02-07 01:11:31 +00001197 e2 = IRExpr_Const(IRConst_U1(toBool(
sewardje1d45da2004-11-12 00:13:21 +00001198 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
sewardj9d2e7692005-02-07 01:11:31 +00001199 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
sewardje1d45da2004-11-12 00:13:21 +00001200 break;
sewardj7447b5b2004-10-16 11:30:42 +00001201
sewardj855dc722005-02-17 09:26:05 +00001202 /* -- nHLto2n -- */
sewardj088bcb42004-08-19 17:16:52 +00001203 case Iop_32HLto64:
1204 e2 = IRExpr_Const(IRConst_U64(
1205 (((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)) << 32)
1206 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
1207 ));
1208 break;
sewardj855dc722005-02-17 09:26:05 +00001209
sewardj607dd4f2004-09-08 18:20:19 +00001210 default:
1211 goto unhandled;
sewardjd7217032004-08-19 10:49:10 +00001212 }
sewardjf6729012004-08-25 12:45:13 +00001213
sewardj84be7372004-08-18 13:59:33 +00001214 } else {
sewardjf6729012004-08-25 12:45:13 +00001215
sewardj84be7372004-08-18 13:59:33 +00001216 /* other cases (identities, etc) */
sewardj4afab822005-01-20 10:04:05 +00001217 /* Shl32/Shr32(x,0) ==> x */
1218 if ((e->Iex.Binop.op == Iop_Shl32 || e->Iex.Binop.op == Iop_Shr32)
sewardj52345402004-10-13 16:08:14 +00001219 && e->Iex.Binop.arg2->tag == Iex_Const
sewardj4980c6b2004-10-13 16:16:27 +00001220 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U8 == 0) {
sewardj52345402004-10-13 16:08:14 +00001221 e2 = e->Iex.Binop.arg1;
1222 } else
1223
sewardjd9997882004-11-04 19:42:59 +00001224 /* Or32/Add32(x,0) ==> x */
1225 if ((e->Iex.Binop.op == Iop_Add32 || e->Iex.Binop.op == Iop_Or32)
sewardj84be7372004-08-18 13:59:33 +00001226 && e->Iex.Binop.arg2->tag == Iex_Const
1227 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0) {
1228 e2 = e->Iex.Binop.arg1;
sewardjf6729012004-08-25 12:45:13 +00001229 } else
1230
sewardjdcd6c882004-12-16 11:41:06 +00001231 /* Or64/Add64(x,0) ==> x */
1232 if ((e->Iex.Binop.op == Iop_Add64 || e->Iex.Binop.op == Iop_Or64)
1233 && e->Iex.Binop.arg2->tag == Iex_Const
1234 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U64 == 0) {
1235 e2 = e->Iex.Binop.arg1;
1236 } else
1237
sewardj832853b2004-11-06 12:10:04 +00001238 /* And32(x,0xFFFFFFFF) ==> x */
1239 if (e->Iex.Binop.op == Iop_And32
1240 && e->Iex.Binop.arg2->tag == Iex_Const
1241 && e->Iex.Binop.arg2->Iex.Const.con->Ico.U32 == 0xFFFFFFFF) {
1242 e2 = e->Iex.Binop.arg1;
1243 } else
1244
sewardjd9997882004-11-04 19:42:59 +00001245 /* Or32(0,x) ==> x */
1246 if (e->Iex.Binop.op == Iop_Or32
1247 && e->Iex.Binop.arg1->tag == Iex_Const
1248 && e->Iex.Binop.arg1->Iex.Const.con->Ico.U32 == 0) {
1249 e2 = e->Iex.Binop.arg2;
1250 } else
1251
sewardjdcd6c882004-12-16 11:41:06 +00001252 /* Or8/16/32/64(t,t) ==> t, for some IRTemp t */
1253 /* And8/16/32/64(t,t) ==> t, for some IRTemp t */
1254 if ( (e->Iex.Binop.op == Iop_And64
1255 || e->Iex.Binop.op == Iop_And32
sewardjf6729012004-08-25 12:45:13 +00001256 || e->Iex.Binop.op == Iop_And16
sewardjdcd6c882004-12-16 11:41:06 +00001257 || e->Iex.Binop.op == Iop_And8
1258 || e->Iex.Binop.op == Iop_Or64
1259 || e->Iex.Binop.op == Iop_Or32
1260 || e->Iex.Binop.op == Iop_Or16
1261 || e->Iex.Binop.op == Iop_Or8)
sewardjf6729012004-08-25 12:45:13 +00001262 && sameIRTemps(e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
1263 e2 = e->Iex.Binop.arg1;
sewardjaba4fff2004-10-08 21:37:15 +00001264 }
sewardjf6729012004-08-25 12:45:13 +00001265
sewardj84be7372004-08-18 13:59:33 +00001266 }
1267 }
1268
1269 /* Mux0X */
1270 if (e->tag == Iex_Mux0X
1271 && e->Iex.Mux0X.cond->tag == Iex_Const) {
1272 Bool zero;
1273 /* assured us by the IR type rules */
1274 vassert(e->Iex.Mux0X.cond->Iex.Const.con->tag == Ico_U8);
sewardj9d2e7692005-02-07 01:11:31 +00001275 zero = toBool(0 == (0xFF & e->Iex.Mux0X.cond
1276 ->Iex.Const.con->Ico.U8));
sewardj84be7372004-08-18 13:59:33 +00001277 e2 = zero ? e->Iex.Mux0X.expr0 : e->Iex.Mux0X.exprX;
1278 }
1279
sewardj088bcb42004-08-19 17:16:52 +00001280 if (DEBUG_IROPT && e2 != e) {
1281 vex_printf("FOLD: ");
sewardj84be7372004-08-18 13:59:33 +00001282 ppIRExpr(e); vex_printf(" -> ");
1283 ppIRExpr(e2); vex_printf("\n");
1284 }
1285
1286 return e2;
1287
1288 unhandled:
sewardj883b00b2004-09-11 09:30:24 +00001289# if 0
sewardj84be7372004-08-18 13:59:33 +00001290 vex_printf("\n\n");
1291 ppIRExpr(e);
1292 vpanic("fold_Expr: no rule for the above");
sewardj883b00b2004-09-11 09:30:24 +00001293# else
1294 vex_printf("vex iropt: fold_Expr: no rule for: ");
1295 ppIRExpr(e);
1296 vex_printf("\n");
1297 return e2;
1298# endif
sewardj84be7372004-08-18 13:59:33 +00001299}
1300
1301
sewardj84be7372004-08-18 13:59:33 +00001302/* Apply the subst to a simple 1-level expression -- guaranteed to be
1303 1-level due to previous flattening pass. */
1304
sewardj62617ef2004-10-13 23:29:22 +00001305static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
sewardj84be7372004-08-18 13:59:33 +00001306{
sewardj62617ef2004-10-13 23:29:22 +00001307 switch (ex->tag) {
1308 case Iex_Tmp:
1309 if (env[(Int)ex->Iex.Tmp.tmp] != NULL) {
1310 return env[(Int)ex->Iex.Tmp.tmp];
1311 } else {
1312 /* not bound in env */
1313 return ex;
1314 }
1315
1316 case Iex_Const:
1317 case Iex_Get:
sewardj84be7372004-08-18 13:59:33 +00001318 return ex;
sewardj62617ef2004-10-13 23:29:22 +00001319
1320 case Iex_GetI:
sewardj496a58d2005-03-20 18:44:44 +00001321 vassert(isIRAtom(ex->Iex.GetI.ix));
sewardj62617ef2004-10-13 23:29:22 +00001322 return IRExpr_GetI(
1323 ex->Iex.GetI.descr,
sewardjeeac8412004-11-02 00:26:55 +00001324 subst_Expr(env, ex->Iex.GetI.ix),
sewardj62617ef2004-10-13 23:29:22 +00001325 ex->Iex.GetI.bias
1326 );
1327
1328 case Iex_Binop:
sewardj496a58d2005-03-20 18:44:44 +00001329 vassert(isIRAtom(ex->Iex.Binop.arg1));
1330 vassert(isIRAtom(ex->Iex.Binop.arg2));
sewardj62617ef2004-10-13 23:29:22 +00001331 return IRExpr_Binop(
1332 ex->Iex.Binop.op,
1333 subst_Expr(env, ex->Iex.Binop.arg1),
1334 subst_Expr(env, ex->Iex.Binop.arg2)
1335 );
1336
1337 case Iex_Unop:
sewardj496a58d2005-03-20 18:44:44 +00001338 vassert(isIRAtom(ex->Iex.Unop.arg));
sewardj62617ef2004-10-13 23:29:22 +00001339 return IRExpr_Unop(
1340 ex->Iex.Unop.op,
1341 subst_Expr(env, ex->Iex.Unop.arg)
1342 );
1343
1344 case Iex_LDle:
sewardj496a58d2005-03-20 18:44:44 +00001345 vassert(isIRAtom(ex->Iex.LDle.addr));
sewardj62617ef2004-10-13 23:29:22 +00001346 return IRExpr_LDle(
1347 ex->Iex.LDle.ty,
1348 subst_Expr(env, ex->Iex.LDle.addr)
1349 );
1350
1351 case Iex_CCall: {
1352 Int i;
1353 IRExpr** args2 = sopyIRExprVec(ex->Iex.CCall.args);
1354 for (i = 0; args2[i]; i++) {
sewardj496a58d2005-03-20 18:44:44 +00001355 vassert(isIRAtom(args2[i]));
sewardj62617ef2004-10-13 23:29:22 +00001356 args2[i] = subst_Expr(env, args2[i]);
1357 }
1358 return IRExpr_CCall(
sewardj8ea867b2004-10-30 19:03:02 +00001359 ex->Iex.CCall.cee,
sewardj62617ef2004-10-13 23:29:22 +00001360 ex->Iex.CCall.retty,
1361 args2
1362 );
sewardj84be7372004-08-18 13:59:33 +00001363 }
sewardj62617ef2004-10-13 23:29:22 +00001364
1365 case Iex_Mux0X:
sewardj496a58d2005-03-20 18:44:44 +00001366 vassert(isIRAtom(ex->Iex.Mux0X.cond));
1367 vassert(isIRAtom(ex->Iex.Mux0X.expr0));
1368 vassert(isIRAtom(ex->Iex.Mux0X.exprX));
sewardj62617ef2004-10-13 23:29:22 +00001369 return IRExpr_Mux0X(
1370 subst_Expr(env, ex->Iex.Mux0X.cond),
1371 subst_Expr(env, ex->Iex.Mux0X.expr0),
1372 subst_Expr(env, ex->Iex.Mux0X.exprX)
1373 );
1374
1375 default:
1376 vex_printf("\n\n"); ppIRExpr(ex);
1377 vpanic("subst_Expr");
1378
sewardj84be7372004-08-18 13:59:33 +00001379 }
sewardj84be7372004-08-18 13:59:33 +00001380}
1381
1382
1383/* Apply the subst to stmt, then fold the result as much as possible.
sewardjd2445f62005-03-21 00:15:53 +00001384 Much simplified due to stmt being previously flattened. As a
1385 result of this, the stmt may wind up being turned into a no-op.
1386*/
sewardj62617ef2004-10-13 23:29:22 +00001387static IRStmt* subst_and_fold_Stmt ( IRExpr** env, IRStmt* st )
sewardj84be7372004-08-18 13:59:33 +00001388{
1389# if 0
1390 vex_printf("\nsubst and fold stmt\n");
1391 ppIRStmt(st);
1392 vex_printf("\n");
1393# endif
1394
sewardj62617ef2004-10-13 23:29:22 +00001395 switch (st->tag) {
1396 case Ist_Put:
sewardj496a58d2005-03-20 18:44:44 +00001397 vassert(isIRAtom(st->Ist.Put.data));
sewardj62617ef2004-10-13 23:29:22 +00001398 return IRStmt_Put(
1399 st->Ist.Put.offset,
1400 fold_Expr(subst_Expr(env, st->Ist.Put.data))
1401 );
sewardj84be7372004-08-18 13:59:33 +00001402
sewardj62617ef2004-10-13 23:29:22 +00001403 case Ist_PutI:
sewardj496a58d2005-03-20 18:44:44 +00001404 vassert(isIRAtom(st->Ist.PutI.ix));
1405 vassert(isIRAtom(st->Ist.PutI.data));
sewardj62617ef2004-10-13 23:29:22 +00001406 return IRStmt_PutI(
1407 st->Ist.PutI.descr,
sewardjeeac8412004-11-02 00:26:55 +00001408 fold_Expr(subst_Expr(env, st->Ist.PutI.ix)),
sewardj62617ef2004-10-13 23:29:22 +00001409 st->Ist.PutI.bias,
1410 fold_Expr(subst_Expr(env, st->Ist.PutI.data))
1411 );
sewardjd7217032004-08-19 10:49:10 +00001412
sewardj62617ef2004-10-13 23:29:22 +00001413 case Ist_Tmp:
1414 /* This is the one place where an expr (st->Ist.Tmp.data) is
1415 allowed to be more than just a constant or a tmp. */
1416 return IRStmt_Tmp(
1417 st->Ist.Tmp.tmp,
1418 fold_Expr(subst_Expr(env, st->Ist.Tmp.data))
1419 );
sewardj84be7372004-08-18 13:59:33 +00001420
sewardj62617ef2004-10-13 23:29:22 +00001421 case Ist_STle:
sewardj496a58d2005-03-20 18:44:44 +00001422 vassert(isIRAtom(st->Ist.STle.addr));
1423 vassert(isIRAtom(st->Ist.STle.data));
sewardj62617ef2004-10-13 23:29:22 +00001424 return IRStmt_STle(
1425 fold_Expr(subst_Expr(env, st->Ist.STle.addr)),
1426 fold_Expr(subst_Expr(env, st->Ist.STle.data))
1427 );
sewardj84be7372004-08-18 13:59:33 +00001428
sewardj62617ef2004-10-13 23:29:22 +00001429 case Ist_Dirty: {
1430 Int i;
1431 IRDirty *d, *d2;
1432 d = st->Ist.Dirty.details;
1433 d2 = emptyIRDirty();
1434 *d2 = *d;
1435 d2->args = sopyIRExprVec(d2->args);
1436 if (d2->mFx != Ifx_None) {
sewardj496a58d2005-03-20 18:44:44 +00001437 vassert(isIRAtom(d2->mAddr));
sewardj62617ef2004-10-13 23:29:22 +00001438 d2->mAddr = fold_Expr(subst_Expr(env, d2->mAddr));
sewardjb8e75862004-08-19 17:58:45 +00001439 }
sewardj496a58d2005-03-20 18:44:44 +00001440 vassert(isIRAtom(d2->guard));
sewardjb8385d82004-11-02 01:34:15 +00001441 d2->guard = fold_Expr(subst_Expr(env, d2->guard));
sewardj62617ef2004-10-13 23:29:22 +00001442 for (i = 0; d2->args[i]; i++) {
sewardj496a58d2005-03-20 18:44:44 +00001443 vassert(isIRAtom(d2->args[i]));
sewardj62617ef2004-10-13 23:29:22 +00001444 d2->args[i] = fold_Expr(subst_Expr(env, d2->args[i]));
1445 }
1446 return IRStmt_Dirty(d2);
sewardjb8e75862004-08-19 17:58:45 +00001447 }
sewardj84be7372004-08-18 13:59:33 +00001448
sewardjf1689312005-03-16 18:19:10 +00001449 case Ist_IMark:
1450 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
1451
sewardjd2445f62005-03-21 00:15:53 +00001452 case Ist_NoOp:
1453 return IRStmt_NoOp();
1454
sewardj3e838932005-01-07 12:09:15 +00001455 case Ist_MFence:
1456 return IRStmt_MFence();
1457
sewardj62617ef2004-10-13 23:29:22 +00001458 case Ist_Exit: {
1459 IRExpr* fcond;
sewardj496a58d2005-03-20 18:44:44 +00001460 vassert(isIRAtom(st->Ist.Exit.guard));
sewardj0276d4b2004-11-15 15:30:21 +00001461 fcond = fold_Expr(subst_Expr(env, st->Ist.Exit.guard));
sewardj62617ef2004-10-13 23:29:22 +00001462 if (fcond->tag == Iex_Const) {
1463 /* Interesting. The condition on this exit has folded down to
1464 a constant. */
sewardjba999312004-11-15 15:21:17 +00001465 vassert(fcond->Iex.Const.con->tag == Ico_U1);
sewardj98430292004-12-29 17:34:50 +00001466 vassert(fcond->Iex.Const.con->Ico.U1 == False
1467 || fcond->Iex.Const.con->Ico.U1 == True);
sewardjba999312004-11-15 15:21:17 +00001468 if (fcond->Iex.Const.con->Ico.U1 == False) {
sewardj62617ef2004-10-13 23:29:22 +00001469 /* exit is never going to happen, so dump the statement. */
sewardjd2445f62005-03-21 00:15:53 +00001470 return IRStmt_NoOp();
sewardj62617ef2004-10-13 23:29:22 +00001471 } else {
sewardjba999312004-11-15 15:21:17 +00001472 vassert(fcond->Iex.Const.con->Ico.U1 == True);
sewardj62617ef2004-10-13 23:29:22 +00001473 /* Hmmm. The exit has become unconditional. Leave it as
1474 it is for now, since we'd have to truncate the BB at
1475 this point, which is tricky. */
1476 /* fall out into the reconstruct-the-exit code. */
sewardj08613742004-10-25 13:01:45 +00001477 if (vex_control.iropt_verbosity > 0)
1478 /* really a misuse of vex_control.iropt_verbosity */
sewardj8c2c10b2004-10-16 20:51:52 +00001479 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
sewardj62617ef2004-10-13 23:29:22 +00001480 }
1481 }
sewardj893aada2004-11-29 19:57:54 +00001482 return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
sewardj62617ef2004-10-13 23:29:22 +00001483 }
1484
1485 default:
1486 vex_printf("\n"); ppIRStmt(st);
1487 vpanic("subst_and_fold_Stmt");
1488 }
sewardj84be7372004-08-18 13:59:33 +00001489}
1490
1491
sewardjd9997882004-11-04 19:42:59 +00001492IRBB* cprop_BB ( IRBB* in )
sewardj84be7372004-08-18 13:59:33 +00001493{
sewardj62617ef2004-10-13 23:29:22 +00001494 Int i;
1495 IRBB* out;
1496 IRStmt* st2;
1497 Int n_tmps = in->tyenv->types_used;
1498 IRExpr** env = LibVEX_Alloc(n_tmps * sizeof(IRExpr*));
sewardj84be7372004-08-18 13:59:33 +00001499
1500 out = emptyIRBB();
sewardj695cff92004-10-13 14:50:14 +00001501 out->tyenv = dopyIRTypeEnv( in->tyenv );
sewardj84be7372004-08-18 13:59:33 +00001502
1503 /* Set up the env with which travels forward. This holds a
1504 substitution, mapping IRTemps to atoms, that is, IRExprs which
1505 are either IRTemps or IRConsts. Thus, copy and constant
1506 propagation is done. The environment is to be applied as we
1507 move along. Keys are IRTemps. Values are IRExpr*s.
1508 */
sewardj62617ef2004-10-13 23:29:22 +00001509 for (i = 0; i < n_tmps; i++)
1510 env[i] = NULL;
sewardj84be7372004-08-18 13:59:33 +00001511
1512 /* For each original SSA-form stmt ... */
1513 for (i = 0; i < in->stmts_used; i++) {
1514
1515 /* First apply the substitution to the current stmt. This
1516 propagates in any constants and tmp-tmp assignments
1517 accumulated prior to this point. As part of the subst_Stmt
1518 call, also then fold any constant expressions resulting. */
1519
sewardjf0e43162004-08-20 00:11:12 +00001520 st2 = in->stmts[i];
1521
1522 /* perhaps st2 is already a no-op? */
1523 if (!st2) continue;
1524
1525 st2 = subst_and_fold_Stmt( env, st2 );
sewardj84be7372004-08-18 13:59:33 +00001526
sewardjb8e75862004-08-19 17:58:45 +00001527 /* If the statement has been folded into a no-op, forget it. */
sewardjf0e43162004-08-20 00:11:12 +00001528 if (!st2) continue;
sewardjb8e75862004-08-19 17:58:45 +00001529
sewardj84be7372004-08-18 13:59:33 +00001530 /* Now consider what the stmt looks like. If it's of the form
1531 't = const' or 't1 = t2', add it to the running environment
1532 and not to the output BB. Otherwise, add it to the output
sewardjc0b42282004-10-12 13:44:12 +00001533 BB. Note, we choose not to propagate const when const is an
1534 F64i, so that F64i literals can be CSE'd later. This helps
1535 x86 floating point code generation. */
sewardj84be7372004-08-18 13:59:33 +00001536
sewardjc0b42282004-10-12 13:44:12 +00001537 if (st2->tag == Ist_Tmp
1538 && st2->Ist.Tmp.data->tag == Iex_Const
1539 && st2->Ist.Tmp.data->Iex.Const.con->tag != Ico_F64i) {
sewardj84be7372004-08-18 13:59:33 +00001540 /* 't = const' -- add to env.
1541 The pair (IRTemp, IRExpr*) is added. */
sewardj62617ef2004-10-13 23:29:22 +00001542 env[(Int)(st2->Ist.Tmp.tmp)] = st2->Ist.Tmp.data;
sewardj84be7372004-08-18 13:59:33 +00001543 }
1544 else
sewardj6d076362004-09-23 11:06:17 +00001545 if (st2->tag == Ist_Tmp && st2->Ist.Tmp.data->tag == Iex_Tmp) {
sewardj84be7372004-08-18 13:59:33 +00001546 /* 't1 = t2' -- add to env.
1547 The pair (IRTemp, IRExpr*) is added. */
sewardj62617ef2004-10-13 23:29:22 +00001548 env[(Int)(st2->Ist.Tmp.tmp)] = st2->Ist.Tmp.data;
sewardj84be7372004-08-18 13:59:33 +00001549 }
1550 else {
1551 /* Not interesting, copy st2 into the output block. */
1552 addStmtToIRBB( out, st2 );
1553 }
1554 }
1555
sewardj84be7372004-08-18 13:59:33 +00001556 out->next = subst_Expr( env, in->next );
1557 out->jumpkind = in->jumpkind;
1558 return out;
1559}
1560
1561
1562
sewardjedf4d692004-08-17 13:52:58 +00001563/*---------------------------------------------------------------*/
sewardj39e3f242004-08-18 16:54:52 +00001564/*--- Dead code (t = E) removal ---*/
1565/*---------------------------------------------------------------*/
1566
sewardjea602bc2004-10-14 21:40:12 +00001567/* The type of the HashHW map is: a map from IRTemp to nothing
sewardj39e3f242004-08-18 16:54:52 +00001568 -- really just operating a set or IRTemps.
1569*/
1570
sewardjd503a322004-10-13 22:41:16 +00001571inline
1572static void addUses_Temp ( Bool* set, IRTemp tmp )
sewardj17442fe2004-09-20 14:54:28 +00001573{
sewardjd503a322004-10-13 22:41:16 +00001574 set[(Int)tmp] = True;
sewardj17442fe2004-09-20 14:54:28 +00001575}
1576
sewardjd503a322004-10-13 22:41:16 +00001577static void addUses_Expr ( Bool* set, IRExpr* e )
sewardj39e3f242004-08-18 16:54:52 +00001578{
1579 Int i;
1580 switch (e->tag) {
sewardjd7217032004-08-19 10:49:10 +00001581 case Iex_GetI:
sewardjeeac8412004-11-02 00:26:55 +00001582 addUses_Expr(set, e->Iex.GetI.ix);
sewardjd7217032004-08-19 10:49:10 +00001583 return;
sewardj39e3f242004-08-18 16:54:52 +00001584 case Iex_Mux0X:
1585 addUses_Expr(set, e->Iex.Mux0X.cond);
1586 addUses_Expr(set, e->Iex.Mux0X.expr0);
1587 addUses_Expr(set, e->Iex.Mux0X.exprX);
1588 return;
1589 case Iex_CCall:
1590 for (i = 0; e->Iex.CCall.args[i]; i++)
1591 addUses_Expr(set, e->Iex.CCall.args[i]);
1592 return;
1593 case Iex_LDle:
1594 addUses_Expr(set, e->Iex.LDle.addr);
1595 return;
1596 case Iex_Binop:
1597 addUses_Expr(set, e->Iex.Binop.arg1);
1598 addUses_Expr(set, e->Iex.Binop.arg2);
1599 return;
1600 case Iex_Unop:
1601 addUses_Expr(set, e->Iex.Unop.arg);
1602 return;
1603 case Iex_Tmp:
sewardj17442fe2004-09-20 14:54:28 +00001604 addUses_Temp(set, e->Iex.Tmp.tmp);
sewardj39e3f242004-08-18 16:54:52 +00001605 return;
1606 case Iex_Const:
1607 case Iex_Get:
1608 return;
1609 default:
1610 vex_printf("\n");
1611 ppIRExpr(e);
1612 vpanic("addUses_Expr");
1613 }
1614}
1615
sewardjd503a322004-10-13 22:41:16 +00001616static void addUses_Stmt ( Bool* set, IRStmt* st )
sewardj39e3f242004-08-18 16:54:52 +00001617{
sewardj17442fe2004-09-20 14:54:28 +00001618 Int i;
1619 IRDirty* d;
sewardj39e3f242004-08-18 16:54:52 +00001620 switch (st->tag) {
sewardjd7217032004-08-19 10:49:10 +00001621 case Ist_PutI:
sewardjeeac8412004-11-02 00:26:55 +00001622 addUses_Expr(set, st->Ist.PutI.ix);
sewardj2d3f77c2004-09-22 23:49:09 +00001623 addUses_Expr(set, st->Ist.PutI.data);
sewardjd7217032004-08-19 10:49:10 +00001624 return;
sewardj39e3f242004-08-18 16:54:52 +00001625 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +00001626 addUses_Expr(set, st->Ist.Tmp.data);
sewardj39e3f242004-08-18 16:54:52 +00001627 return;
1628 case Ist_Put:
sewardj6d076362004-09-23 11:06:17 +00001629 addUses_Expr(set, st->Ist.Put.data);
sewardj39e3f242004-08-18 16:54:52 +00001630 return;
1631 case Ist_STle:
1632 addUses_Expr(set, st->Ist.STle.addr);
1633 addUses_Expr(set, st->Ist.STle.data);
1634 return;
sewardj17442fe2004-09-20 14:54:28 +00001635 case Ist_Dirty:
1636 d = st->Ist.Dirty.details;
1637 if (d->mFx != Ifx_None)
1638 addUses_Expr(set, d->mAddr);
sewardjb8385d82004-11-02 01:34:15 +00001639 addUses_Expr(set, d->guard);
sewardj17442fe2004-09-20 14:54:28 +00001640 for (i = 0; d->args[i] != NULL; i++)
1641 addUses_Expr(set, d->args[i]);
1642 return;
sewardjd2445f62005-03-21 00:15:53 +00001643 case Ist_NoOp:
sewardjf1689312005-03-16 18:19:10 +00001644 case Ist_IMark:
sewardj3e838932005-01-07 12:09:15 +00001645 case Ist_MFence:
1646 return;
sewardj17442fe2004-09-20 14:54:28 +00001647 case Ist_Exit:
sewardj0276d4b2004-11-15 15:30:21 +00001648 addUses_Expr(set, st->Ist.Exit.guard);
sewardj17442fe2004-09-20 14:54:28 +00001649 return;
sewardj39e3f242004-08-18 16:54:52 +00001650 default:
1651 vex_printf("\n");
1652 ppIRStmt(st);
1653 vpanic("addUses_Stmt");
sewardjd503a322004-10-13 22:41:16 +00001654 }
sewardj39e3f242004-08-18 16:54:52 +00001655}
1656
1657
sewardjba999312004-11-15 15:21:17 +00001658/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
1659static Bool isZeroU1 ( IRExpr* e )
sewardjd9997882004-11-04 19:42:59 +00001660{
sewardj9d2e7692005-02-07 01:11:31 +00001661 return toBool( e->tag == Iex_Const
1662 && e->Iex.Const.con->tag == Ico_U1
1663 && e->Iex.Const.con->Ico.U1 == False );
sewardjd9997882004-11-04 19:42:59 +00001664}
1665
sewardj39e3f242004-08-18 16:54:52 +00001666
1667/* Note, this destructively modifies the given IRBB. */
1668
1669/* Scan backwards through statements, carrying a set of IRTemps which
1670 are known to be used after the current point. On encountering 't =
1671 E', delete the binding if it is not used. Otherwise, add any temp
1672 uses to the set and keep on moving backwards. */
1673
sewardj49651f42004-10-28 22:11:04 +00001674/* notstatic */ void do_deadcode_BB ( IRBB* bb )
sewardj39e3f242004-08-18 16:54:52 +00001675{
1676 Int i;
sewardjd503a322004-10-13 22:41:16 +00001677 Int n_tmps = bb->tyenv->types_used;
1678 Bool* set = LibVEX_Alloc(n_tmps * sizeof(Bool));
sewardj39e3f242004-08-18 16:54:52 +00001679 IRStmt* st;
1680
sewardjd503a322004-10-13 22:41:16 +00001681 for (i = 0; i < n_tmps; i++)
1682 set[i] = False;
1683
sewardj39e3f242004-08-18 16:54:52 +00001684 /* start off by recording IRTemp uses in the next field. */
1685 addUses_Expr(set, bb->next);
1686
1687 /* Work backwards through the stmts */
1688 for (i = bb->stmts_used-1; i >= 0; i--) {
1689 st = bb->stmts[i];
sewardj84ff0652004-08-23 16:16:08 +00001690 if (!st)
1691 continue;
sewardj39e3f242004-08-18 16:54:52 +00001692 if (st->tag == Ist_Tmp
sewardjd503a322004-10-13 22:41:16 +00001693 && set[(Int)(st->Ist.Tmp.tmp)] == False) {
sewardj39e3f242004-08-18 16:54:52 +00001694 /* it's an IRTemp which never got used. Delete it. */
sewardj088bcb42004-08-19 17:16:52 +00001695 if (DEBUG_IROPT) {
sewardj39e3f242004-08-18 16:54:52 +00001696 vex_printf("DEAD: ");
1697 ppIRStmt(st);
1698 vex_printf("\n");
1699 }
sewardjd2445f62005-03-21 00:15:53 +00001700 bb->stmts[i] = IRStmt_NoOp();
sewardjd9997882004-11-04 19:42:59 +00001701 }
1702 else
1703 if (st->tag == Ist_Dirty
1704 && st->Ist.Dirty.details->guard
sewardjba999312004-11-15 15:21:17 +00001705 && isZeroU1(st->Ist.Dirty.details->guard)) {
sewardjd2445f62005-03-21 00:15:53 +00001706 /* This is a dirty helper which will never get called.
1707 Delete it. */
1708 bb->stmts[i] = IRStmt_NoOp();
sewardjd9997882004-11-04 19:42:59 +00001709 }
1710 else {
sewardj39e3f242004-08-18 16:54:52 +00001711 /* Note any IRTemp uses made by the current statement. */
1712 addUses_Stmt(set, st);
1713 }
1714 }
1715}
1716
sewardj84ff0652004-08-23 16:16:08 +00001717/*---------------------------------------------------------------*/
1718/*--- Specialisation of helper function calls, in ---*/
1719/*--- collaboration with the front end ---*/
1720/*---------------------------------------------------------------*/
1721
1722static
sewardjb9230752004-12-29 19:25:06 +00001723IRBB* spec_helpers_BB ( IRBB* bb,
sewardj9d2e7692005-02-07 01:11:31 +00001724 IRExpr* (*specHelper) ( HChar*, IRExpr**) )
sewardj84ff0652004-08-23 16:16:08 +00001725{
sewardjb9230752004-12-29 19:25:06 +00001726 Int i;
sewardj84ff0652004-08-23 16:16:08 +00001727 IRStmt* st;
1728 IRExpr* ex;
sewardjb9230752004-12-29 19:25:06 +00001729 Bool any = False;
sewardj84ff0652004-08-23 16:16:08 +00001730
1731 for (i = bb->stmts_used-1; i >= 0; i--) {
1732 st = bb->stmts[i];
1733
1734 if (!st
1735 || st->tag != Ist_Tmp
sewardj6d076362004-09-23 11:06:17 +00001736 || st->Ist.Tmp.data->tag != Iex_CCall)
sewardjaba4fff2004-10-08 21:37:15 +00001737 continue;
sewardj84ff0652004-08-23 16:16:08 +00001738
sewardj8ea867b2004-10-30 19:03:02 +00001739 ex = (*specHelper)( st->Ist.Tmp.data->Iex.CCall.cee->name,
sewardj6d076362004-09-23 11:06:17 +00001740 st->Ist.Tmp.data->Iex.CCall.args );
sewardj84ff0652004-08-23 16:16:08 +00001741 if (!ex)
sewardjaba4fff2004-10-08 21:37:15 +00001742 /* the front end can't think of a suitable replacement */
1743 continue;
sewardj84ff0652004-08-23 16:16:08 +00001744
1745 /* We got something better. Install it in the bb. */
sewardjb9230752004-12-29 19:25:06 +00001746 any = True;
sewardj84ff0652004-08-23 16:16:08 +00001747 bb->stmts[i]
1748 = IRStmt_Tmp(st->Ist.Tmp.tmp, ex);
1749
1750 if (0) {
1751 vex_printf("SPEC: ");
sewardj6d076362004-09-23 11:06:17 +00001752 ppIRExpr(st->Ist.Tmp.data);
sewardj84ff0652004-08-23 16:16:08 +00001753 vex_printf(" --> ");
1754 ppIRExpr(ex);
1755 vex_printf("\n");
1756 }
1757 }
sewardjb9230752004-12-29 19:25:06 +00001758
1759 if (any)
1760 bb = flatten_BB(bb);
1761 return bb;
sewardj84ff0652004-08-23 16:16:08 +00001762}
1763
sewardj29632392004-08-22 02:38:11 +00001764
1765/*---------------------------------------------------------------*/
sewardj08210532004-12-29 17:09:11 +00001766/*--- Common Subexpression Elimination ---*/
1767/*---------------------------------------------------------------*/
1768
1769/* Expensive in time and space. */
1770
1771/* Uses two environments:
1772 a IRTemp -> IRTemp mapping
1773 a mapping from AvailExpr* to IRTemp
1774*/
1775
1776typedef
1777 struct {
1778 enum { Ut, Btt, Btc, Bct, Cf64i } tag;
1779 union {
1780 /* unop(tmp) */
1781 struct {
1782 IROp op;
1783 IRTemp arg;
1784 } Ut;
1785 /* binop(tmp,tmp) */
1786 struct {
1787 IROp op;
1788 IRTemp arg1;
1789 IRTemp arg2;
1790 } Btt;
1791 /* binop(tmp,const) */
1792 struct {
1793 IROp op;
1794 IRTemp arg1;
1795 IRConst con2;
1796 } Btc;
1797 /* binop(const,tmp) */
1798 struct {
1799 IROp op;
1800 IRConst con1;
1801 IRTemp arg2;
1802 } Bct;
1803 /* F64i-style const */
1804 struct {
1805 ULong f64i;
1806 } Cf64i;
1807 } u;
1808 }
1809 AvailExpr;
1810
1811static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
1812{
1813 if (a1->tag != a2->tag)
1814 return False;
1815 switch (a1->tag) {
1816 case Ut:
sewardj9d2e7692005-02-07 01:11:31 +00001817 return toBool(
1818 a1->u.Ut.op == a2->u.Ut.op
1819 && a1->u.Ut.arg == a2->u.Ut.arg);
sewardj08210532004-12-29 17:09:11 +00001820 case Btt:
sewardj9d2e7692005-02-07 01:11:31 +00001821 return toBool(
1822 a1->u.Btt.op == a2->u.Btt.op
sewardj08210532004-12-29 17:09:11 +00001823 && a1->u.Btt.arg1 == a2->u.Btt.arg1
sewardj9d2e7692005-02-07 01:11:31 +00001824 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
sewardj08210532004-12-29 17:09:11 +00001825 case Btc:
sewardj9d2e7692005-02-07 01:11:31 +00001826 return toBool(
1827 a1->u.Btc.op == a2->u.Btc.op
sewardj08210532004-12-29 17:09:11 +00001828 && a1->u.Btc.arg1 == a2->u.Btc.arg1
sewardj9d2e7692005-02-07 01:11:31 +00001829 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
sewardj08210532004-12-29 17:09:11 +00001830 case Bct:
sewardj9d2e7692005-02-07 01:11:31 +00001831 return toBool(
1832 a1->u.Bct.op == a2->u.Bct.op
sewardj08210532004-12-29 17:09:11 +00001833 && a1->u.Bct.arg2 == a2->u.Bct.arg2
sewardj9d2e7692005-02-07 01:11:31 +00001834 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
sewardj08210532004-12-29 17:09:11 +00001835 case Cf64i:
sewardj9d2e7692005-02-07 01:11:31 +00001836 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
sewardj08210532004-12-29 17:09:11 +00001837 default: vpanic("eq_AvailExpr");
1838 }
1839}
1840
1841static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
1842{
1843 IRConst* con;
1844 switch (ae->tag) {
1845 case Ut:
1846 return IRExpr_Unop( ae->u.Ut.op, IRExpr_Tmp(ae->u.Ut.arg) );
1847 case Btt:
1848 return IRExpr_Binop( ae->u.Btt.op,
1849 IRExpr_Tmp(ae->u.Btt.arg1),
1850 IRExpr_Tmp(ae->u.Btt.arg2) );
1851 case Btc:
1852 con = LibVEX_Alloc(sizeof(IRConst));
1853 *con = ae->u.Btc.con2;
1854 return IRExpr_Binop( ae->u.Btc.op,
1855 IRExpr_Tmp(ae->u.Btc.arg1), IRExpr_Const(con) );
1856 case Bct:
1857 con = LibVEX_Alloc(sizeof(IRConst));
1858 *con = ae->u.Bct.con1;
1859 return IRExpr_Binop( ae->u.Bct.op,
1860 IRExpr_Const(con), IRExpr_Tmp(ae->u.Bct.arg2) );
1861 case Cf64i:
1862 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
1863 default:
1864 vpanic("availExpr_to_IRExpr");
1865 }
1866}
1867
1868inline
1869static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
1870{
1871 HWord res;
1872 /* env :: IRTemp -> IRTemp */
1873 if (lookupHHW( env, &res, (HWord)tmp ))
1874 return (IRTemp)res;
1875 else
1876 return tmp;
1877}
1878
1879static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
1880{
1881 /* env :: IRTemp -> IRTemp */
1882 switch (ae->tag) {
1883 case Ut:
1884 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
1885 break;
1886 case Btt:
1887 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
1888 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
1889 break;
1890 case Btc:
1891 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
1892 break;
1893 case Bct:
1894 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
1895 break;
1896 case Cf64i:
1897 break;
1898 default:
1899 vpanic("subst_AvailExpr");
1900 }
1901}
1902
1903static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e )
1904{
1905 AvailExpr* ae;
1906
1907 if (e->tag == Iex_Unop
1908 && e->Iex.Unop.arg->tag == Iex_Tmp) {
1909 ae = LibVEX_Alloc(sizeof(AvailExpr));
1910 ae->tag = Ut;
1911 ae->u.Ut.op = e->Iex.Unop.op;
1912 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.Tmp.tmp;
1913 return ae;
1914 }
1915
1916 if (e->tag == Iex_Binop
1917 && e->Iex.Binop.arg1->tag == Iex_Tmp
1918 && e->Iex.Binop.arg2->tag == Iex_Tmp) {
1919 ae = LibVEX_Alloc(sizeof(AvailExpr));
1920 ae->tag = Btt;
1921 ae->u.Btt.op = e->Iex.Binop.op;
1922 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.Tmp.tmp;
1923 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.Tmp.tmp;
1924 return ae;
1925 }
1926
1927 if (e->tag == Iex_Binop
1928 && e->Iex.Binop.arg1->tag == Iex_Tmp
1929 && e->Iex.Binop.arg2->tag == Iex_Const) {
1930 ae = LibVEX_Alloc(sizeof(AvailExpr));
1931 ae->tag = Btc;
1932 ae->u.Btc.op = e->Iex.Binop.op;
1933 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.Tmp.tmp;
1934 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
1935 return ae;
1936 }
1937
1938 if (e->tag == Iex_Binop
1939 && e->Iex.Binop.arg1->tag == Iex_Const
1940 && e->Iex.Binop.arg2->tag == Iex_Tmp) {
1941 ae = LibVEX_Alloc(sizeof(AvailExpr));
1942 ae->tag = Bct;
1943 ae->u.Bct.op = e->Iex.Binop.op;
1944 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.Tmp.tmp;
1945 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
1946 return ae;
1947 }
1948
1949 if (e->tag == Iex_Const
1950 && e->Iex.Const.con->tag == Ico_F64i) {
1951 ae = LibVEX_Alloc(sizeof(AvailExpr));
1952 ae->tag = Cf64i;
1953 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
1954 return ae;
1955 }
1956
1957 return NULL;
1958}
1959
1960
1961/* The BB is modified in-place. */
1962
1963void do_cse_BB ( IRBB* bb )
1964{
1965 Int i, j;
1966 IRTemp t, q;
1967 IRStmt* st;
1968 AvailExpr* eprime;
1969
1970 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
1971 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
1972
1973 vassert(sizeof(IRTemp) <= sizeof(HWord));
1974
1975 //ppIRBB(bb);
1976 //vex_printf("\n\n");
1977
1978 /* Iterate forwards over the stmts.
1979 On seeing "t = E", where E is one of the 3 AvailExpr forms:
1980 let E' = apply tenv substitution to E
1981 search aenv for E'
1982 if a mapping E' -> q is found,
1983 replace this stmt by "t = q"
1984 and add binding t -> q to tenv
1985 else
1986 add binding E' -> t to aenv
1987 replace this stmt by "t = E'"
1988 Ignore any other kind of stmt.
1989 */
1990 for (i = 0; i < bb->stmts_used; i++) {
1991 st = bb->stmts[i];
1992
1993 /* ignore not-interestings */
1994 if ((!st) || st->tag != Ist_Tmp)
1995 continue;
1996
1997 t = st->Ist.Tmp.tmp;
1998 eprime = irExpr_to_AvailExpr(st->Ist.Tmp.data);
1999 /* ignore if not of AvailExpr form */
2000 if (!eprime)
2001 continue;
2002
2003 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
2004
2005 /* apply tenv */
2006 subst_AvailExpr( tenv, eprime );
2007
2008 /* search aenv for eprime, unfortunately the hard way */
2009 for (j = 0; j < aenv->used; j++)
2010 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
2011 break;
2012
2013 if (j < aenv->used) {
2014 /* A binding E' -> q was found. Replace stmt by "t = q" and
2015 note the t->q binding in tenv. */
2016 /* (this is the core of the CSE action) */
2017 q = (IRTemp)aenv->val[j];
2018 bb->stmts[i] = IRStmt_Tmp( t, IRExpr_Tmp(q) );
2019 addToHHW( tenv, (HWord)t, (HWord)q );
2020 } else {
2021 /* No binding was found, so instead we add E' -> t to our
2022 collection of available expressions, replace this stmt
2023 with "t = E'", and move on. */
2024 bb->stmts[i] = IRStmt_Tmp( t, availExpr_to_IRExpr(eprime) );
2025 addToHHW( aenv, (HWord)eprime, (HWord)t );
2026 }
2027 }
2028
2029 //ppIRBB(bb);
2030 //sanityCheckIRBB(bb, Ity_I32);
2031 //vex_printf("\n\n");
2032
2033}
2034
2035
2036/*---------------------------------------------------------------*/
2037/*--- Add32/Sub32 chain collapsing ---*/
2038/*---------------------------------------------------------------*/
2039
2040/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
2041
2042/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
2043 yes, set *tmp and *i32 appropriately. *i32 is set as if the
2044 root node is Add32, not Sub32. */
2045
2046static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
2047{
2048 if (e->tag != Iex_Binop)
2049 return False;
2050 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
2051 return False;
2052 if (e->Iex.Binop.arg1->tag != Iex_Tmp)
2053 return False;
2054 if (e->Iex.Binop.arg2->tag != Iex_Const)
2055 return False;
2056 *tmp = e->Iex.Binop.arg1->Iex.Tmp.tmp;
2057 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
2058 if (e->Iex.Binop.op == Iop_Sub32)
2059 *i32 = -*i32;
2060 return True;
2061}
2062
2063
2064/* Figure out if tmp can be expressed as tmp2 +32 const, for some
2065 other tmp2. Scan backwards from the specified start point -- an
2066 optimisation. */
2067
2068static Bool collapseChain ( IRBB* bb, Int startHere,
2069 IRTemp tmp,
2070 IRTemp* tmp2, Int* i32 )
2071{
2072 Int j, ii;
2073 IRTemp vv;
2074 IRStmt* st;
2075 IRExpr* e;
2076
2077 /* the (var, con) pair contain the current 'representation' for
2078 'tmp'. We start with 'tmp + 0'. */
2079 IRTemp var = tmp;
2080 Int con = 0;
2081
2082 /* Scan backwards to see if tmp can be replaced by some other tmp
2083 +/- a constant. */
2084 for (j = startHere; j >= 0; j--) {
2085 st = bb->stmts[j];
2086 if (!st || st->tag != Ist_Tmp)
2087 continue;
2088 if (st->Ist.Tmp.tmp != var)
2089 continue;
2090 e = st->Ist.Tmp.data;
2091 if (!isAdd32OrSub32(e, &vv, &ii))
2092 break;
2093 var = vv;
2094 con += ii;
2095 }
2096 if (j == -1)
2097 /* no earlier binding for var .. ill-formed IR */
2098 vpanic("collapseChain");
2099
2100 /* so, did we find anything interesting? */
2101 if (var == tmp)
2102 return False; /* no .. */
2103
2104 *tmp2 = var;
2105 *i32 = con;
2106 return True;
2107}
2108
2109
2110/* ------- Main function for Add32/Sub32 chain collapsing ------ */
2111
2112static void collapse_AddSub_chains_BB ( IRBB* bb )
2113{
2114 IRStmt *st;
2115 IRTemp var, var2;
2116 Int i, con, con2;
2117
2118 for (i = bb->stmts_used-1; i >= 0; i--) {
2119 st = bb->stmts[i];
2120 if (!st)
2121 continue;
2122
2123 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
2124
2125 if (st->tag == Ist_Tmp
2126 && isAdd32OrSub32(st->Ist.Tmp.data, &var, &con)) {
2127
2128 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
2129 Find out if var can be expressed as var2 + con2. */
2130 if (collapseChain(bb, i-1, var, &var2, &con2)) {
2131 if (DEBUG_IROPT) {
2132 vex_printf("replacing1 ");
2133 ppIRStmt(st);
2134 vex_printf(" with ");
2135 }
2136 con2 += con;
2137 bb->stmts[i]
2138 = IRStmt_Tmp(
2139 st->Ist.Tmp.tmp,
2140 (con2 >= 0)
2141 ? IRExpr_Binop(Iop_Add32,
2142 IRExpr_Tmp(var2),
2143 IRExpr_Const(IRConst_U32(con2)))
2144 : IRExpr_Binop(Iop_Sub32,
2145 IRExpr_Tmp(var2),
2146 IRExpr_Const(IRConst_U32(-con2)))
2147 );
2148 if (DEBUG_IROPT) {
2149 ppIRStmt(bb->stmts[i]);
2150 vex_printf("\n");
2151 }
2152 }
2153
2154 continue;
2155 }
2156
2157 /* Try to collapse 't1 = GetI[t2, con]'. */
2158
2159 if (st->tag == Ist_Tmp
2160 && st->Ist.Tmp.data->tag == Iex_GetI
2161 && st->Ist.Tmp.data->Iex.GetI.ix->tag == Iex_Tmp
2162 && collapseChain(bb, i-1, st->Ist.Tmp.data->Iex.GetI.ix
2163 ->Iex.Tmp.tmp, &var2, &con2)) {
2164 if (DEBUG_IROPT) {
2165 vex_printf("replacing3 ");
2166 ppIRStmt(st);
2167 vex_printf(" with ");
2168 }
2169 con2 += st->Ist.Tmp.data->Iex.GetI.bias;
2170 bb->stmts[i]
2171 = IRStmt_Tmp(
2172 st->Ist.Tmp.tmp,
2173 IRExpr_GetI(st->Ist.Tmp.data->Iex.GetI.descr,
2174 IRExpr_Tmp(var2),
2175 con2));
2176 if (DEBUG_IROPT) {
2177 ppIRStmt(bb->stmts[i]);
2178 vex_printf("\n");
2179 }
2180 continue;
2181 }
2182
2183 /* Perhaps st is PutI[t, con] ? */
2184
2185 if (st->tag == Ist_PutI
2186 && st->Ist.PutI.ix->tag == Iex_Tmp
2187 && collapseChain(bb, i-1, st->Ist.PutI.ix->Iex.Tmp.tmp,
2188 &var2, &con2)) {
2189 if (DEBUG_IROPT) {
2190 vex_printf("replacing2 ");
2191 ppIRStmt(st);
2192 vex_printf(" with ");
2193 }
2194 con2 += st->Ist.PutI.bias;
2195 bb->stmts[i]
2196 = IRStmt_PutI(st->Ist.PutI.descr,
2197 IRExpr_Tmp(var2),
2198 con2,
2199 st->Ist.PutI.data);
2200 if (DEBUG_IROPT) {
2201 ppIRStmt(bb->stmts[i]);
2202 vex_printf("\n");
2203 }
2204 continue;
2205 }
2206
2207 } /* for */
2208}
2209
2210
2211/*---------------------------------------------------------------*/
2212/*--- PutI/GetI transformations ---*/
2213/*---------------------------------------------------------------*/
2214
2215/* ------- Helper functions for PutI/GetI transformations ------ */
2216
sewardj08210532004-12-29 17:09:11 +00002217/* Determine, to the extent possible, the relationship between two
2218 guest state accesses. The possible outcomes are:
2219
2220 * Exact alias. These two accesses denote precisely the same
2221 piece of the guest state.
2222
2223 * Definitely no alias. These two accesses are guaranteed not to
2224 overlap any part of the guest state.
2225
2226 * Unknown -- if neither of the above can be established.
2227
2228 If in doubt, return Unknown. */
2229
2230typedef
2231 enum { ExactAlias, NoAlias, UnknownAlias }
2232 GSAliasing;
2233
2234
2235/* Produces the alias relation between an indexed guest
2236 state access and a non-indexed access. */
2237
2238static
2239GSAliasing getAliasingRelation_IC ( IRArray* descr1, IRExpr* ix1,
2240 Int offset2, IRType ty2 )
2241{
2242 UInt minoff1, maxoff1, minoff2, maxoff2;
2243
2244 getArrayBounds( descr1, &minoff1, &maxoff1 );
2245 minoff2 = offset2;
2246 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
2247
2248 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2249 return NoAlias;
2250
2251 /* Could probably do better here if required. For the moment
2252 however just claim not to know anything more. */
2253 return UnknownAlias;
2254}
2255
2256
2257/* Produces the alias relation between two indexed guest state
2258 accesses. */
2259
2260static
2261GSAliasing getAliasingRelation_II (
2262 IRArray* descr1, IRExpr* ix1, Int bias1,
2263 IRArray* descr2, IRExpr* ix2, Int bias2
2264 )
2265{
2266 UInt minoff1, maxoff1, minoff2, maxoff2;
2267 Int iters;
2268
2269 /* First try hard to show they don't alias. */
2270 getArrayBounds( descr1, &minoff1, &maxoff1 );
2271 getArrayBounds( descr2, &minoff2, &maxoff2 );
2272 if (maxoff1 < minoff2 || maxoff2 < minoff1)
2273 return NoAlias;
2274
2275 /* So the two arrays at least partially overlap. To get any
2276 further we'll have to be sure that the descriptors are
2277 identical. */
2278 if (!eqIRArray(descr1, descr2))
2279 return UnknownAlias;
2280
2281 /* The descriptors are identical. Now the only difference can be
2282 in the index expressions. If they cannot be shown to be
2283 identical, we have to say we don't know what the aliasing
2284 relation will be. Now, since the IR is flattened, the index
2285 expressions should be atoms -- either consts or tmps. So that
2286 makes the comparison simple. */
sewardj496a58d2005-03-20 18:44:44 +00002287 vassert(isIRAtom(ix1));
2288 vassert(isIRAtom(ix2));
2289 if (!eqIRAtom(ix1,ix2))
sewardj08210532004-12-29 17:09:11 +00002290 return UnknownAlias;
2291
2292 /* Ok, the index expressions are identical. So now the only way
2293 they can be different is in the bias. Normalise this
2294 paranoidly, to reliably establish equality/non-equality. */
2295
2296 /* So now we know that the GetI and PutI index the same array
2297 with the same base. Are the offsets the same, modulo the
2298 array size? Do this paranoidly. */
2299 vassert(descr1->nElems == descr2->nElems);
2300 vassert(descr1->elemTy == descr2->elemTy);
2301 vassert(descr1->base == descr2->base);
2302 iters = 0;
2303 while (bias1 < 0 || bias2 < 0) {
2304 bias1 += descr1->nElems;
2305 bias2 += descr1->nElems;
2306 iters++;
2307 if (iters > 10)
2308 vpanic("getAliasingRelation: iters");
2309 }
2310 vassert(bias1 >= 0 && bias2 >= 0);
2311 bias1 %= descr1->nElems;
2312 bias2 %= descr1->nElems;
2313 vassert(bias1 >= 0 && bias1 < descr1->nElems);
2314 vassert(bias2 >= 0 && bias2 < descr1->nElems);
2315
2316 /* Finally, biasP and biasG are normalised into the range
2317 0 .. descrP/G->nElems - 1. And so we can establish
2318 equality/non-equality. */
2319
2320 return bias1==bias2 ? ExactAlias : NoAlias;
2321}
2322
2323
2324/* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
2325 the given starting point to find, if any, a PutI which writes
2326 exactly the same piece of guest state, and so return the expression
2327 that the PutI writes. This is the core of PutI-GetI forwarding. */
2328
2329static
2330IRExpr* findPutI ( IRBB* bb, Int startHere,
2331 IRArray* descrG, IRExpr* ixG, Int biasG )
2332{
2333 Int j;
2334 IRStmt* st;
2335 GSAliasing relation;
2336
2337 if (0) {
2338 vex_printf("\nfindPutI ");
2339 ppIRArray(descrG);
2340 vex_printf(" ");
2341 ppIRExpr(ixG);
2342 vex_printf(" %d\n", biasG);
2343 }
2344
2345 /* Scan backwards in bb from startHere to find a suitable PutI
2346 binding for (descrG, ixG, biasG), if any. */
2347
2348 for (j = startHere; j >= 0; j--) {
2349 st = bb->stmts[j];
2350 if (!st) continue;
2351
2352 if (st->tag == Ist_Put) {
2353 /* Non-indexed Put. This can't give a binding, but we do
2354 need to check it doesn't invalidate the search by
2355 overlapping any part of the indexed guest state. */
2356
2357 relation
2358 = getAliasingRelation_IC(
2359 descrG, ixG,
2360 st->Ist.Put.offset,
2361 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
2362
2363 if (relation == NoAlias) {
2364 /* we're OK; keep going */
2365 continue;
2366 } else {
2367 /* relation == UnknownAlias || relation == ExactAlias */
2368 /* If this assertion fails, we've found a Put which writes
2369 an area of guest state which is read by a GetI. Which
2370 is unlikely (although not per se wrong). */
2371 vassert(relation != ExactAlias);
2372 /* This Put potentially writes guest state that the GetI
2373 reads; we must fail. */
2374 return NULL;
2375 }
2376 }
2377
2378 if (st->tag == Ist_PutI) {
2379
2380 relation = getAliasingRelation_II(
2381 descrG, ixG, biasG,
2382 st->Ist.PutI.descr,
2383 st->Ist.PutI.ix,
2384 st->Ist.PutI.bias
2385 );
2386
2387 if (relation == NoAlias) {
2388 /* This PutI definitely doesn't overlap. Ignore it and
2389 keep going. */
2390 continue; /* the for j loop */
2391 }
2392
2393 if (relation == UnknownAlias) {
2394 /* We don't know if this PutI writes to the same guest
2395 state that the GetI, or not. So we have to give up. */
2396 return NULL;
2397 }
2398
2399 /* Otherwise, we've found what we're looking for. */
2400 vassert(relation == ExactAlias);
2401 return st->Ist.PutI.data;
2402
2403 } /* if (st->tag == Ist_PutI) */
2404
2405 if (st->tag == Ist_Dirty) {
2406 /* Be conservative. If the dirty call has any guest effects at
2407 all, give up. We could do better -- only give up if there
2408 are any guest writes/modifies. */
2409 if (st->Ist.Dirty.details->nFxState > 0)
2410 return NULL;
2411 }
2412
2413 } /* for */
2414
2415 /* No valid replacement was found. */
2416 return NULL;
2417}
2418
2419
2420
2421/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
2422 that it writes exactly the same piece of guest state) ? Safe
2423 answer: False. */
2424
2425static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
2426{
2427 vassert(pi->tag == Ist_PutI);
2428 if (s2->tag != Ist_PutI)
2429 return False;
2430
sewardj9d2e7692005-02-07 01:11:31 +00002431 return toBool(
2432 getAliasingRelation_II(
sewardj08210532004-12-29 17:09:11 +00002433 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
2434 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
2435 )
sewardj9d2e7692005-02-07 01:11:31 +00002436 == ExactAlias
2437 );
sewardj08210532004-12-29 17:09:11 +00002438}
2439
2440
2441/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
2442 overlap it? Safe answer: True. Note, we could do a lot better
2443 than this if needed. */
2444
2445static
2446Bool guestAccessWhichMightOverlapPutI (
2447 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
2448 )
2449{
2450 GSAliasing relation;
2451 UInt minoffP, maxoffP;
2452
2453 vassert(pi->tag == Ist_PutI);
2454 getArrayBounds(pi->Ist.PutI.descr, &minoffP, &maxoffP);
2455 switch (s2->tag) {
2456
sewardjd2445f62005-03-21 00:15:53 +00002457 case Ist_NoOp:
sewardjf1689312005-03-16 18:19:10 +00002458 case Ist_IMark:
2459 return False;
2460
sewardjbb3f52d2005-01-07 14:14:50 +00002461 case Ist_MFence:
2462 /* just be paranoid ... this should be rare. */
2463 return True;
2464
sewardj08210532004-12-29 17:09:11 +00002465 case Ist_Dirty:
2466 /* If the dirty call has any guest effects at all, give up.
2467 Probably could do better. */
2468 if (s2->Ist.Dirty.details->nFxState > 0)
2469 return True;
2470 return False;
2471
2472 case Ist_Put:
sewardj496a58d2005-03-20 18:44:44 +00002473 vassert(isIRAtom(s2->Ist.Put.data));
sewardj08210532004-12-29 17:09:11 +00002474 relation
2475 = getAliasingRelation_IC(
2476 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
2477 s2->Ist.Put.offset,
2478 typeOfIRExpr(tyenv,s2->Ist.Put.data)
2479 );
2480 goto have_relation;
2481
2482 case Ist_PutI:
sewardj496a58d2005-03-20 18:44:44 +00002483 vassert(isIRAtom(s2->Ist.PutI.ix));
2484 vassert(isIRAtom(s2->Ist.PutI.data));
sewardj08210532004-12-29 17:09:11 +00002485 relation
2486 = getAliasingRelation_II(
2487 pi->Ist.PutI.descr, pi->Ist.PutI.ix, pi->Ist.PutI.bias,
2488 s2->Ist.PutI.descr, s2->Ist.PutI.ix, s2->Ist.PutI.bias
2489 );
2490 goto have_relation;
2491
2492 case Ist_Tmp:
2493 if (s2->Ist.Tmp.data->tag == Iex_GetI) {
2494 relation
2495 = getAliasingRelation_II(
2496 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
2497 pi->Ist.PutI.bias,
2498 s2->Ist.Tmp.data->Iex.GetI.descr,
2499 s2->Ist.Tmp.data->Iex.GetI.ix,
2500 s2->Ist.Tmp.data->Iex.GetI.bias
2501 );
2502 goto have_relation;
2503 }
2504 if (s2->Ist.Tmp.data->tag == Iex_Get) {
2505 relation
2506 = getAliasingRelation_IC(
2507 pi->Ist.PutI.descr, pi->Ist.PutI.ix,
2508 s2->Ist.Tmp.data->Iex.Get.offset,
2509 s2->Ist.Tmp.data->Iex.Get.ty
2510 );
2511 goto have_relation;
2512 }
2513 return False;
2514
2515 case Ist_STle:
sewardj496a58d2005-03-20 18:44:44 +00002516 vassert(isIRAtom(s2->Ist.STle.addr));
2517 vassert(isIRAtom(s2->Ist.STle.data));
sewardj08210532004-12-29 17:09:11 +00002518 return False;
2519
2520 default:
2521 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
2522 vpanic("guestAccessWhichMightOverlapPutI");
2523 }
2524
2525 have_relation:
2526 if (relation == NoAlias)
2527 return False;
2528 else
2529 return True; /* ExactAlias or UnknownAlias */
2530}
2531
2532
2533
2534/* ---------- PutI/GetI transformations main functions --------- */
2535
2536/* Remove redundant GetIs, to the extent that they can be detected.
2537 bb is modified in-place. */
2538
2539static
2540void do_redundant_GetI_elimination ( IRBB* bb )
2541{
2542 Int i;
2543 IRStmt* st;
2544
2545 for (i = bb->stmts_used-1; i >= 0; i--) {
2546 st = bb->stmts[i];
2547 if (!st)
2548 continue;
2549
2550 if (st->tag == Ist_Tmp
2551 && st->Ist.Tmp.data->tag == Iex_GetI
2552 && st->Ist.Tmp.data->Iex.GetI.ix->tag == Iex_Tmp) {
2553 IRArray* descr = st->Ist.Tmp.data->Iex.GetI.descr;
2554 IRExpr* ix = st->Ist.Tmp.data->Iex.GetI.ix;
2555 Int bias = st->Ist.Tmp.data->Iex.GetI.bias;
2556 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
2557 if (replacement
sewardj496a58d2005-03-20 18:44:44 +00002558 && isIRAtom(replacement)
sewardj08210532004-12-29 17:09:11 +00002559 /* Make sure we're doing a type-safe transformation! */
2560 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
2561 if (DEBUG_IROPT) {
2562 vex_printf("rGI: ");
2563 ppIRExpr(st->Ist.Tmp.data);
2564 vex_printf(" -> ");
2565 ppIRExpr(replacement);
2566 vex_printf("\n");
2567 }
2568 bb->stmts[i] = IRStmt_Tmp(st->Ist.Tmp.tmp, replacement);
2569 }
2570 }
2571 }
2572
2573}
2574
2575
2576/* Remove redundant PutIs, to the extent which they can be detected.
2577 bb is modified in-place. */
2578
2579static
2580void do_redundant_PutI_elimination ( IRBB* bb )
2581{
2582 Int i, j;
2583 Bool delete;
2584 IRStmt *st, *stj;
2585
2586 for (i = 0; i < bb->stmts_used; i++) {
2587 st = bb->stmts[i];
2588 if (!st || st->tag != Ist_PutI)
2589 continue;
2590 /* Ok, search forwards from here to see if we can find another
2591 PutI which makes this one redundant, and dodging various
2592 hazards. Search forwards:
2593 * If conditional exit, give up (because anything after that
2594 does not postdominate this put).
2595 * If a Get which might overlap, give up (because this PutI
2596 not necessarily dead).
2597 * If a Put which is identical, stop with success.
2598 * If a Put which might overlap, but is not identical, give up.
2599 * If a dirty helper call which might write guest state, give up.
2600 * If a Put which definitely doesn't overlap, or any other
2601 kind of stmt, continue.
2602 */
2603 delete = False;
2604 for (j = i+1; j < bb->stmts_used; j++) {
2605 stj = bb->stmts[j];
2606 if (!stj)
2607 continue;
2608 if (identicalPutIs(st, stj)) {
2609 /* success! */
2610 delete = True;
2611 break;
2612 }
2613 if (stj->tag == Ist_Exit)
2614 /* give up */
2615 break;
2616 if (st->tag == Ist_Dirty)
2617 /* give up; could do better here */
2618 break;
2619 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
2620 /* give up */
2621 break;
2622 }
2623
2624 if (delete) {
2625 if (DEBUG_IROPT) {
2626 vex_printf("rPI: ");
2627 ppIRStmt(st);
2628 vex_printf("\n");
2629 }
sewardjd2445f62005-03-21 00:15:53 +00002630 bb->stmts[i] = IRStmt_NoOp();
sewardj08210532004-12-29 17:09:11 +00002631 }
2632
2633 }
2634}
2635
2636
2637/*---------------------------------------------------------------*/
2638/*--- Loop unrolling ---*/
2639/*---------------------------------------------------------------*/
2640
2641/* Adjust all tmp values (names) in e by delta. e is destructively
2642 modified. */
2643
2644static void deltaIRExpr ( IRExpr* e, Int delta )
2645{
2646 Int i;
2647 switch (e->tag) {
2648 case Iex_Tmp:
2649 e->Iex.Tmp.tmp += delta;
2650 break;
2651 case Iex_Get:
2652 case Iex_Const:
2653 break;
2654 case Iex_GetI:
2655 deltaIRExpr(e->Iex.GetI.ix, delta);
2656 break;
2657 case Iex_Binop:
2658 deltaIRExpr(e->Iex.Binop.arg1, delta);
2659 deltaIRExpr(e->Iex.Binop.arg2, delta);
2660 break;
2661 case Iex_Unop:
2662 deltaIRExpr(e->Iex.Unop.arg, delta);
2663 break;
2664 case Iex_LDle:
2665 deltaIRExpr(e->Iex.LDle.addr, delta);
2666 break;
2667 case Iex_CCall:
2668 for (i = 0; e->Iex.CCall.args[i]; i++)
2669 deltaIRExpr(e->Iex.CCall.args[i], delta);
2670 break;
2671 case Iex_Mux0X:
2672 deltaIRExpr(e->Iex.Mux0X.cond, delta);
2673 deltaIRExpr(e->Iex.Mux0X.expr0, delta);
2674 deltaIRExpr(e->Iex.Mux0X.exprX, delta);
2675 break;
2676 default:
2677 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
2678 vpanic("deltaIRExpr");
2679 }
2680}
2681
2682/* Adjust all tmp values (names) in st by delta. st is destructively
2683 modified. */
2684
2685static void deltaIRStmt ( IRStmt* st, Int delta )
2686{
2687 Int i;
2688 IRDirty* d;
2689 switch (st->tag) {
sewardjd2445f62005-03-21 00:15:53 +00002690 case Ist_NoOp:
sewardjf1689312005-03-16 18:19:10 +00002691 case Ist_IMark:
2692 break;
sewardj08210532004-12-29 17:09:11 +00002693 case Ist_Put:
2694 deltaIRExpr(st->Ist.Put.data, delta);
2695 break;
2696 case Ist_PutI:
2697 deltaIRExpr(st->Ist.PutI.ix, delta);
2698 deltaIRExpr(st->Ist.PutI.data, delta);
2699 break;
2700 case Ist_Tmp:
2701 st->Ist.Tmp.tmp += delta;
2702 deltaIRExpr(st->Ist.Tmp.data, delta);
2703 break;
2704 case Ist_Exit:
2705 deltaIRExpr(st->Ist.Exit.guard, delta);
2706 break;
2707 case Ist_STle:
2708 deltaIRExpr(st->Ist.STle.addr, delta);
2709 deltaIRExpr(st->Ist.STle.data, delta);
2710 break;
2711 case Ist_Dirty:
2712 d = st->Ist.Dirty.details;
2713 deltaIRExpr(d->guard, delta);
2714 for (i = 0; d->args[i]; i++)
2715 deltaIRExpr(d->args[i], delta);
2716 if (d->tmp != IRTemp_INVALID)
2717 d->tmp += delta;
2718 if (d->mAddr)
2719 deltaIRExpr(d->mAddr, delta);
2720 break;
2721 default:
2722 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
2723 vpanic("deltaIRStmt");
2724 }
2725}
2726
2727
2728/* If possible, return a loop-unrolled version of bb0. The original
2729 is changed. If not possible, return NULL. */
2730
2731/* The two schemas considered are:
2732
2733 X: BODY; goto X
2734
2735 which unrolls to (eg) X: BODY;BODY; goto X
2736
2737 and
2738
2739 X: BODY; if (c) goto X; goto Y
2740 which trivially transforms to
2741 X: BODY; if (!c) goto Y; goto X;
2742 so it falls in the scope of the first case.
2743
2744 X and Y must be literal (guest) addresses.
2745*/
2746
2747static Int calc_unroll_factor( IRBB* bb )
2748{
2749 Int n_stmts, i;
2750
2751 n_stmts = 0;
2752 for (i = 0; i < bb->stmts_used; i++)
2753 if (bb->stmts[i])
2754 n_stmts++;
2755
2756 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
2757 if (vex_control.iropt_verbosity > 0)
2758 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
2759 n_stmts, 8* n_stmts);
2760 return 8;
2761 }
2762 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
2763 if (vex_control.iropt_verbosity > 0)
2764 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
2765 n_stmts, 4* n_stmts);
2766 return 4;
2767 }
2768
2769 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
2770 if (vex_control.iropt_verbosity > 0)
2771 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
2772 n_stmts, 2* n_stmts);
2773 return 2;
2774 }
2775
2776 if (vex_control.iropt_verbosity > 0)
2777 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
2778
2779 return 1;
2780}
2781
2782
2783static IRBB* maybe_loop_unroll_BB ( IRBB* bb0, Addr64 my_addr )
2784{
2785 Int i, j, jmax, n_vars;
2786 Bool xxx_known;
2787 Addr64 xxx_value, yyy_value;
2788 IRExpr* udst;
2789 IRStmt* st;
2790 IRConst* con;
2791 IRBB *bb1, *bb2;
2792 Int unroll_factor;
2793
2794 if (vex_control.iropt_unroll_thresh <= 0)
2795 return NULL;
2796
2797 /* First off, figure out if we can unroll this loop. Do this
2798 without modifying bb0. */
2799
2800 if (bb0->jumpkind != Ijk_Boring)
2801 return NULL;
2802
2803 xxx_known = False;
2804 xxx_value = 0;
2805
2806 /* Extract the next-guest address. If it isn't a literal, we
2807 have to give up. */
2808
2809 udst = bb0->next;
2810 if (udst->tag == Iex_Const
2811 && (udst->Iex.Const.con->tag == Ico_U32
2812 || udst->Iex.Const.con->tag == Ico_U64)) {
2813 /* The BB ends in a jump to a literal location. */
2814 xxx_known = True;
2815 xxx_value = udst->Iex.Const.con->tag == Ico_U64
2816 ? udst->Iex.Const.con->Ico.U64
2817 : (Addr64)(udst->Iex.Const.con->Ico.U32);
2818 }
2819
2820 if (!xxx_known)
2821 return NULL;
2822
2823 /* Now we know the BB ends to a jump to a literal location. If
2824 it's a jump to itself (viz, idiom #1), move directly to the
2825 unrolling stage, first cloning the bb so the original isn't
2826 modified. */
2827 if (xxx_value == my_addr) {
2828 unroll_factor = calc_unroll_factor( bb0 );
2829 if (unroll_factor < 2)
2830 return NULL;
2831 bb1 = dopyIRBB( bb0 );
2832 bb0 = NULL;
2833 udst = NULL; /* is now invalid */
2834 goto do_unroll;
2835 }
2836
2837 /* Search for the second idiomatic form:
2838 X: BODY; if (c) goto X; goto Y
2839 We know Y, but need to establish that the last stmt
2840 is 'if (c) goto X'.
2841 */
2842 yyy_value = xxx_value;
2843 for (i = bb0->stmts_used-1; i >= 0; i--)
2844 if (bb0->stmts[i])
2845 break;
2846
2847 if (i < 0)
2848 return NULL; /* block with no stmts. Strange. */
2849
2850 st = bb0->stmts[i];
2851 if (st->tag != Ist_Exit)
2852 return NULL;
2853 if (st->Ist.Exit.jk != Ijk_Boring)
2854 return NULL;
2855
2856 con = st->Ist.Exit.dst;
2857 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
2858
2859 xxx_value = con->tag == Ico_U64
2860 ? st->Ist.Exit.dst->Ico.U64
2861 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
2862
2863 /* If this assertion fails, we have some kind of type error. */
2864 vassert(con->tag == udst->Iex.Const.con->tag);
2865
2866 if (xxx_value != my_addr)
2867 /* We didn't find either idiom. Give up. */
2868 return NULL;
2869
2870 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
2871 yyy values (which makes it look like idiom #1), and go into
2872 unrolling proper. This means finding (again) the last stmt, in
2873 the copied BB. */
2874
2875 unroll_factor = calc_unroll_factor( bb0 );
2876 if (unroll_factor < 2)
2877 return NULL;
2878
2879 bb1 = dopyIRBB( bb0 );
2880 bb0 = NULL;
2881 udst = NULL; /* is now invalid */
2882 for (i = bb1->stmts_used-1; i >= 0; i--)
2883 if (bb1->stmts[i])
2884 break;
2885
2886 /* The next bunch of assertions should be true since we already
2887 found and checked the last stmt in the original bb. */
2888
2889 vassert(i >= 0);
2890
2891 st = bb1->stmts[i];
2892 vassert(st->tag == Ist_Exit);
2893
2894 con = st->Ist.Exit.dst;
2895 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
2896
2897 udst = bb1->next;
2898 vassert(udst->tag == Iex_Const);
2899 vassert(udst->Iex.Const.con->tag == Ico_U32
2900 || udst->Iex.Const.con->tag == Ico_U64);
2901 vassert(con->tag == udst->Iex.Const.con->tag);
2902
2903 /* switch the xxx and yyy fields around */
2904 if (con->tag == Ico_U64) {
2905 udst->Iex.Const.con->Ico.U64 = xxx_value;
2906 con->Ico.U64 = yyy_value;
2907 } else {
2908 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
cerion57b4c6d2005-02-22 11:07:35 +00002909 con->Ico.U32 = (UInt)yyy_value;
sewardj08210532004-12-29 17:09:11 +00002910 }
2911
2912 /* negate the test condition */
2913 st->Ist.Exit.guard
2914 = IRExpr_Unop(Iop_Not1,dopyIRExpr(st->Ist.Exit.guard));
2915
2916 /* --- The unroller proper. Both idioms are by now --- */
2917 /* --- now converted to idiom 1. --- */
2918
2919 do_unroll:
2920
2921 vassert(unroll_factor == 2
2922 || unroll_factor == 4
2923 || unroll_factor == 8);
2924
2925 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
2926 for (j = 1; j <= jmax; j++) {
2927
2928 n_vars = bb1->tyenv->types_used;
2929
2930 bb2 = dopyIRBB(bb1);
2931 for (i = 0; i < n_vars; i++)
2932 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
2933
2934 for (i = 0; i < bb2->stmts_used; i++) {
2935 if (bb2->stmts[i] == NULL)
2936 continue;
2937 /* deltaIRStmt destructively modifies the stmt, but
2938 that's OK since bb2 is a complete fresh copy of bb1. */
2939 deltaIRStmt(bb2->stmts[i], n_vars);
2940 addStmtToIRBB(bb1, bb2->stmts[i]);
2941 }
2942 }
2943
2944 if (DEBUG_IROPT) {
2945 vex_printf("\nUNROLLED (%llx)\n", my_addr);
2946 ppIRBB(bb1);
2947 vex_printf("\n");
2948 }
2949
2950 /* Flattening; sigh. The unroller succeeds in breaking flatness
2951 by negating the test condition. This should be fixed properly.
2952 For the moment use this shotgun approach. */
2953 return flatten_BB(bb1);
2954}
2955
2956
2957/*---------------------------------------------------------------*/
sewardj29632392004-08-22 02:38:11 +00002958/*--- The tree builder ---*/
2959/*---------------------------------------------------------------*/
2960
sewardj08210532004-12-29 17:09:11 +00002961/* This isn't part of IR optimisation. Really it's a pass done prior
2962 to instruction selection, which improves the code that the
2963 instruction selector can produce. */
2964
sewardj29632392004-08-22 02:38:11 +00002965typedef
2966 struct {
2967 Int occ; /* occurrence count for this tmp */
sewardj84ff0652004-08-23 16:16:08 +00002968 IRExpr* expr; /* expr it is bound to,
2969 or NULL if already 'used' */
sewardj29632392004-08-22 02:38:11 +00002970 Bool eDoesLoad; /* True <=> expr reads mem */
2971 Bool eDoesGet; /* True <=> expr reads guest state */
2972 Bool invalidateMe; /* used when dumping bindings */
2973 Int origPos; /* posn of the binder in the original bb */
2974 }
2975 TmpInfo;
2976
2977/* Given env :: IRTemp -> TmpInfo*
2978 Add the use-occurrences of temps in this expression
2979 to the environment.
2980*/
sewardjc9ad1152004-10-14 00:08:21 +00002981static void occCount_Temp ( TmpInfo** env, IRTemp tmp )
sewardj17442fe2004-09-20 14:54:28 +00002982{
sewardjc9ad1152004-10-14 00:08:21 +00002983 TmpInfo* ti = env[(Int)tmp];
2984 if (ti) {
sewardj17442fe2004-09-20 14:54:28 +00002985 ti->occ++;
2986 } else {
2987 ti = LibVEX_Alloc(sizeof(TmpInfo));
2988 ti->occ = 1;
2989 ti->expr = NULL;
2990 ti->eDoesLoad = False;
2991 ti->eDoesGet = False;
2992 ti->invalidateMe = False;
2993 ti->origPos = -1; /* filed in properly later */
sewardjc9ad1152004-10-14 00:08:21 +00002994 env[(Int)tmp] = ti;
sewardj17442fe2004-09-20 14:54:28 +00002995 }
2996}
2997
sewardjc9ad1152004-10-14 00:08:21 +00002998static void occCount_Expr ( TmpInfo** env, IRExpr* e )
sewardj29632392004-08-22 02:38:11 +00002999{
sewardj17442fe2004-09-20 14:54:28 +00003000 Int i;
sewardj29632392004-08-22 02:38:11 +00003001
3002 switch (e->tag) {
3003
3004 case Iex_Tmp: /* the only interesting case */
sewardj17442fe2004-09-20 14:54:28 +00003005 occCount_Temp(env, e->Iex.Tmp.tmp);
sewardj29632392004-08-22 02:38:11 +00003006 return;
3007
3008 case Iex_Mux0X:
3009 occCount_Expr(env, e->Iex.Mux0X.cond);
3010 occCount_Expr(env, e->Iex.Mux0X.expr0);
3011 occCount_Expr(env, e->Iex.Mux0X.exprX);
3012 return;
3013
3014 case Iex_Binop:
3015 occCount_Expr(env, e->Iex.Binop.arg1);
3016 occCount_Expr(env, e->Iex.Binop.arg2);
3017 return;
3018
3019 case Iex_Unop:
3020 occCount_Expr(env, e->Iex.Unop.arg);
3021 return;
3022
3023 case Iex_LDle:
3024 occCount_Expr(env, e->Iex.LDle.addr);
3025 return;
3026
3027 case Iex_CCall:
3028 for (i = 0; e->Iex.CCall.args[i]; i++)
3029 occCount_Expr(env, e->Iex.CCall.args[i]);
3030 return;
3031
sewardjf6501992004-08-27 11:58:24 +00003032 case Iex_GetI:
sewardjeeac8412004-11-02 00:26:55 +00003033 occCount_Expr(env, e->Iex.GetI.ix);
sewardjf6501992004-08-27 11:58:24 +00003034 return;
3035
sewardj29632392004-08-22 02:38:11 +00003036 case Iex_Const:
3037 case Iex_Get:
3038 return;
3039
3040 default:
3041 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3042 vpanic("occCount_Expr");
3043 }
3044}
3045
3046
3047/* Given env :: IRTemp -> TmpInfo*
3048 Add the use-occurrences of temps in this expression
3049 to the environment.
3050*/
sewardjc9ad1152004-10-14 00:08:21 +00003051static void occCount_Stmt ( TmpInfo** env, IRStmt* st )
sewardj29632392004-08-22 02:38:11 +00003052{
sewardj17442fe2004-09-20 14:54:28 +00003053 Int i;
3054 IRDirty* d;
sewardj29632392004-08-22 02:38:11 +00003055 switch (st->tag) {
3056 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +00003057 occCount_Expr(env, st->Ist.Tmp.data);
sewardj29632392004-08-22 02:38:11 +00003058 return;
3059 case Ist_Put:
sewardj6d076362004-09-23 11:06:17 +00003060 occCount_Expr(env, st->Ist.Put.data);
sewardj29632392004-08-22 02:38:11 +00003061 return;
sewardjf6501992004-08-27 11:58:24 +00003062 case Ist_PutI:
sewardjeeac8412004-11-02 00:26:55 +00003063 occCount_Expr(env, st->Ist.PutI.ix);
sewardj2d3f77c2004-09-22 23:49:09 +00003064 occCount_Expr(env, st->Ist.PutI.data);
sewardjf6501992004-08-27 11:58:24 +00003065 return;
sewardj29632392004-08-22 02:38:11 +00003066 case Ist_STle:
3067 occCount_Expr(env, st->Ist.STle.addr);
3068 occCount_Expr(env, st->Ist.STle.data);
3069 return;
sewardj17442fe2004-09-20 14:54:28 +00003070 case Ist_Dirty:
3071 d = st->Ist.Dirty.details;
3072 if (d->mFx != Ifx_None)
3073 occCount_Expr(env, d->mAddr);
sewardjb8385d82004-11-02 01:34:15 +00003074 occCount_Expr(env, d->guard);
sewardj17442fe2004-09-20 14:54:28 +00003075 for (i = 0; d->args[i]; i++)
3076 occCount_Expr(env, d->args[i]);
3077 return;
sewardjd2445f62005-03-21 00:15:53 +00003078 case Ist_NoOp:
sewardjf1689312005-03-16 18:19:10 +00003079 case Ist_IMark:
sewardj3e838932005-01-07 12:09:15 +00003080 case Ist_MFence:
3081 return;
sewardj29632392004-08-22 02:38:11 +00003082 case Ist_Exit:
sewardj0276d4b2004-11-15 15:30:21 +00003083 occCount_Expr(env, st->Ist.Exit.guard);
sewardj29632392004-08-22 02:38:11 +00003084 return;
3085 default:
3086 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3087 vpanic("occCount_Stmt");
3088 }
3089}
3090
sewardj17442fe2004-09-20 14:54:28 +00003091/* Look up a binding for tmp in the env. If found, return the bound
3092 expression, and set the env's binding to NULL so it is marked as
3093 used. If not found, return NULL. */
3094
sewardjc9ad1152004-10-14 00:08:21 +00003095static IRExpr* tbSubst_Temp ( TmpInfo** env, IRTemp tmp )
sewardj17442fe2004-09-20 14:54:28 +00003096{
3097 TmpInfo* ti;
sewardj17442fe2004-09-20 14:54:28 +00003098 IRExpr* e;
sewardjc9ad1152004-10-14 00:08:21 +00003099 ti = env[(Int)tmp];
3100 if (ti){
3101 e = ti->expr;
sewardj17442fe2004-09-20 14:54:28 +00003102 if (e) {
3103 ti->expr = NULL;
3104 return e;
3105 } else {
3106 return NULL;
3107 }
3108 } else {
3109 return NULL;
3110 }
3111}
sewardj29632392004-08-22 02:38:11 +00003112
3113/* Traverse e, looking for temps. For each observed temp, see if env
3114 contains a binding for the temp, and if so return the bound value.
3115 The env has the property that any binding it holds is
3116 'single-shot', so once a binding is used, it is marked as no longer
3117 available, by setting its .expr field to NULL. */
3118
sewardjc9ad1152004-10-14 00:08:21 +00003119static IRExpr* tbSubst_Expr ( TmpInfo** env, IRExpr* e )
sewardj29632392004-08-22 02:38:11 +00003120{
sewardjc41f1fb2004-08-22 09:48:08 +00003121 IRExpr* e2;
3122 IRExpr** args2;
3123 Int i;
sewardj29632392004-08-22 02:38:11 +00003124
3125 switch (e->tag) {
3126
sewardjc41f1fb2004-08-22 09:48:08 +00003127 case Iex_CCall:
sewardj695cff92004-10-13 14:50:14 +00003128 args2 = sopyIRExprVec(e->Iex.CCall.args);
sewardjc41f1fb2004-08-22 09:48:08 +00003129 for (i = 0; args2[i]; i++)
3130 args2[i] = tbSubst_Expr(env,args2[i]);
sewardj8ea867b2004-10-30 19:03:02 +00003131 return IRExpr_CCall(e->Iex.CCall.cee,
sewardjc41f1fb2004-08-22 09:48:08 +00003132 e->Iex.CCall.retty,
3133 args2
3134 );
sewardj29632392004-08-22 02:38:11 +00003135 case Iex_Tmp:
sewardj17442fe2004-09-20 14:54:28 +00003136 e2 = tbSubst_Temp(env, e->Iex.Tmp.tmp);
3137 return e2 ? e2 : e;
sewardjc41f1fb2004-08-22 09:48:08 +00003138 case Iex_Mux0X:
3139 return IRExpr_Mux0X(
3140 tbSubst_Expr(env, e->Iex.Mux0X.cond),
3141 tbSubst_Expr(env, e->Iex.Mux0X.expr0),
3142 tbSubst_Expr(env, e->Iex.Mux0X.exprX)
3143 );
3144 case Iex_Binop:
3145 return IRExpr_Binop(
3146 e->Iex.Binop.op,
3147 tbSubst_Expr(env, e->Iex.Binop.arg1),
3148 tbSubst_Expr(env, e->Iex.Binop.arg2)
3149 );
3150 case Iex_Unop:
3151 return IRExpr_Unop(
3152 e->Iex.Unop.op,
3153 tbSubst_Expr(env, e->Iex.Unop.arg)
3154 );
3155 case Iex_LDle:
3156 return IRExpr_LDle(
3157 e->Iex.LDle.ty,
sewardjf6501992004-08-27 11:58:24 +00003158 tbSubst_Expr(env, e->Iex.LDle.addr)
sewardjc41f1fb2004-08-22 09:48:08 +00003159 );
sewardjf6501992004-08-27 11:58:24 +00003160 case Iex_GetI:
3161 return IRExpr_GetI(
sewardj2d3f77c2004-09-22 23:49:09 +00003162 e->Iex.GetI.descr,
sewardjeeac8412004-11-02 00:26:55 +00003163 tbSubst_Expr(env, e->Iex.GetI.ix),
sewardj2d3f77c2004-09-22 23:49:09 +00003164 e->Iex.GetI.bias
3165 );
sewardjc41f1fb2004-08-22 09:48:08 +00003166 case Iex_Const:
sewardj29632392004-08-22 02:38:11 +00003167 case Iex_Get:
3168 return e;
3169 default:
3170 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3171 vpanic("tbSubst_Expr");
3172 }
3173}
3174
3175/* Same deal as tbSubst_Expr, except for stmts. */
3176
sewardjc9ad1152004-10-14 00:08:21 +00003177static IRStmt* tbSubst_Stmt ( TmpInfo** env, IRStmt* st )
sewardj29632392004-08-22 02:38:11 +00003178{
sewardj17442fe2004-09-20 14:54:28 +00003179 Int i;
3180 IRDirty* d;
3181 IRDirty* d2;
sewardj29632392004-08-22 02:38:11 +00003182 switch (st->tag) {
sewardj17442fe2004-09-20 14:54:28 +00003183 case Ist_STle:
3184 return IRStmt_STle(
3185 tbSubst_Expr(env, st->Ist.STle.addr),
3186 tbSubst_Expr(env, st->Ist.STle.data)
3187 );
sewardj29632392004-08-22 02:38:11 +00003188 case Ist_Tmp:
3189 return IRStmt_Tmp(
3190 st->Ist.Tmp.tmp,
sewardj6d076362004-09-23 11:06:17 +00003191 tbSubst_Expr(env, st->Ist.Tmp.data)
sewardj29632392004-08-22 02:38:11 +00003192 );
3193 case Ist_Put:
3194 return IRStmt_Put(
3195 st->Ist.Put.offset,
sewardj6d076362004-09-23 11:06:17 +00003196 tbSubst_Expr(env, st->Ist.Put.data)
sewardj29632392004-08-22 02:38:11 +00003197 );
sewardjf6501992004-08-27 11:58:24 +00003198 case Ist_PutI:
3199 return IRStmt_PutI(
sewardj2d3f77c2004-09-22 23:49:09 +00003200 st->Ist.PutI.descr,
sewardjeeac8412004-11-02 00:26:55 +00003201 tbSubst_Expr(env, st->Ist.PutI.ix),
sewardj2d3f77c2004-09-22 23:49:09 +00003202 st->Ist.PutI.bias,
3203 tbSubst_Expr(env, st->Ist.PutI.data)
3204 );
sewardjf6501992004-08-27 11:58:24 +00003205
sewardj29632392004-08-22 02:38:11 +00003206 case Ist_Exit:
3207 return IRStmt_Exit(
sewardj0276d4b2004-11-15 15:30:21 +00003208 tbSubst_Expr(env, st->Ist.Exit.guard),
sewardj893aada2004-11-29 19:57:54 +00003209 st->Ist.Exit.jk,
sewardj29632392004-08-22 02:38:11 +00003210 st->Ist.Exit.dst
3211 );
sewardjf1689312005-03-16 18:19:10 +00003212 case Ist_IMark:
3213 return IRStmt_IMark(st->Ist.IMark.addr, st->Ist.IMark.len);
sewardjd2445f62005-03-21 00:15:53 +00003214 case Ist_NoOp:
3215 return IRStmt_NoOp();
sewardj3e838932005-01-07 12:09:15 +00003216 case Ist_MFence:
3217 return IRStmt_MFence();
sewardj17442fe2004-09-20 14:54:28 +00003218 case Ist_Dirty:
3219 d = st->Ist.Dirty.details;
3220 d2 = emptyIRDirty();
3221 *d2 = *d;
3222 if (d2->mFx != Ifx_None)
3223 d2->mAddr = tbSubst_Expr(env, d2->mAddr);
sewardjb8385d82004-11-02 01:34:15 +00003224 d2->guard = tbSubst_Expr(env, d2->guard);
sewardj17442fe2004-09-20 14:54:28 +00003225 for (i = 0; d2->args[i]; i++)
3226 d2->args[i] = tbSubst_Expr(env, d2->args[i]);
3227 return IRStmt_Dirty(d2);
sewardj29632392004-08-22 02:38:11 +00003228 default:
3229 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
3230 vpanic("tbSubst_Stmt");
3231 }
3232}
3233
3234
sewardj37d38012004-08-24 00:37:04 +00003235/* Traverse an expr, and detect if any part of it reads memory or does
3236 a Get. Be careful ... this really controls how much the
3237 tree-builder can reorder the code, so getting it right is critical.
3238*/
sewardj29632392004-08-22 02:38:11 +00003239static void setHints_Expr (Bool* doesLoad, Bool* doesGet, IRExpr* e )
3240{
sewardjc41f1fb2004-08-22 09:48:08 +00003241 Int i;
sewardj29632392004-08-22 02:38:11 +00003242 switch (e->tag) {
sewardjc41f1fb2004-08-22 09:48:08 +00003243 case Iex_CCall:
3244 for (i = 0; e->Iex.CCall.args[i]; i++)
3245 setHints_Expr(doesLoad, doesGet, e->Iex.CCall.args[i]);
3246 return;
3247 case Iex_Mux0X:
3248 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.cond);
3249 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.expr0);
3250 setHints_Expr(doesLoad, doesGet, e->Iex.Mux0X.exprX);
3251 return;
3252 case Iex_Binop:
3253 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg1);
3254 setHints_Expr(doesLoad, doesGet, e->Iex.Binop.arg2);
3255 return;
3256 case Iex_Unop:
3257 setHints_Expr(doesLoad, doesGet, e->Iex.Unop.arg);
3258 return;
3259 case Iex_LDle:
sewardjc9ad1152004-10-14 00:08:21 +00003260 *doesLoad = True;
sewardjaba4fff2004-10-08 21:37:15 +00003261 setHints_Expr(doesLoad, doesGet, e->Iex.LDle.addr);
sewardjc41f1fb2004-08-22 09:48:08 +00003262 return;
sewardj29632392004-08-22 02:38:11 +00003263 case Iex_Get:
sewardjc9ad1152004-10-14 00:08:21 +00003264 *doesGet = True;
sewardj29632392004-08-22 02:38:11 +00003265 return;
sewardjf6501992004-08-27 11:58:24 +00003266 case Iex_GetI:
sewardjc9ad1152004-10-14 00:08:21 +00003267 *doesGet = True;
sewardjeeac8412004-11-02 00:26:55 +00003268 setHints_Expr(doesLoad, doesGet, e->Iex.GetI.ix);
sewardjf6501992004-08-27 11:58:24 +00003269 return;
sewardjc41f1fb2004-08-22 09:48:08 +00003270 case Iex_Tmp:
3271 case Iex_Const:
3272 return;
sewardj29632392004-08-22 02:38:11 +00003273 default:
3274 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
3275 vpanic("setHints_Expr");
3276 }
3277}
3278
3279
sewardjc9ad1152004-10-14 00:08:21 +00003280static void dumpInvalidated ( TmpInfo** env, IRBB* bb, /*INOUT*/Int* j )
sewardj29632392004-08-22 02:38:11 +00003281{
3282 Int k, oldest_op, oldest_k;
3283 TmpInfo* ti;
3284
3285 /* Dump all the bindings to marked as invalidated, in order. */
3286 while (True) {
3287
3288 /* find the oldest bind marked 'invalidateMe'. */
3289 oldest_op = 1<<30;
3290 oldest_k = 1<<30;
sewardjc9ad1152004-10-14 00:08:21 +00003291 for (k = 0; k < bb->tyenv->types_used; k++) {
3292 ti = env[k];
3293 if (!ti)
sewardj29632392004-08-22 02:38:11 +00003294 continue;
sewardj29632392004-08-22 02:38:11 +00003295 if (!ti->expr)
3296 continue;
3297 if (!ti->invalidateMe)
3298 continue;
sewardjc41f1fb2004-08-22 09:48:08 +00003299 /* vex_printf("FOUND INVAL %d %d\n", ti->origPos, oldest_op); */
sewardj29632392004-08-22 02:38:11 +00003300 if (ti->origPos < oldest_op) {
3301 oldest_op = ti->origPos;
3302 oldest_k = k;
3303 }
3304 }
3305
3306 /* No more binds to invalidate. */
3307 if (oldest_op == 1<<30)
sewardjc41f1fb2004-08-22 09:48:08 +00003308 return;
sewardj29632392004-08-22 02:38:11 +00003309
3310 /* the oldest bind to invalidate has been identified */
3311 vassert(oldest_op != 1<<31 && oldest_k != 1<<31);
sewardjc9ad1152004-10-14 00:08:21 +00003312 ti = env[oldest_k];
sewardj29632392004-08-22 02:38:11 +00003313 vassert(ti->expr && ti->invalidateMe);
3314
3315 /* and invalidate it ... */
sewardjc9ad1152004-10-14 00:08:21 +00003316 bb->stmts[*j] = IRStmt_Tmp( (IRTemp)oldest_k, ti->expr );
sewardj29632392004-08-22 02:38:11 +00003317 /* vex_printf("**1 "); ppIRStmt(bb->stmts[*j]); vex_printf("\n"); */
3318 (*j)++;
3319 ti->invalidateMe = False;
3320 ti->expr = NULL; /* no longer available for substitution */
3321
3322 } /* loop which dumps the binds marked for invalidation */
3323}
3324
3325
3326
sewardj49651f42004-10-28 22:11:04 +00003327/* notstatic */ void do_treebuild_BB ( IRBB* bb )
sewardj29632392004-08-22 02:38:11 +00003328{
3329 Int i, j, k;
sewardj29632392004-08-22 02:38:11 +00003330 Bool invPut, invStore;
3331 IRStmt* st;
3332 IRStmt* st2;
3333 TmpInfo* ti;
3334 IRExpr* next2;
3335
3336 /* Mapping from IRTemp to TmpInfo*. */
sewardjc9ad1152004-10-14 00:08:21 +00003337 Int n_tmps = bb->tyenv->types_used;
3338 TmpInfo** env = LibVEX_Alloc(n_tmps * sizeof(TmpInfo*));
3339
3340 for (i = 0; i < n_tmps; i++)
3341 env[i] = NULL;
sewardj29632392004-08-22 02:38:11 +00003342
3343 /* Phase 1. Scan forwards in bb, counting use occurrences of each
3344 temp. Also count occurrences in the bb->next field. */
3345
3346 for (i = 0; i < bb->stmts_used; i++) {
3347 st = bb->stmts[i];
3348 if (!st)
3349 continue;
3350 occCount_Stmt( env, st );
3351 }
3352 occCount_Expr(env, bb->next );
3353
sewardjc9ad1152004-10-14 00:08:21 +00003354# if 0
sewardj29632392004-08-22 02:38:11 +00003355 for (i = 0; i < env->used; i++) {
3356 if (!env->inuse[i])
3357 continue;
3358 ppIRTemp( (IRTemp)(env->key[i]) );
3359 vex_printf(" used %d\n", ((TmpInfo*)env->val[i])->occ );
3360 }
sewardjc9ad1152004-10-14 00:08:21 +00003361# endif
sewardj29632392004-08-22 02:38:11 +00003362
3363 /* Phase 2. Fill in the origPos fields. */
3364
3365 for (i = 0; i < bb->stmts_used; i++) {
3366 st = bb->stmts[i];
3367 if (!st)
3368 continue;
3369 if (st->tag != Ist_Tmp)
3370 continue;
3371
sewardjc9ad1152004-10-14 00:08:21 +00003372 ti = env[(Int)st->Ist.Tmp.tmp];
3373 if (!ti) {
sewardj84ff0652004-08-23 16:16:08 +00003374 vex_printf("\n");
3375 ppIRTemp(st->Ist.Tmp.tmp);
3376 vex_printf("\n");
sewardj29632392004-08-22 02:38:11 +00003377 vpanic("treebuild_BB (phase 2): unmapped IRTemp");
3378 }
sewardj29632392004-08-22 02:38:11 +00003379 ti->origPos = i;
3380 }
3381
3382 /* Phase 3. Scan forwards in bb.
3383
3384 On seeing 't = E', occ(t)==1,
3385 let E'=env(E), set t's binding to be E', and
3386 delete this stmt.
3387 Also examine E' and set the hints for E' appropriately
3388 (doesLoad? doesGet?)
3389
3390 On seeing any other stmt,
3391 let stmt' = env(stmt)
3392 remove from env any 't=E' binds invalidated by stmt
3393 emit the invalidated stmts
3394 emit stmt'
3395
3396 Apply env to bb->next.
3397 */
3398
3399 /* The stmts in bb are being reordered, and we are guaranteed to
3400 end up with no more than the number we started with. Use i to
3401 be the cursor of the current stmt examined and j <= i to be that
3402 for the current stmt being written.
3403 */
3404 j = 0;
3405 for (i = 0; i < bb->stmts_used; i++) {
3406 st = bb->stmts[i];
3407 if (!st)
3408 continue;
3409
3410 if (st->tag == Ist_Tmp) {
sewardje80679a2004-09-21 23:00:11 +00003411 /* vex_printf("acquire binding\n"); */
sewardjc9ad1152004-10-14 00:08:21 +00003412 ti = env[st->Ist.Tmp.tmp];
3413 if (!ti) {
sewardj29632392004-08-22 02:38:11 +00003414 vpanic("treebuild_BB (phase 2): unmapped IRTemp");
3415 }
sewardj29632392004-08-22 02:38:11 +00003416 if (ti->occ == 1) {
3417 /* ok, we have 't = E', occ(t)==1. Do the abovementioned actions. */
sewardj6d076362004-09-23 11:06:17 +00003418 IRExpr* e = st->Ist.Tmp.data;
sewardj29632392004-08-22 02:38:11 +00003419 IRExpr* e2 = tbSubst_Expr(env, e);
3420 ti->expr = e2;
3421 ti->eDoesLoad = ti->eDoesGet = False;
3422 setHints_Expr(&ti->eDoesLoad, &ti->eDoesGet, e2);
3423 /* don't advance j, as we are deleting this stmt and instead
3424 holding it temporarily in the env. */
3425 continue; /* the for (i = 0; i < bb->stmts_used; i++) loop */
3426 }
3427 }
3428
3429 /* we get here for any other kind of statement. */
3430 /* 'use up' any bindings required by the current statement. */
3431 st2 = tbSubst_Stmt(env, st);
3432
3433 /* Now, before this stmt, dump any bindings it invalidates.
3434 These need to be dumped in the order in which they originally
3435 appeared. (Stupid algorithm): first, mark all bindings which
3436 need to be dumped. Then, dump them in the order in which
3437 they were defined. */
sewardj3e838932005-01-07 12:09:15 +00003438
sewardj9d2e7692005-02-07 01:11:31 +00003439 invPut = toBool(st->tag == Ist_Put
3440 || st->tag == Ist_PutI
3441 || st->tag == Ist_Dirty);
sewardj3e838932005-01-07 12:09:15 +00003442
sewardj9d2e7692005-02-07 01:11:31 +00003443 invStore = toBool(st->tag == Ist_STle
3444 || st->tag == Ist_Dirty);
sewardj29632392004-08-22 02:38:11 +00003445
sewardjc9ad1152004-10-14 00:08:21 +00003446 for (k = 0; k < n_tmps; k++) {
3447 ti = env[k];
3448 if (!ti)
sewardj29632392004-08-22 02:38:11 +00003449 continue;
sewardj29632392004-08-22 02:38:11 +00003450 if (!ti->expr)
3451 continue;
3452
sewardj3e838932005-01-07 12:09:15 +00003453 /* Do we have to invalidate this binding? */
3454
sewardj29632392004-08-22 02:38:11 +00003455 ti->invalidateMe
sewardj9d2e7692005-02-07 01:11:31 +00003456 = toBool(
3457 /* a store invalidates loaded data */
sewardj4c5f6d52004-10-26 13:25:33 +00003458 (ti->eDoesLoad && invStore)
3459 /* a put invalidates get'd data */
3460 || (ti->eDoesGet && invPut)
3461 /* a put invalidates loaded data. Note, we could do
3462 much better here in the sense that we only need to
3463 invalidate trees containing loads if the Put in
3464 question is marked as requiring precise
3465 exceptions. */
sewardj3e838932005-01-07 12:09:15 +00003466 || (ti->eDoesLoad && invPut)
3467 /* probably overly conservative: a memory fence
3468 invalidates absolutely everything, so that all
3469 computation prior to it is forced to complete before
3470 proceeding with the fence. */
sewardj9d2e7692005-02-07 01:11:31 +00003471 || st->tag == Ist_MFence
3472 );
sewardjc41f1fb2004-08-22 09:48:08 +00003473 /*
3474 if (ti->invalidateMe)
sewardj3e838932005-01-07 12:09:15 +00003475 vex_printf("SET INVAL\n");
3476 */
sewardj29632392004-08-22 02:38:11 +00003477 }
3478
3479 dumpInvalidated ( env, bb, &j );
3480
3481 /* finally, emit the substituted statement */
3482 bb->stmts[j] = st2;
3483 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
3484 j++;
3485
3486 vassert(j <= i+1);
3487 } /* for each stmt in the original bb ... */
3488
3489 /* Finally ... substitute the ->next field as much as possible, and
3490 dump any left-over bindings. Hmm. Perhaps there should be no
3491 left over bindings? Or any left-over bindings are
3492 by definition dead? */
3493 next2 = tbSubst_Expr(env, bb->next);
3494 bb->next = next2;
3495 bb->stmts_used = j;
3496}
3497
3498
sewardj695cff92004-10-13 14:50:14 +00003499/*---------------------------------------------------------------*/
sewardjedf4d692004-08-17 13:52:58 +00003500/*--- iropt main ---*/
3501/*---------------------------------------------------------------*/
3502
sewardj695cff92004-10-13 14:50:14 +00003503static Bool iropt_verbose = False; //True;
sewardj4345f7a2004-09-22 19:49:27 +00003504
3505
sewardj4345f7a2004-09-22 19:49:27 +00003506/* Do a simple cleanup pass on bb. This is: redundant Get removal,
3507 redundant Put removal, constant propagation, dead code removal,
3508 clean helper specialisation, and dead code removal (again).
sewardjb9230752004-12-29 19:25:06 +00003509*/
sewardj695cff92004-10-13 14:50:14 +00003510
sewardj4345f7a2004-09-22 19:49:27 +00003511
3512static
sewardj695cff92004-10-13 14:50:14 +00003513IRBB* cheap_transformations (
3514 IRBB* bb,
sewardj9d2e7692005-02-07 01:11:31 +00003515 IRExpr* (*specHelper) (HChar*, IRExpr**),
sewardj8d2291c2004-10-25 14:50:21 +00003516 Bool (*preciseMemExnsFn)(Int,Int)
3517 )
sewardj4345f7a2004-09-22 19:49:27 +00003518{
3519 redundant_get_removal_BB ( bb );
3520 if (iropt_verbose) {
3521 vex_printf("\n========= REDUNDANT GET\n\n" );
3522 ppIRBB(bb);
3523 }
sewardj044a2152004-10-21 10:22:10 +00003524
sewardj8d2291c2004-10-25 14:50:21 +00003525 redundant_put_removal_BB ( bb, preciseMemExnsFn );
sewardj4345f7a2004-09-22 19:49:27 +00003526 if (iropt_verbose) {
3527 vex_printf("\n========= REDUNDANT PUT\n\n" );
3528 ppIRBB(bb);
3529 }
sewardj044a2152004-10-21 10:22:10 +00003530
sewardj4345f7a2004-09-22 19:49:27 +00003531 bb = cprop_BB ( bb );
3532 if (iropt_verbose) {
3533 vex_printf("\n========= CPROPD\n\n" );
3534 ppIRBB(bb);
3535 }
3536
sewardj49651f42004-10-28 22:11:04 +00003537 do_deadcode_BB ( bb );
sewardj4345f7a2004-09-22 19:49:27 +00003538 if (iropt_verbose) {
3539 vex_printf("\n========= DEAD\n\n" );
3540 ppIRBB(bb);
3541 }
3542
sewardjb9230752004-12-29 19:25:06 +00003543 bb = spec_helpers_BB ( bb, specHelper );
sewardj49651f42004-10-28 22:11:04 +00003544 do_deadcode_BB ( bb );
sewardj4345f7a2004-09-22 19:49:27 +00003545 if (iropt_verbose) {
3546 vex_printf("\n========= SPECd \n\n" );
3547 ppIRBB(bb);
3548 }
3549
3550 return bb;
3551}
3552
sewardj695cff92004-10-13 14:50:14 +00003553
3554/* Do some more expensive transformations on bb, which are aimed at
3555 optimising as much as possible in the presence of GetI and PutI. */
3556
3557static
3558IRBB* expensive_transformations( IRBB* bb )
3559{
sewardjfe1ccfc2004-11-11 02:14:45 +00003560 do_cse_BB( bb );
sewardj695cff92004-10-13 14:50:14 +00003561 collapse_AddSub_chains_BB( bb );
sewardj08210532004-12-29 17:09:11 +00003562 do_redundant_GetI_elimination( bb );
sewardj695cff92004-10-13 14:50:14 +00003563 do_redundant_PutI_elimination( bb );
sewardj49651f42004-10-28 22:11:04 +00003564 do_deadcode_BB( bb );
sewardjb9230752004-12-29 19:25:06 +00003565 return bb;
sewardj695cff92004-10-13 14:50:14 +00003566}
3567
3568
sewardj4345f7a2004-09-22 19:49:27 +00003569/* Scan a flattened BB to see if it has any GetI or PutIs in it. Used
3570 as a heuristic hack to see if iropt needs to do expensive
sewardj695cff92004-10-13 14:50:14 +00003571 optimisations (CSE, PutI -> GetI forwarding, redundant PutI
3572 elimination) to improve code containing GetI or PutI. */
3573
sewardj4345f7a2004-09-22 19:49:27 +00003574static Bool hasGetIorPutI ( IRBB* bb )
3575{
3576 Int i, j;
3577 IRStmt* st;
3578 IRDirty* d;
3579
3580 for (i = 0; i < bb->stmts_used; i++) {
3581 st = bb->stmts[i];
3582 if (!st)
3583 continue;
3584
3585 switch (st->tag) {
3586 case Ist_PutI:
3587 return True;
3588 case Ist_Tmp:
sewardj6d076362004-09-23 11:06:17 +00003589 if (st->Ist.Tmp.data->tag == Iex_GetI)
sewardj4345f7a2004-09-22 19:49:27 +00003590 return True;
3591 break;
3592 case Ist_Put:
sewardj496a58d2005-03-20 18:44:44 +00003593 vassert(isIRAtom(st->Ist.Put.data));
sewardj4345f7a2004-09-22 19:49:27 +00003594 break;
3595 case Ist_STle:
sewardj496a58d2005-03-20 18:44:44 +00003596 vassert(isIRAtom(st->Ist.STle.addr));
3597 vassert(isIRAtom(st->Ist.STle.data));
sewardj4345f7a2004-09-22 19:49:27 +00003598 break;
sewardj4345f7a2004-09-22 19:49:27 +00003599 case Ist_Dirty:
3600 d = st->Ist.Dirty.details;
sewardj496a58d2005-03-20 18:44:44 +00003601 vassert(isIRAtom(d->guard));
sewardj4345f7a2004-09-22 19:49:27 +00003602 for (j = 0; d->args[j]; j++)
sewardj496a58d2005-03-20 18:44:44 +00003603 vassert(isIRAtom(d->args[j]));
sewardj4345f7a2004-09-22 19:49:27 +00003604 if (d->mFx != Ifx_None)
sewardj496a58d2005-03-20 18:44:44 +00003605 vassert(isIRAtom(d->mAddr));
sewardj4345f7a2004-09-22 19:49:27 +00003606 break;
sewardjd2445f62005-03-21 00:15:53 +00003607 case Ist_NoOp:
sewardjf1689312005-03-16 18:19:10 +00003608 case Ist_IMark:
sewardj3e838932005-01-07 12:09:15 +00003609 case Ist_MFence:
3610 break;
3611 case Ist_Exit:
sewardj496a58d2005-03-20 18:44:44 +00003612 vassert(isIRAtom(st->Ist.Exit.guard));
sewardj3e838932005-01-07 12:09:15 +00003613 break;
sewardj4345f7a2004-09-22 19:49:27 +00003614 default:
3615 ppIRStmt(st);
3616 vpanic("hasGetIorPutI");
3617 }
3618
3619 }
3620 return False;
3621
3622}
3623
3624
sewardj695cff92004-10-13 14:50:14 +00003625/* ---------------- The main iropt entry point. ---------------- */
3626
sewardjedf4d692004-08-17 13:52:58 +00003627/* exported from this file */
sewardj695cff92004-10-13 14:50:14 +00003628/* Rules of the game:
3629
3630 - IRExpr/IRStmt trees should be treated as immutable, as they
3631 may get shared. So never change a field of such a tree node;
3632 instead construct and return a new one if needed.
3633*/
3634
sewardj4345f7a2004-09-22 19:49:27 +00003635
sewardj84ff0652004-08-23 16:16:08 +00003636IRBB* do_iropt_BB ( IRBB* bb0,
sewardj9d2e7692005-02-07 01:11:31 +00003637 IRExpr* (*specHelper) (HChar*, IRExpr**),
sewardj8d2291c2004-10-25 14:50:21 +00003638 Bool (*preciseMemExnsFn)(Int,Int),
sewardj695cff92004-10-13 14:50:14 +00003639 Addr64 guest_addr )
sewardjedf4d692004-08-17 13:52:58 +00003640{
sewardj9d2e7692005-02-07 01:11:31 +00003641 static Int n_total = 0;
3642 static Int n_expensive = 0;
sewardj29632392004-08-22 02:38:11 +00003643
sewardj695cff92004-10-13 14:50:14 +00003644 Bool do_expensive;
sewardj695cff92004-10-13 14:50:14 +00003645 IRBB *bb, *bb2;
sewardj8c2c10b2004-10-16 20:51:52 +00003646
sewardj4345f7a2004-09-22 19:49:27 +00003647 n_total++;
3648
3649 /* First flatten the block out, since all other
3650 phases assume flat code. */
3651
3652 bb = flatten_BB ( bb0 );
3653
3654 if (iropt_verbose) {
3655 vex_printf("\n========= FLAT\n\n" );
3656 ppIRBB(bb);
sewardj84be7372004-08-18 13:59:33 +00003657 }
sewardjd7217032004-08-19 10:49:10 +00003658
sewardj08210532004-12-29 17:09:11 +00003659 /* If at level 0, stop now. */
3660 if (vex_control.iropt_level <= 0) return bb;
3661
sewardj695cff92004-10-13 14:50:14 +00003662 /* Now do a preliminary cleanup pass, and figure out if we also
3663 need to do 'expensive' optimisations. Expensive optimisations
3664 are deemed necessary if the block contains any GetIs or PutIs.
3665 If needed, do expensive transformations and then another cheap
3666 cleanup pass. */
sewardj4345f7a2004-09-22 19:49:27 +00003667
sewardj8d2291c2004-10-25 14:50:21 +00003668 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
sewardj695cff92004-10-13 14:50:14 +00003669
sewardj08613742004-10-25 13:01:45 +00003670 if (vex_control.iropt_level > 1) {
sewardj39555aa2004-10-24 22:29:19 +00003671 do_expensive = hasGetIorPutI(bb);
sewardj695cff92004-10-13 14:50:14 +00003672 if (do_expensive) {
sewardj39555aa2004-10-24 22:29:19 +00003673 n_expensive++;
sewardj39555aa2004-10-24 22:29:19 +00003674 if (DEBUG_IROPT)
3675 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
sewardj695cff92004-10-13 14:50:14 +00003676 bb = expensive_transformations( bb );
sewardj8d2291c2004-10-25 14:50:21 +00003677 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
sewardj695cff92004-10-13 14:50:14 +00003678 }
sewardj39555aa2004-10-24 22:29:19 +00003679
3680 /* Now have a go at unrolling simple (single-BB) loops. If
3681 successful, clean up the results as much as possible. */
3682
3683 bb2 = maybe_loop_unroll_BB( bb, guest_addr );
3684 if (bb2) {
sewardj8d2291c2004-10-25 14:50:21 +00003685 bb = cheap_transformations( bb2, specHelper, preciseMemExnsFn );
sewardj39555aa2004-10-24 22:29:19 +00003686 if (do_expensive) {
3687 bb = expensive_transformations( bb );
sewardj8d2291c2004-10-25 14:50:21 +00003688 bb = cheap_transformations( bb, specHelper, preciseMemExnsFn );
sewardj39555aa2004-10-24 22:29:19 +00003689 } else {
3690 /* at least do CSE and dead code removal */
sewardjfe1ccfc2004-11-11 02:14:45 +00003691 do_cse_BB( bb );
sewardj49651f42004-10-28 22:11:04 +00003692 do_deadcode_BB( bb );
sewardj39555aa2004-10-24 22:29:19 +00003693 }
3694 if (0) vex_printf("vex iropt: unrolled a loop\n");
3695 }
3696
sewardjd7217032004-08-19 10:49:10 +00003697 }
3698
sewardj4345f7a2004-09-22 19:49:27 +00003699 return bb;
sewardjedf4d692004-08-17 13:52:58 +00003700}
3701
3702
sewardja1a370f2004-08-17 13:31:55 +00003703/*---------------------------------------------------------------*/
3704/*--- end ir/iropt.c ---*/
3705/*---------------------------------------------------------------*/