blob: 6ed8bd2266899f91d2d0c685a045ae9f79e10553 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
nethercotebb1c9912004-01-04 16:43:23 +000011 Copyright (C) 2000-2004 Julian Seward
sewardjde4a1d02002-03-22 01:27:54 +000012 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
njn25e49d8e72002-09-23 09:36:25 +000038#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000058 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000059 cb->used = cb->size = 0;
60 cb->nextTemp = cb_in->nextTemp;
61 cb->instrs = NULL;
62 return cb;
63}
sewardjde4a1d02002-03-22 01:27:54 +000064
njn4ba5a792002-09-30 10:23:54 +000065void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000066{
njn25e49d8e72002-09-23 09:36:25 +000067 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
68 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000069}
70
71
72/* Ensure there's enough space in a block to add one uinstr. */
daywalkerb18d2532003-09-27 20:15:01 +000073static
sewardjde4a1d02002-03-22 01:27:54 +000074void ensureUInstr ( UCodeBlock* cb )
75{
76 if (cb->used == cb->size) {
77 if (cb->instrs == NULL) {
78 vg_assert(cb->size == 0);
79 vg_assert(cb->used == 0);
80 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000081 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000082 } else {
83 Int i;
njn25e49d8e72002-09-23 09:36:25 +000084 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000085 2 * sizeof(UInstr) * cb->size);
86 for (i = 0; i < cb->used; i++)
87 instrs2[i] = cb->instrs[i];
88 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000089 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000090 cb->instrs = instrs2;
91 }
92 }
93
94 vg_assert(cb->used < cb->size);
95}
96
97
98__inline__
njn4ba5a792002-09-30 10:23:54 +000099void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000100{
101 u->val1 = u->val2 = u->val3 = 0;
102 u->tag1 = u->tag2 = u->tag3 = NoValue;
103 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000104 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000105 u->signed_widen = u->has_ret_val = False;
106 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000107 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000108 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000109 u->size = 0;
110 u->cond = 0;
111 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000112 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000113}
114
115
116/* Add an instruction to a ucode block, and return the index of the
117 instruction. */
118__inline__
njn4ba5a792002-09-30 10:23:54 +0000119void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000120 Tag tag1, UInt val1,
121 Tag tag2, UInt val2,
122 Tag tag3, UInt val3 )
123{
124 UInstr* ui;
125 ensureUInstr(cb);
126 ui = & cb->instrs[cb->used];
127 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000128 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000129 ui->val1 = val1;
130 ui->val2 = val2;
131 ui->val3 = val3;
132 ui->opcode = opcode;
133 ui->tag1 = tag1;
134 ui->tag2 = tag2;
135 ui->tag3 = tag3;
136 ui->size = sz;
137 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
138 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
139 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
140}
141
142
143__inline__
njn4ba5a792002-09-30 10:23:54 +0000144void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000145 Tag tag1, UInt val1,
146 Tag tag2, UInt val2 )
147{
148 UInstr* ui;
149 ensureUInstr(cb);
150 ui = & cb->instrs[cb->used];
151 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000152 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000153 ui->val1 = val1;
154 ui->val2 = val2;
155 ui->opcode = opcode;
156 ui->tag1 = tag1;
157 ui->tag2 = tag2;
158 ui->size = sz;
159 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
160 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
161}
162
163
164__inline__
njn4ba5a792002-09-30 10:23:54 +0000165void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000166 Tag tag1, UInt val1 )
167{
168 UInstr* ui;
169 ensureUInstr(cb);
170 ui = & cb->instrs[cb->used];
171 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000172 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000173 ui->val1 = val1;
174 ui->opcode = opcode;
175 ui->tag1 = tag1;
176 ui->size = sz;
177 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
178}
179
180
181__inline__
njn4ba5a792002-09-30 10:23:54 +0000182void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000183{
184 UInstr* ui;
185 ensureUInstr(cb);
186 ui = & cb->instrs[cb->used];
187 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000188 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000189 ui->opcode = opcode;
190 ui->size = sz;
191}
192
sewardjde4a1d02002-03-22 01:27:54 +0000193/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000194__inline__
njn4ba5a792002-09-30 10:23:54 +0000195void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000196{
197 ensureUInstr(cb);
198 cb->instrs[cb->used] = *instr;
199 cb->used++;
200}
201
sewardjde4a1d02002-03-22 01:27:54 +0000202/* Copy auxiliary info from one uinstr to another. */
203static __inline__
204void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
205{
njn25e49d8e72002-09-23 09:36:25 +0000206 dst->cond = src->cond;
207 dst->extra4b = src->extra4b;
208 dst->signed_widen = src->signed_widen;
209 dst->jmpkind = src->jmpkind;
210 dst->flags_r = src->flags_r;
211 dst->flags_w = src->flags_w;
212 dst->argc = src->argc;
213 dst->regparms_n = src->regparms_n;
214 dst->has_ret_val = src->has_ret_val;
215 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000216}
217
218
sewardjde4a1d02002-03-22 01:27:54 +0000219/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000220void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000221{
222 LAST_UINSTR(cb).lit32 = lit32;
223}
224
225
njn25e49d8e72002-09-23 09:36:25 +0000226/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000227void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
228 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000229{
230 vg_assert(argc < 4);
231 vg_assert(regparms_n <= argc);
232 LAST_UINSTR(cb).lit32 = fn;
233 LAST_UINSTR(cb).argc = argc;
234 LAST_UINSTR(cb).regparms_n = regparms_n;
235 LAST_UINSTR(cb).has_ret_val = has_ret_val;
236}
237
njn810086f2002-11-14 12:42:47 +0000238/* For the last uinsn inserted into cb, set the read, written and
239 undefined flags. Undefined flags are counted as written, but it
240 seems worthwhile to distinguish them.
241*/
242__inline__
243void VG_(set_flag_fields) ( UCodeBlock* cb,
244 FlagSet rr, FlagSet ww, FlagSet uu )
245{
246 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
247
248 vg_assert(rr == (rr & FlagsALL));
249 vg_assert(uw == (uw & FlagsALL));
250 LAST_UINSTR(cb).flags_r = rr;
251 LAST_UINSTR(cb).flags_w = uw;
252}
253
254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
njn25e49d8e72002-09-23 09:36:25 +0000261#if 1
262# define BEST_ALLOC_ORDER
263#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
330/* This seems as good a place as any to record some important stuff
331 about ucode semantics.
332
333 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
334 TempReg are defined to zero-extend the loaded value to 32 bits.
335 This is needed to make the translation of movzbl et al work
336 properly.
337
338 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
339
340 * Arithmetic on TempRegs is at the specified size. For example,
341 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
342 being emitted -- not a 32 bit one.
343
344 * On some insns we allow the cc bit to be set. If so, the
345 intention is that the simulated machine's %eflags register
346 is copied into that of the real machine before the insn,
347 and copied back again afterwards. This means that the
348 code generated for that insn must be very careful only to
349 update %eflags in the intended way. This is particularly
350 important for the routines referenced by CALL insns.
351*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
   Before liveness analysis, the regs_live_after field should be
   ALL_RREGS_LIVE.  Afterwards, it may hold any set of registers.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
njn25e49d8e72002-09-23 09:36:25 +0000402Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000403{
njn25e49d8e72002-09-23 09:36:25 +0000404# define LIT0 (u->lit32 == 0)
sewardjb31b06d2003-06-13 00:26:02 +0000405# define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
njn25e49d8e72002-09-23 09:36:25 +0000406# define LIT1 (!(LIT0))
407# define LITm (u->tag1 == Literal ? True : LIT0 )
sewardj77d30a22003-10-19 08:18:52 +0000408# define SZ16 (u->size == 16)
sewardj3d7c9c82003-03-26 21:08:13 +0000409# define SZ8 (u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000410# define SZ4 (u->size == 4)
411# define SZ2 (u->size == 2)
412# define SZ1 (u->size == 1)
413# define SZ0 (u->size == 0)
414# define SZ42 (u->size == 4 || u->size == 2)
sewardjd7971012003-04-04 00:21:58 +0000415# define SZ48 (u->size == 4 || u->size == 8)
sewardjfebaa3b2003-05-25 01:07:34 +0000416# define SZ416 (u->size == 4 || u->size == 16)
nethercoteb1affa82004-01-19 19:14:18 +0000417# define SZ816 (u->size == 8 || u->size == 16)
418# define SZsse2 (u->size == 4 || u->size == 8 || u->size == 16 || u->size == 512)
jsewardfca60182004-01-04 23:30:55 +0000419# define SZsse3 (u->size == 4 || u->size == 8 || u->size == 16)
njn25e49d8e72002-09-23 09:36:25 +0000420# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
421# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
422 || u->size == 10 || u->size == 28 || u->size == 108)
423# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
424 ? (u->size == 4) : True)
425
426/* For these ones, two cases:
427 *
428 * 1. They are transliterations of the corresponding x86 instruction, in
429 * which case they should have its flags (except that redundant write
430 * flags can be annulled by the optimisation pass).
431 *
432 * 2. They are being used generally for other purposes, eg. helping with a
433 * 'rep'-prefixed instruction, in which case should have empty flags .
434 */
435# define emptyR (u->flags_r == FlagsEmpty)
436# define emptyW (u->flags_w == FlagsEmpty)
437# define CC0 (emptyR && emptyW)
438# define CCr (u->flags_r == FlagsALL && emptyW)
439# define CCw (emptyR && u->flags_w == FlagsALL)
440# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
441# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
442# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
443# define CCb ((u->flags_r==FlagC || emptyR) && \
444 (u->flags_w==FlagsOSZACP || emptyW))
445# define CCd ((u->flags_r==FlagC || emptyR) && \
446 (u->flags_w==FlagsOC || emptyW))
sewardjc232b212002-12-10 22:24:03 +0000447# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
448 || (u->flags_r==FlagsZCP && emptyW))
njn25e49d8e72002-09-23 09:36:25 +0000449# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
450# define CCj (u->cond==CondAlways ? CC0 : CCg)
451
sewardjde4a1d02002-03-22 01:27:54 +0000452# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
453# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
454# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
455# define A1 (u->tag1 == ArchReg)
456# define A2 (u->tag2 == ArchReg)
457# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
458# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
459# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
460# define L1 (u->tag1 == Literal && u->val1 == 0)
461# define L2 (u->tag2 == Literal && u->val2 == 0)
462# define Ls1 (u->tag1 == Lit16)
sewardjfebaa3b2003-05-25 01:07:34 +0000463# define Ls2 (u->tag2 == Lit16)
sewardjde4a1d02002-03-22 01:27:54 +0000464# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000465# define TRL1 (TR1 || L1)
466# define TRAL1 (TR1 || A1 || L1)
jsgf5efa4fd2003-10-14 21:49:11 +0000467# define TRA1 (TR1 || A1)
468# define TRA2 (TR2 || A2)
sewardjde4a1d02002-03-22 01:27:54 +0000469# define N1 (u->tag1 == NoValue)
470# define N2 (u->tag2 == NoValue)
471# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000472# define Se1 (u->tag1 == ArchRegS)
473# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000474
njn25e49d8e72002-09-23 09:36:25 +0000475# define COND0 (u->cond == 0)
476# define EXTRA4b0 (u->extra4b == 0)
477# define SG_WD0 (u->signed_widen == 0)
478# define JMPKIND0 (u->jmpkind == 0)
479# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
480 ( beforeLiveness \
481 ? u->regs_live_after == ALL_RREGS_LIVE \
482 : True ))
483
484# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
485# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
486# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
487# define XJMP ( SG_WD0 && CCALL0)
488# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
489# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
490
491 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000492 Int n_lits = 0;
493 if (u->tag1 == Literal) n_lits++;
494 if (u->tag2 == Literal) n_lits++;
495 if (u->tag3 == Literal) n_lits++;
496 if (n_lits > 1)
497 return False;
498
njn25e49d8e72002-09-23 09:36:25 +0000499 /* Fields not checked: val1, val2, val3 */
500
sewardjde4a1d02002-03-22 01:27:54 +0000501 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000502
503 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000504 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
505 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
506 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000507 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000508 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000509 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
510 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
511 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
512 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
513 case LOAD:
514 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
515 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
516 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
njn95bc3862003-09-30 13:22:30 +0000517 case WIDEN: return LIT0 && SZ42 && CC0 && TR1 && N2 && N3 && XWIDEN;
njn25e49d8e72002-09-23 09:36:25 +0000518 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
519 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
520 case CALLM_S:
521 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
522 case PUSH:
523 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
524 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
525 case AND:
526 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
jsgf5efa4fd2003-10-14 21:49:11 +0000527 case MUL: return LIT0 && SZ42 && CCa && TRA1 &&TRA2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000528 case ADD:
529 case XOR:
530 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
531 case SBB:
532 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
533 case SHL:
534 case SHR:
535 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
536 case ROL:
537 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
538 case RCL:
539 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
540 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
541 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
542 case INC:
543 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
544 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
545 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
546 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
547 case FPU_R:
548 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
549 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
550 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
551 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
552 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
553 case CCALL: return LIT1 && SZ0 && CC0 &&
554 (u->argc > 0 ? TR1 : N1) &&
555 (u->argc > 1 ? TR2 : N2) &&
556 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
557 u->regparms_n <= u->argc && XCCALL;
thughes96b466a2004-03-15 16:43:58 +0000558 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardj3d7c9c82003-03-26 21:08:13 +0000559 case MMX1:
thughes96b466a2004-03-15 16:43:58 +0000560 case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
561 case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
562 case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
563 case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
564 case MMX2a1_MemRd: return LIT0 && SZ8 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
565 case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
566 case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000567
568 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
jsewardfca60182004-01-04 23:30:55 +0000569 case SSE2a_MemWr: return LIT0 && SZsse2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
570 case SSE2a_MemRd: return LIT0 && SZsse2 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
nethercote1018bdd2004-02-11 23:33:29 +0000571 case SSE2a1_MemRd: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
nethercoteb1affa82004-01-19 19:14:18 +0000572 case SSE2g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
573 case SSE2g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
574 case SSE2e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
jsewardfca60182004-01-04 23:30:55 +0000575 case SSE3a_MemWr: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
576 case SSE3a_MemRd: return LIT0 && SZsse3 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
577 case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
578 case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
nethercoteb1affa82004-01-19 19:14:18 +0000579 case SSE3a1_MemRd: return LIT8 && SZ816 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
jsewardfca60182004-01-04 23:30:55 +0000580 case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
581 case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
582 case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
583 case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
584 case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
585 case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
sewardje3891fa2003-06-15 03:13:48 +0000586 case SSE3ag_MemRd_RegWr:
jsewardfca60182004-01-04 23:30:55 +0000587 return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000588 default:
589 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000590 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000591 else {
592 VG_(printf)("unhandled opcode: %u. Perhaps "
593 "VG_(needs).extended_UCode should be set?",
594 u->opcode);
njne427a662002-10-02 11:08:25 +0000595 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000596 }
sewardjde4a1d02002-03-22 01:27:54 +0000597 }
njn25e49d8e72002-09-23 09:36:25 +0000598# undef LIT0
599# undef LIT1
sewardjb31b06d2003-06-13 00:26:02 +0000600# undef LIT8
njn25e49d8e72002-09-23 09:36:25 +0000601# undef LITm
sewardj77d30a22003-10-19 08:18:52 +0000602# undef SZ16
sewardj3d7c9c82003-03-26 21:08:13 +0000603# undef SZ8
sewardjde4a1d02002-03-22 01:27:54 +0000604# undef SZ4
605# undef SZ2
606# undef SZ1
607# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000608# undef SZ42
sewardjd7971012003-04-04 00:21:58 +0000609# undef SZ48
sewardjfebaa3b2003-05-25 01:07:34 +0000610# undef SZ416
jsewardfca60182004-01-04 23:30:55 +0000611# undef SZsse2
612# undef SZsse3
njn25e49d8e72002-09-23 09:36:25 +0000613# undef SZi
614# undef SZf
615# undef SZ4m
616# undef emptyR
617# undef emptyW
618# undef CC0
619# undef CCr
620# undef CCw
621# undef CCa
622# undef CCb
623# undef CCc
624# undef CCd
625# undef CCe
626# undef CCf
627# undef CCg
628# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000629# undef TR1
630# undef TR2
631# undef TR3
632# undef A1
633# undef A2
634# undef AS1
635# undef AS2
636# undef AS3
637# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000638# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000639# undef Ls1
sewardjfebaa3b2003-05-25 01:07:34 +0000640# undef Ls2
sewardjde4a1d02002-03-22 01:27:54 +0000641# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000642# undef TRL1
643# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000644# undef N1
645# undef N2
646# undef N3
sewardje1042472002-09-30 12:33:11 +0000647# undef Se2
648# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000649# undef COND0
650# undef EXTRA4b0
651# undef SG_WD0
652# undef JMPKIND0
653# undef CCALL0
654# undef Xextra4b
655# undef XWIDEN
656# undef XJMP
657# undef XCCALL
658# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000659}
660
njn25e49d8e72002-09-23 09:36:25 +0000661void VG_(saneUCodeBlock) ( UCodeBlock* cb )
662{
663 Int i;
664
665 for (i = 0; i < cb->used; i++) {
666 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
667 if (!sane) {
668 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000669 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000670 }
671 vg_assert(sane);
672 }
673}
sewardjde4a1d02002-03-22 01:27:54 +0000674
675/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000676Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000677{
678 Int callm = 0;
679 Int callm_s = 0;
680 Int callm_e = 0;
681 Int callm_ptr, calls_ptr;
682 Int i, j, t;
683 Bool incall = False;
684
685 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
686
687 for (i = 0; i < cb->used; i++) {
688 switch (cb->instrs[i].opcode) {
689 case CALLM:
690 if (!incall) return False;
691 callm++;
692 break;
693 case CALLM_S:
694 if (incall) return False;
695 incall = True;
696 callm_s++;
697 break;
698 case CALLM_E:
699 if (!incall) return False;
700 incall = False;
701 callm_e++;
702 break;
703 case PUSH: case POP: case CLEAR:
704 if (!incall) return False;
705 break;
706 default:
707 break;
708 }
709 }
710 if (incall) return False;
711 if (callm != callm_s || callm != callm_e) return False;
712
713 /* Check the sections between CALLM_S and CALLM's. Ensure that no
714 PUSH uinsn pushes any TempReg that any other PUSH in the same
715 section pushes. Ie, check that the TempReg args to PUSHes in
716 the section are unique. If not, the instrumenter generates
717 incorrect code for CALLM insns. */
718
719 callm_ptr = 0;
720
721 find_next_CALLM:
722 /* Search for the next interval, making calls_ptr .. callm_ptr
723 bracket it. */
724 while (callm_ptr < cb->used
725 && cb->instrs[callm_ptr].opcode != CALLM)
726 callm_ptr++;
727 if (callm_ptr == cb->used)
728 return True;
729 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
730
731 calls_ptr = callm_ptr - 1;
732 while (cb->instrs[calls_ptr].opcode != CALLM_S)
733 calls_ptr--;
734 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
735 vg_assert(calls_ptr >= 0);
736
737 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
738
739 /* For each PUSH insn in the interval ... */
740 for (i = calls_ptr + 1; i < callm_ptr; i++) {
741 if (cb->instrs[i].opcode != PUSH) continue;
742 t = cb->instrs[i].val1;
743 /* Ensure no later PUSH insns up to callm_ptr push the same
744 TempReg. Return False if any such are found. */
745 for (j = i+1; j < callm_ptr; j++) {
746 if (cb->instrs[j].opcode == PUSH &&
747 cb->instrs[j].val1 == t)
748 return False;
749 }
750 }
751
752 /* This interval is clean. Keep going ... */
753 callm_ptr++;
754 goto find_next_CALLM;
755}
756
757
/*------------------------------------------------------------*/
/*--- Printing uinstrs.                                    ---*/
/*------------------------------------------------------------*/
761
njn25e49d8e72002-09-23 09:36:25 +0000762/* Global that dictates whether to print generated code at all stages */
763Bool VG_(print_codegen);
764
njn563f96f2003-02-03 11:17:46 +0000765Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000766{
767 switch (cond) {
768 case CondO: return "o";
769 case CondNO: return "no";
770 case CondB: return "b";
771 case CondNB: return "nb";
772 case CondZ: return "z";
773 case CondNZ: return "nz";
774 case CondBE: return "be";
775 case CondNBE: return "nbe";
776 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000777 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000778 case CondP: return "p";
779 case CondNP: return "np";
780 case CondL: return "l";
781 case CondNL: return "nl";
782 case CondLE: return "le";
783 case CondNLE: return "nle";
784 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000785 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000786 }
787}
788
789
790static void vg_ppFlagSet ( Char* prefix, FlagSet set )
791{
792 VG_(printf)("%s", prefix);
793 if (set & FlagD) VG_(printf)("D");
794 if (set & FlagO) VG_(printf)("O");
795 if (set & FlagS) VG_(printf)("S");
796 if (set & FlagZ) VG_(printf)("Z");
797 if (set & FlagA) VG_(printf)("A");
798 if (set & FlagC) VG_(printf)("C");
799 if (set & FlagP) VG_(printf)("P");
800}
801
802
803static void ppTempReg ( Int tt )
804{
805 if ((tt & 1) == 0)
806 VG_(printf)("t%d", tt);
807 else
808 VG_(printf)("q%d", tt-1);
809}
810
811
njn4ba5a792002-09-30 10:23:54 +0000812void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000813{
814 UInt tag, val;
815 switch (operandNo) {
816 case 1: tag = u->tag1; val = u->val1; break;
817 case 2: tag = u->tag2; val = u->val2; break;
818 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000819 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000820 }
821 if (tag == Literal) val = u->lit32;
822
823 if (parens) VG_(printf)("(");
824 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000825 case TempReg: ppTempReg(val); break;
826 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
827 case Literal: VG_(printf)("$0x%x", val); break;
828 case Lit16: VG_(printf)("$0x%x", val); break;
829 case NoValue: VG_(printf)("NoValue"); break;
830 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
831 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
832 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000833 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000834 }
835 if (parens) VG_(printf)(")");
836}
837
838
njn4ba5a792002-09-30 10:23:54 +0000839Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000840{
841 switch (opc) {
842 case ADD: return (upper ? "ADD" : "add");
843 case ADC: return (upper ? "ADC" : "adc");
844 case AND: return (upper ? "AND" : "and");
845 case OR: return (upper ? "OR" : "or");
846 case XOR: return (upper ? "XOR" : "xor");
847 case SUB: return (upper ? "SUB" : "sub");
848 case SBB: return (upper ? "SBB" : "sbb");
849 case SHL: return (upper ? "SHL" : "shl");
850 case SHR: return (upper ? "SHR" : "shr");
851 case SAR: return (upper ? "SAR" : "sar");
852 case ROL: return (upper ? "ROL" : "rol");
853 case ROR: return (upper ? "ROR" : "ror");
854 case RCL: return (upper ? "RCL" : "rcl");
855 case RCR: return (upper ? "RCR" : "rcr");
jsgf5efa4fd2003-10-14 21:49:11 +0000856 case MUL: return (upper ? "MUL" : "mul");
sewardjde4a1d02002-03-22 01:27:54 +0000857 case NOT: return (upper ? "NOT" : "not");
858 case NEG: return (upper ? "NEG" : "neg");
859 case INC: return (upper ? "INC" : "inc");
860 case DEC: return (upper ? "DEC" : "dec");
861 case BSWAP: return (upper ? "BSWAP" : "bswap");
862 default: break;
863 }
njne427a662002-10-02 11:08:25 +0000864 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000865 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000866 case CALLM_S: return "CALLM_S";
867 case CALLM_E: return "CALLM_E";
868 case INCEIP: return "INCEIP";
869 case LEA1: return "LEA1";
870 case LEA2: return "LEA2";
871 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000872 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000873 case GET: return "GET";
874 case PUT: return "PUT";
875 case GETF: return "GETF";
876 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000877 case GETSEG: return "GETSEG";
878 case PUTSEG: return "PUTSEG";
879 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000880 case LOAD: return "LD" ;
881 case STORE: return "ST" ;
882 case MOV: return "MOV";
883 case CMOV: return "CMOV";
884 case WIDEN: return "WIDEN";
885 case JMP: return "J" ;
886 case JIFZ: return "JIFZ" ;
887 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000888 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000889 case PUSH: return "PUSH" ;
890 case POP: return "POP" ;
891 case CLEAR: return "CLEAR";
892 case CC2VAL: return "CC2VAL";
893 case FPU_R: return "FPU_R";
894 case FPU_W: return "FPU_W";
895 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000896 case MMX1: return "MMX1" ;
897 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000898 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000899 case MMX2_MemRd: return "MMX2_MRd" ;
900 case MMX2_MemWr: return "MMX2_MWr" ;
thughes96b466a2004-03-15 16:43:58 +0000901 case MMX2a1_MemRd: return "MMX2a1_MRd" ;
sewardj4fbe6e92003-06-15 21:54:34 +0000902 case MMX2_ERegRd: return "MMX2_eRRd" ;
903 case MMX2_ERegWr: return "MMX2_eRWr" ;
sewardjfebaa3b2003-05-25 01:07:34 +0000904 case SSE2a_MemWr: return "SSE2a_MWr";
905 case SSE2a_MemRd: return "SSE2a_MRd";
nethercoteb1affa82004-01-19 19:14:18 +0000906 case SSE2g_RegWr: return "SSE2g_RWr";
sewardj9dd209f2003-06-18 23:30:52 +0000907 case SSE2a1_MemRd: return "SSE2a1_MRd";
nethercoteb1affa82004-01-19 19:14:18 +0000908 case SSE2g1_RegWr: return "SSE2g1_RWr";
909 case SSE2e1_RegRd: return "SSE2e1_RRd";
sewardj4fbe6e92003-06-15 21:54:34 +0000910 case SSE3e_RegRd: return "SSE3e_RRd";
sewardjabf8bf82003-06-15 22:28:05 +0000911 case SSE3e_RegWr: return "SSE3e_RWr";
sewardj02af6bc2003-06-12 00:56:06 +0000912 case SSE3g_RegWr: return "SSE3g_RWr";
sewardj77d30a22003-10-19 08:18:52 +0000913 case SSE3a1_MemRd: return "SSE3a1_MRd";
sewardjb31b06d2003-06-13 00:26:02 +0000914 case SSE3g1_RegWr: return "SSE3g1_RWr";
sewardj4fbe6e92003-06-15 21:54:34 +0000915 case SSE3e1_RegRd: return "SSE3e1_RRd";
sewardja60be0e2003-05-26 08:47:27 +0000916 case SSE3: return "SSE3";
sewardjfebaa3b2003-05-25 01:07:34 +0000917 case SSE4: return "SSE4";
sewardja453fb02003-06-14 13:22:36 +0000918 case SSE5: return "SSE5";
sewardjfebaa3b2003-05-25 01:07:34 +0000919 case SSE3a_MemWr: return "SSE3a_MWr";
920 case SSE3a_MemRd: return "SSE3a_MRd";
sewardje3891fa2003-06-15 03:13:48 +0000921 case SSE3ag_MemRd_RegWr: return "SSE3ag_MemRd_RegWr";
njn25e49d8e72002-09-23 09:36:25 +0000922 default:
923 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000924 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000925 else {
926 VG_(printf)("unhandled opcode: %u. Perhaps "
927 "VG_(needs).extended_UCode should be set?",
928 opc);
njne427a662002-10-02 11:08:25 +0000929 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000930 }
sewardjde4a1d02002-03-22 01:27:54 +0000931 }
932}
933
sewardja38e0922002-10-01 00:50:47 +0000934static
njn4ba5a792002-09-30 10:23:54 +0000935void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000936{
937# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000938 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000939 u->regs_live_after) \
940 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000941
njn25e49d8e72002-09-23 09:36:25 +0000942 VG_(printf)("[");
943 PRINT_RREG_LIVENESS(R_EAX, "a");
944 PRINT_RREG_LIVENESS(R_EBX, "b");
945 PRINT_RREG_LIVENESS(R_ECX, "c");
946 PRINT_RREG_LIVENESS(R_EDX, "d");
947 PRINT_RREG_LIVENESS(R_ESI, "S");
948 PRINT_RREG_LIVENESS(R_EDI, "D");
949 VG_(printf)("]");
950
951# undef PRINT_RREG_LIVENESS
952}
953
954/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000955void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000956{
njn4ba5a792002-09-30 10:23:54 +0000957 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000958
959 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000960 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000961 VG_(printf)("size: %d\n", u->size);
962 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
963 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000964 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
965 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
966 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
967 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000968 VG_(printf)("signed_widen: %d\n", u->signed_widen);
969 VG_(printf)("jmpkind: %d\n", u->jmpkind);
970 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
971 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
972 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000973 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000974 VG_(printf)("\n");
975}
976
sewardja38e0922002-10-01 00:50:47 +0000977static
njn4ba5a792002-09-30 10:23:54 +0000978void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000979{
980 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000981 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000982 if (u->opcode == JMP || u->opcode == CC2VAL)
njn563f96f2003-02-03 11:17:46 +0000983 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000984
985 switch (u->size) {
986 case 0: VG_(printf)("o"); break;
987 case 1: VG_(printf)("B"); break;
988 case 2: VG_(printf)("W"); break;
989 case 4: VG_(printf)("L"); break;
990 case 8: VG_(printf)("Q"); break;
sewardjfebaa3b2003-05-25 01:07:34 +0000991 case 16: VG_(printf)("QQ"); break;
sewardjde4a1d02002-03-22 01:27:54 +0000992 default: VG_(printf)("%d", (Int)u->size); break;
993 }
994
sewardjfebaa3b2003-05-25 01:07:34 +0000995 VG_(printf)(" \t");
996
sewardjde4a1d02002-03-22 01:27:54 +0000997 switch (u->opcode) {
998
sewardjde4a1d02002-03-22 01:27:54 +0000999 case CALLM_S: case CALLM_E:
1000 break;
1001
1002 case INCEIP:
sewardjfebaa3b2003-05-25 01:07:34 +00001003 VG_(printf)("$%d", u->val1);
sewardjde4a1d02002-03-22 01:27:54 +00001004 break;
1005
1006 case LEA2:
sewardjfebaa3b2003-05-25 01:07:34 +00001007 VG_(printf)("%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +00001008 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001009 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +00001010 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001011 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +00001012 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001013 break;
1014
1015 case LEA1:
sewardjfebaa3b2003-05-25 01:07:34 +00001016 VG_(printf)("%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +00001017 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001018 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001019 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001020 break;
1021
sewardj7a5ebcf2002-11-13 22:42:13 +00001022 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001023 break;
1024
1025 case FPU_W:
sewardjfebaa3b2003-05-25 01:07:34 +00001026 VG_(printf)("0x%x:0x%x, ",
sewardjde4a1d02002-03-22 01:27:54 +00001027 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +00001028 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001029 break;
1030
1031 case FPU_R:
sewardjfebaa3b2003-05-25 01:07:34 +00001032 VG_(printf)("");
njn4ba5a792002-09-30 10:23:54 +00001033 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001034 VG_(printf)(", 0x%x:0x%x",
1035 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1036 break;
1037
1038 case FPU:
sewardjfebaa3b2003-05-25 01:07:34 +00001039 VG_(printf)("0x%x:0x%x",
sewardjde4a1d02002-03-22 01:27:54 +00001040 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1041 break;
1042
sewardj3d7c9c82003-03-26 21:08:13 +00001043 case MMX1:
sewardjfebaa3b2003-05-25 01:07:34 +00001044 VG_(printf)("0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001045 u->val1 & 0xFF );
1046 break;
1047
1048 case MMX2:
sewardjfebaa3b2003-05-25 01:07:34 +00001049 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001050 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1051 break;
1052
sewardjca860012003-03-27 23:52:58 +00001053 case MMX3:
sewardjfebaa3b2003-05-25 01:07:34 +00001054 VG_(printf)("0x%x:0x%x:0x%x",
sewardjca860012003-03-27 23:52:58 +00001055 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1056 break;
1057
sewardj4fbe6e92003-06-15 21:54:34 +00001058 case MMX2_ERegWr:
1059 case MMX2_ERegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001060 VG_(printf)("0x%x:0x%x, ",
sewardjca860012003-03-27 23:52:58 +00001061 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1062 VG_(pp_UOperand)(u, 2, 4, False);
1063 break;
1064
sewardj3d7c9c82003-03-26 21:08:13 +00001065 case MMX2_MemWr:
1066 case MMX2_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001067 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001068 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1069 VG_(pp_UOperand)(u, 2, 4, True);
1070 break;
1071
thughes96b466a2004-03-15 16:43:58 +00001072 case MMX2a1_MemRd:
1073 VG_(printf)("0x%x:0x%x:0x%x",
1074 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1075 VG_(pp_UOperand)(u, 3, 4, True);
1076 break;
1077
sewardjfebaa3b2003-05-25 01:07:34 +00001078 case SSE2a_MemWr:
1079 case SSE2a_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +00001080 case SSE2g_RegWr:
1081 case SSE2g1_RegWr:
1082 case SSE2e1_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001083 VG_(printf)("0x%x:0x%x:0x%x",
1084 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1085 VG_(pp_UOperand)(u, 3, 4, True);
1086 break;
1087
sewardj9dd209f2003-06-18 23:30:52 +00001088 case SSE2a1_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001089 case SSE3a_MemWr:
1090 case SSE3a_MemRd:
1091 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1092 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
sewardjde8aecf2003-05-27 00:46:28 +00001093 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
sewardjfebaa3b2003-05-25 01:07:34 +00001094 VG_(pp_UOperand)(u, 3, 4, True);
1095 break;
1096
sewardjabf8bf82003-06-15 22:28:05 +00001097 case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001098 case SSE3e_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001099 case SSE3g_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001100 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1101 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1102 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1103 VG_(pp_UOperand)(u, 3, 4, True);
1104 break;
1105
sewardjb31b06d2003-06-13 00:26:02 +00001106 case SSE3g1_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001107 case SSE3e1_RegRd:
sewardj77d30a22003-10-19 08:18:52 +00001108 case SSE3a1_MemRd:
sewardjb31b06d2003-06-13 00:26:02 +00001109 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1110 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1111 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1112 u->lit32 );
1113 VG_(pp_UOperand)(u, 3, 4, True);
1114 break;
1115
sewardja60be0e2003-05-26 08:47:27 +00001116 case SSE3:
1117 VG_(printf)("0x%x:0x%x:0x%x",
1118 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1119 u->val2 & 0xFF );
1120 break;
1121
sewardjfebaa3b2003-05-25 01:07:34 +00001122 case SSE4:
1123 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1124 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1125 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1126 break;
1127
sewardja453fb02003-06-14 13:22:36 +00001128 case SSE5:
1129 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1130 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1131 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1132 u->val3 & 0xFF );
1133 break;
1134
sewardje3891fa2003-06-15 03:13:48 +00001135 case SSE3ag_MemRd_RegWr:
1136 VG_(printf)("0x%x(addr=", u->lit32 );
1137 VG_(pp_UOperand)(u, 1, 4, False);
1138 VG_(printf)(", dst=");
1139 VG_(pp_UOperand)(u, 2, 4, False);
1140 VG_(printf)(")");
1141 break;
1142
sewardjde4a1d02002-03-22 01:27:54 +00001143 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001144 case GETSEG: case PUTSEG:
njn4ba5a792002-09-30 10:23:54 +00001145 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001146 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001147 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001148 break;
1149
1150 case JMP:
1151 switch (u->jmpkind) {
1152 case JmpCall: VG_(printf)("-c"); break;
1153 case JmpRet: VG_(printf)("-r"); break;
1154 case JmpSyscall: VG_(printf)("-sys"); break;
1155 case JmpClientReq: VG_(printf)("-cli"); break;
fitzhardingea02f8812003-12-18 09:06:09 +00001156 case JmpYield: VG_(printf)("-yld"); break;
njn25e49d8e72002-09-23 09:36:25 +00001157 default: break;
1158 }
njn4ba5a792002-09-30 10:23:54 +00001159 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001160 if (CondAlways == u->cond) {
1161 /* Print x86 instruction size if filled in */
1162 if (0 != u->extra4b)
1163 VG_(printf)(" ($%u)", u->extra4b);
1164 }
sewardjde4a1d02002-03-22 01:27:54 +00001165 break;
1166
1167 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001168 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1169 case NOT: case NEG: case INC: case DEC: case BSWAP:
njn4ba5a792002-09-30 10:23:54 +00001170 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001171 break;
1172
njn25e49d8e72002-09-23 09:36:25 +00001173 /* Print a "(s)" after args passed on stack */
1174 case CCALL:
njn25e49d8e72002-09-23 09:36:25 +00001175 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001176 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001177 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001178 }
njn25e49d8e72002-09-23 09:36:25 +00001179 VG_(printf)("%p(", u->lit32);
1180 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001181 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001182 if (u->regparms_n < 1)
1183 VG_(printf)("(s)");
1184 }
1185 if (u->argc > 1) {
1186 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001187 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001188 if (u->regparms_n < 2)
1189 VG_(printf)("(s)");
1190 }
1191 if (u->argc > 2) {
1192 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001193 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001194 if (u->regparms_n < 3)
1195 VG_(printf)("(s)");
1196 }
1197 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001198 break;
1199
sewardje1042472002-09-30 12:33:11 +00001200 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001201 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001202 case ADD: case ADC: case AND: case OR:
1203 case XOR: case SUB: case SBB:
1204 case SHL: case SHR: case SAR:
jsgf5efa4fd2003-10-14 21:49:11 +00001205 case ROL: case ROR: case RCL: case RCR:
1206 case MUL:
njn4ba5a792002-09-30 10:23:54 +00001207 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001208 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001209 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001210 break;
1211
1212 case WIDEN:
1213 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1214 u->signed_widen?'s':'z');
njn4ba5a792002-09-30 10:23:54 +00001215 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001216 break;
1217
njn25e49d8e72002-09-23 09:36:25 +00001218 default:
1219 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001220 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001221 else {
1222 VG_(printf)("unhandled opcode: %u. Perhaps "
1223 "VG_(needs).extended_UCode should be set?",
1224 u->opcode);
njne427a662002-10-02 11:08:25 +00001225 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001226 }
sewardjde4a1d02002-03-22 01:27:54 +00001227 }
sewardjde4a1d02002-03-22 01:27:54 +00001228 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1229 VG_(printf)(" (");
1230 if (u->flags_r != FlagsEmpty)
1231 vg_ppFlagSet("-r", u->flags_r);
1232 if (u->flags_w != FlagsEmpty)
1233 vg_ppFlagSet("-w", u->flags_w);
1234 VG_(printf)(")");
1235 }
njn25e49d8e72002-09-23 09:36:25 +00001236
1237 if (ppRegsLiveness) {
1238 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001239 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001240 }
1241
sewardjde4a1d02002-03-22 01:27:54 +00001242 VG_(printf)("\n");
1243}
1244
njn4ba5a792002-09-30 10:23:54 +00001245void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001246{
njn4ba5a792002-09-30 10:23:54 +00001247 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001248}
1249
njn4ba5a792002-09-30 10:23:54 +00001250void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001251{
njn4ba5a792002-09-30 10:23:54 +00001252 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001253}
sewardjde4a1d02002-03-22 01:27:54 +00001254
njn4ba5a792002-09-30 10:23:54 +00001255void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001256{
1257 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001258 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001259 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001260 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001261 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001262 VG_(printf)("\n");
1263}
1264
1265
/*------------------------------------------------------------*/
/*--- uinstr helpers for register allocation               ---*/
/*--- and code improvement.                                ---*/
/*------------------------------------------------------------*/
1270
njn25e49d8e72002-09-23 09:36:25 +00001271/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001272 the caller (regs), which is assumed to be big enough. Return the number
1273 of entries. Written regs are indicated in parallel array isWrites.
1274 Insns which read _and_ write a register wind up mentioning it twice.
1275 Entries are placed in the array in program order, so that if a reg is
1276 read-modified-written, it appears first as a read and then as a write.
1277 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001278*/
njn810086f2002-11-14 12:42:47 +00001279Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001280{
njn810086f2002-11-14 12:42:47 +00001281# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1282# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001283
1284 Int n = 0;
1285 switch (u->opcode) {
1286 case LEA1: RD(1); WR(2); break;
1287 case LEA2: RD(1); RD(2); WR(3); break;
1288
sewardj77d30a22003-10-19 08:18:52 +00001289 case SSE3a1_MemRd:
sewardj9dd209f2003-06-18 23:30:52 +00001290 case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +00001291 case SSE2e1_RegRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001292 case SSE3e_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001293 case SSE3a_MemWr:
1294 case SSE3a_MemRd:
1295 case SSE2a_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001296 case SSE3e1_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001297 case SSE2a_MemRd: RD(3); break;
1298
nethercoteb1affa82004-01-19 19:14:18 +00001299 case SSE2g_RegWr:
1300 case SSE2g1_RegWr:
sewardjabf8bf82003-06-15 22:28:05 +00001301 case SSE3e_RegWr:
sewardjb31b06d2003-06-13 00:26:02 +00001302 case SSE3g1_RegWr:
sewardj02af6bc2003-06-12 00:56:06 +00001303 case SSE3g_RegWr: WR(3); break;
sewardjfebaa3b2003-05-25 01:07:34 +00001304
sewardje3891fa2003-06-15 03:13:48 +00001305 case SSE3ag_MemRd_RegWr: RD(1); WR(2); break;
1306
thughes96b466a2004-03-15 16:43:58 +00001307 case MMX2a1_MemRd: RD(3); break;
sewardj4fbe6e92003-06-15 21:54:34 +00001308 case MMX2_ERegRd: RD(2); break;
1309 case MMX2_ERegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001310
sewardja453fb02003-06-14 13:22:36 +00001311 case SSE4: case SSE3: case SSE5:
sewardjca860012003-03-27 23:52:58 +00001312 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001313 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001314 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001315
1316 case CCALL:
1317 if (u->argc > 0) RD(1);
1318 if (u->argc > 1) RD(2);
1319 if (u->argc > 2) RD(3);
1320 if (u->has_ret_val) WR(3);
1321 break;
1322
sewardj3d7c9c82003-03-26 21:08:13 +00001323 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001324 case FPU_R: case FPU_W: RD(2); break;
1325
sewardje1042472002-09-30 12:33:11 +00001326 case GETSEG: WR(2); break;
1327 case PUTSEG: RD(1); break;
1328
sewardjde4a1d02002-03-22 01:27:54 +00001329 case GETF: WR(1); break;
1330 case PUTF: RD(1); break;
1331
1332 case GET: WR(2); break;
1333 case PUT: RD(1); break;
1334 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001335 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001336 case MOV: RD(1); WR(2); break;
1337
1338 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001339
njn25e49d8e72002-09-23 09:36:25 +00001340 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001341 case POP: WR(1); break;
1342
sewardje1042472002-09-30 12:33:11 +00001343 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001344 case CMOV:
1345 case ADD: case ADC: case AND: case OR:
1346 case XOR: case SUB: case SBB:
jsgf5efa4fd2003-10-14 21:49:11 +00001347 case MUL:
sewardjde4a1d02002-03-22 01:27:54 +00001348 RD(1); RD(2); WR(2); break;
1349
1350 case SHL: case SHR: case SAR:
1351 case ROL: case ROR: case RCL: case RCR:
1352 RD(1); RD(2); WR(2); break;
1353
njn25e49d8e72002-09-23 09:36:25 +00001354 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001355 RD(1); WR(1); break;
1356
1357 case WIDEN: RD(1); WR(1); break;
1358
1359 case CC2VAL: WR(1); break;
1360 case JIFZ: RD(1); break;
1361
njn25e49d8e72002-09-23 09:36:25 +00001362 default:
1363 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001364 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001365 else {
1366 VG_(printf)("unhandled opcode: %u. Perhaps "
1367 "VG_(needs).extended_UCode should be set?",
1368 u->opcode);
njne427a662002-10-02 11:08:25 +00001369 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001370 }
sewardjde4a1d02002-03-22 01:27:54 +00001371 }
1372 return n;
1373
1374# undef RD
1375# undef WR
1376}
1377
1378
njn25e49d8e72002-09-23 09:36:25 +00001379/* Change temp regs in u into real regs, as directed by the
1380 * temps[i]-->reals[i] mapping. */
sewardj56867352003-10-12 10:27:06 +00001381static
njn810086f2002-11-14 12:42:47 +00001382void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001383{
1384 Int i;
1385 if (u->tag1 == TempReg) {
1386 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001387 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001388 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001389 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001390 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001391 }
1392 if (u->tag2 == TempReg) {
1393 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001394 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001395 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001396 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001397 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001398 }
1399 if (u->tag3 == TempReg) {
1400 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001401 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001402 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001403 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001404 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001405 }
1406}
1407
1408
1409/* Tedious x86-specific hack which compensates for the fact that the
1410 register numbers for %ah .. %dh do not correspond to those for %eax
1411 .. %edx. It maps a (reg size, reg no) pair to the number of the
1412 containing 32-bit reg. */
1413static __inline__
1414Int containingArchRegOf ( Int sz, Int aregno )
1415{
1416 switch (sz) {
1417 case 4: return aregno;
1418 case 2: return aregno;
1419 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001420 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001421 }
1422}
1423
1424
1425/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001426 reg. Otherwise return -1. Used in redundant-PUT elimination.
1427 Note that this is not required for skins extending UCode because
1428 this happens before instrumentation. */
sewardj56867352003-10-12 10:27:06 +00001429static
sewardjde4a1d02002-03-22 01:27:54 +00001430Int maybe_uinstrReadsArchReg ( UInstr* u )
1431{
1432 switch (u->opcode) {
1433 case GET:
1434 case ADD: case ADC: case AND: case OR:
1435 case XOR: case SUB: case SBB:
1436 case SHL: case SHR: case SAR: case ROL:
1437 case ROR: case RCL: case RCR:
jsgf5efa4fd2003-10-14 21:49:11 +00001438 case MUL:
sewardjde4a1d02002-03-22 01:27:54 +00001439 if (u->tag1 == ArchReg)
1440 return containingArchRegOf ( u->size, u->val1 );
1441 else
1442 return -1;
1443
1444 case GETF: case PUTF:
1445 case CALLM_S: case CALLM_E:
1446 case INCEIP:
1447 case LEA1:
1448 case LEA2:
1449 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001450 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001451 case PUT:
1452 case LOAD:
1453 case STORE:
1454 case MOV:
1455 case CMOV:
1456 case JMP:
1457 case CALLM: case CLEAR: case PUSH: case POP:
1458 case NOT: case NEG: case INC: case DEC: case BSWAP:
1459 case CC2VAL:
1460 case JIFZ:
1461 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001462 case MMX1: case MMX2: case MMX3:
thughes96b466a2004-03-15 16:43:58 +00001463 case MMX2_MemRd: case MMX2_MemWr: case MMX2a1_MemRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001464 case MMX2_ERegRd: case MMX2_ERegWr:
sewardj9dd209f2003-06-18 23:30:52 +00001465 case SSE2a_MemWr: case SSE2a_MemRd: case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +00001466 case SSE2g_RegWr: case SSE2g1_RegWr: case SSE2e1_RegRd:
sewardj77d30a22003-10-19 08:18:52 +00001467 case SSE3a_MemWr: case SSE3a_MemRd: case SSE3a1_MemRd:
sewardjabf8bf82003-06-15 22:28:05 +00001468 case SSE3e_RegRd: case SSE3g_RegWr: case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001469 case SSE3g1_RegWr: case SSE3e1_RegRd:
sewardje3891fa2003-06-15 03:13:48 +00001470 case SSE4: case SSE3: case SSE5: case SSE3ag_MemRd_RegWr:
sewardjde4a1d02002-03-22 01:27:54 +00001471 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001472 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1473 case GETSEG: case PUTSEG:
1474 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001475 return -1;
1476
1477 default:
njn4ba5a792002-09-30 10:23:54 +00001478 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001479 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001480 }
1481}
1482
1483static __inline__
1484Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1485{
1486 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001487 Int tempUse[VG_MAX_REGS_USED];
1488 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001489
1490 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001491 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001492 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001493 return True;
1494 return False;
1495}
1496
1497
1498/*------------------------------------------------------------*/
1499/*--- ucode improvement. ---*/
1500/*------------------------------------------------------------*/
1501
1502/* Improve the code in cb by doing
1503 -- Redundant ArchReg-fetch elimination
1504 -- Redundant PUT elimination
1505 -- Redundant cond-code restore/save elimination
1506 The overall effect of these is to allow target registers to be
1507 cached in host registers over multiple target insns.
1508*/
1509static void vg_improve ( UCodeBlock* cb )
1510{
1511 Int i, j, k, m, n, ar, tr, told, actual_areg;
1512 Int areg_map[8];
1513 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001514 Int tempUse[VG_MAX_REGS_USED];
1515 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001516 UInstr* u;
1517 Bool wr;
1518 Int* last_live_before;
1519 FlagSet future_dead_flags;
1520
njn25e49d8e72002-09-23 09:36:25 +00001521 if (dis)
1522 VG_(printf) ("Improvements:\n");
1523
sewardjde4a1d02002-03-22 01:27:54 +00001524 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001525 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1526 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001527 else
1528 last_live_before = NULL;
1529
1530
1531 /* PASS 1: redundant GET elimination. (Actually, more general than
1532 that -- eliminates redundant fetches of ArchRegs). */
1533
1534 /* Find the live-range-ends for all temporaries. Duplicates code
1535 in the register allocator :-( */
1536
1537 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1538
1539 for (i = cb->used-1; i >= 0; i--) {
1540 u = &cb->instrs[i];
1541
njn810086f2002-11-14 12:42:47 +00001542 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001543
1544 /* For each temp usage ... bwds in program order. */
1545 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001546 tr = tempUse[j];
1547 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001548 if (last_live_before[tr] == -1) {
1549 vg_assert(tr >= 0 && tr < cb->nextTemp);
1550 last_live_before[tr] = wr ? (i+1) : i;
1551 }
1552 }
1553
1554 }
1555
1556# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1557 { Int q; \
1558 /* Invalidate any old binding(s) to tempreg. */ \
1559 for (q = 0; q < 8; q++) \
1560 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1561 /* Add the new binding. */ \
1562 areg_map[archreg] = (tempreg); \
1563 }
1564
1565 /* Set up the A-reg map. */
1566 for (i = 0; i < 8; i++) areg_map[i] = -1;
1567
1568 /* Scan insns. */
1569 for (i = 0; i < cb->used; i++) {
1570 u = &cb->instrs[i];
1571 if (u->opcode == GET && u->size == 4) {
1572 /* GET; see if it can be annulled. */
1573 vg_assert(u->tag1 == ArchReg);
1574 vg_assert(u->tag2 == TempReg);
1575 ar = u->val1;
1576 tr = u->val2;
1577 told = areg_map[ar];
1578 if (told != -1 && last_live_before[told] <= i) {
1579 /* ar already has an old mapping to told, but that runs
1580 out here. Annul this GET, rename tr to told for the
1581 rest of the block, and extend told's live range to that
1582 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001583 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001584 n = last_live_before[tr] + 1;
1585 if (n > cb->used) n = cb->used;
1586 last_live_before[told] = last_live_before[tr];
1587 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001588 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001589 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001590 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001591 i, tr, told,i+1, n-1);
1592 for (m = i+1; m < n; m++) {
1593 if (cb->instrs[m].tag1 == TempReg
1594 && cb->instrs[m].val1 == tr)
1595 cb->instrs[m].val1 = told;
1596 if (cb->instrs[m].tag2 == TempReg
1597 && cb->instrs[m].val2 == tr)
1598 cb->instrs[m].val2 = told;
sewardjfebaa3b2003-05-25 01:07:34 +00001599 if (cb->instrs[m].tag3 == TempReg
1600 && cb->instrs[m].val3 == tr)
1601 cb->instrs[m].val3 = told;
sewardjde4a1d02002-03-22 01:27:54 +00001602 }
1603 BIND_ARCH_TO_TEMP(ar,told);
1604 }
1605 else
1606 BIND_ARCH_TO_TEMP(ar,tr);
1607 }
1608 else if (u->opcode == GET && u->size != 4) {
1609 /* Invalidate any mapping for this archreg. */
1610 actual_areg = containingArchRegOf ( u->size, u->val1 );
1611 areg_map[actual_areg] = -1;
1612 }
1613 else if (u->opcode == PUT && u->size == 4) {
1614 /* PUT; re-establish t -> a binding */
1615 vg_assert(u->tag1 == TempReg);
1616 vg_assert(u->tag2 == ArchReg);
1617 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1618 }
1619 else if (u->opcode == PUT && u->size != 4) {
1620 /* Invalidate any mapping for this archreg. */
1621 actual_areg = containingArchRegOf ( u->size, u->val2 );
1622 areg_map[actual_areg] = -1;
1623 } else {
1624
1625 /* see if insn has an archreg as a read operand; if so try to
1626 map it. */
1627 if (u->tag1 == ArchReg && u->size == 4
1628 && areg_map[u->val1] != -1) {
1629 switch (u->opcode) {
1630 case ADD: case SUB: case AND: case OR: case XOR:
1631 case ADC: case SBB:
1632 case SHL: case SHR: case SAR: case ROL: case ROR:
1633 case RCL: case RCR:
jsgf5efa4fd2003-10-14 21:49:11 +00001634 case MUL:
njn25e49d8e72002-09-23 09:36:25 +00001635 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001636 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001637 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001638 i, nameIReg(4,u->val1), areg_map[u->val1]);
1639 u->tag1 = TempReg;
1640 u->val1 = areg_map[u->val1];
1641 /* Remember to extend the live range of the TempReg,
1642 if necessary. */
1643 if (last_live_before[u->val1] < i)
1644 last_live_before[u->val1] = i;
1645 break;
1646 default:
1647 break;
1648 }
1649 }
1650
1651 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001652 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001653
1654 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001655 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001656 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001657 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001658 for (m = 0; m < 8; m++)
1659 if (areg_map[m] == tr) areg_map[m] = -1;
1660 }
1661 }
1662
1663 }
1664
1665# undef BIND_ARCH_TO_TEMP
1666
sewardj05f1aa12002-04-30 00:29:36 +00001667 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1668 %ESP, since the memory check machinery always requires the
1669 in-memory value of %ESP to be up to date. Although this isn't
1670 actually required by other analyses (cache simulation), it's
1671 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001672 for (j = 0; j < 8; j++)
1673 annul_put[j] = False;
1674
1675 for (i = cb->used-1; i >= 0; i--) {
1676 u = &cb->instrs[i];
1677 if (u->opcode == NOP) continue;
1678
1679 if (u->opcode == PUT && u->size == 4) {
1680 vg_assert(u->tag2 == ArchReg);
1681 actual_areg = containingArchRegOf ( 4, u->val2 );
1682 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001683 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001684 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001685 if (dis)
1686 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001687 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001688 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001689 annul_put[actual_areg] = True;
1690 }
1691 }
1692 else if (u->opcode == PUT && u->size != 4) {
1693 actual_areg = containingArchRegOf ( u->size, u->val2 );
1694 annul_put[actual_areg] = False;
1695 }
1696 else if (u->opcode == JMP || u->opcode == JIFZ
1697 || u->opcode == CALLM) {
1698 for (j = 0; j < 8; j++)
1699 annul_put[j] = False;
1700 }
1701 else {
1702 /* If an instruction reads an ArchReg, the immediately
1703 preceding PUT cannot be annulled. */
1704 actual_areg = maybe_uinstrReadsArchReg ( u );
1705 if (actual_areg != -1)
1706 annul_put[actual_areg] = False;
1707 }
1708 }
1709
1710 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1711 dead after this point, annul the MOV insn and rename t2 to t1.
1712 Further modifies the last_live_before map. */
1713
1714# if 0
njn4ba5a792002-09-30 10:23:54 +00001715 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001716 for (i = 0; i < cb->nextTemp; i++)
1717 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1718 VG_(printf)("\n");
1719# endif
1720
1721 for (i = 0; i < cb->used-1; i++) {
1722 u = &cb->instrs[i];
1723 if (u->opcode != MOV) continue;
1724 if (u->tag1 == Literal) continue;
1725 vg_assert(u->tag1 == TempReg);
1726 vg_assert(u->tag2 == TempReg);
1727 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001728 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001729 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001730 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001731 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1732 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1733 if (cb->instrs[j].tag1 == TempReg
1734 && cb->instrs[j].val1 == u->val2)
1735 cb->instrs[j].val1 = u->val1;
1736 if (cb->instrs[j].tag2 == TempReg
1737 && cb->instrs[j].val2 == u->val2)
1738 cb->instrs[j].val2 = u->val1;
sewardjfebaa3b2003-05-25 01:07:34 +00001739 if (cb->instrs[j].tag3 == TempReg
1740 && cb->instrs[j].val3 == u->val2)
1741 cb->instrs[j].val3 = u->val1;
sewardjde4a1d02002-03-22 01:27:54 +00001742 }
1743 last_live_before[u->val1] = last_live_before[u->val2];
1744 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001745 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001746 }
1747 }
1748
1749 /* PASS 3: redundant condition-code restore/save elimination.
1750 Scan backwards from the end. future_dead_flags records the set
1751 of flags which are dead at this point, that is, will be written
1752 before they are next read. Earlier uinsns which write flags
1753 already in future_dead_flags can have their writes annulled.
1754 */
1755 future_dead_flags = FlagsEmpty;
1756
1757 for (i = cb->used-1; i >= 0; i--) {
1758 u = &cb->instrs[i];
1759
1760 /* We might never make it to insns beyond this one, so be
1761 conservative. */
1762 if (u->opcode == JIFZ || u->opcode == JMP) {
1763 future_dead_flags = FlagsEmpty;
1764 continue;
1765 }
1766
sewardjfbb6cda2002-07-24 09:33:52 +00001767 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1768 For example people try to mess with bit 21 to see if CPUID
1769 works. The setting may or may not actually take hold. So we
1770 play safe here. */
1771 if (u->opcode == PUTF) {
1772 future_dead_flags = FlagsEmpty;
1773 continue;
1774 }
1775
sewardjde4a1d02002-03-22 01:27:54 +00001776 /* We can annul the flags written by this insn if it writes a
1777 subset (or eq) of the set of flags known to be dead after
1778 this insn. If not, just record the flags also written by
1779 this insn.*/
1780 if (u->flags_w != FlagsEmpty
1781 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001782 if (dis) {
1783 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001784 vg_ppFlagSet("", u->flags_w);
1785 VG_(printf)(" due to later ");
1786 vg_ppFlagSet("", future_dead_flags);
1787 VG_(printf)("\n");
1788 }
1789 u->flags_w = FlagsEmpty;
1790 } else {
1791 future_dead_flags
1792 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1793 }
1794
1795 /* If this insn also reads flags, empty out future_dead_flags so
1796 as to force preceding writes not to be annulled. */
1797 if (u->flags_r != FlagsEmpty)
1798 future_dead_flags = FlagsEmpty;
1799 }
1800
1801 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001802 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1803
1804 if (dis) {
1805 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001806 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001807 }
sewardjde4a1d02002-03-22 01:27:54 +00001808}
1809
njn9b007f62003-04-07 14:40:25 +00001810/*------------------------------------------------------------*/
1811/*--- %ESP-update pass ---*/
1812/*------------------------------------------------------------*/
1813
1814/* For skins that want to know about %ESP changes, this pass adds
1815 in the appropriate hooks. We have to do it after the skin's
1816 instrumentation, so the skin doesn't have to worry about the CCALLs
1817 it adds in, and we must do it before register allocation because
1818 spilled temps make it much harder to work out the %esp deltas.
njned619712003-10-01 16:45:04 +00001819 Thus we have it as an extra phase between the two.
1820
1821 We look for "GETL %ESP, t_ESP", then track ADDs and SUBs of
1822 literal values to t_ESP, and the total delta of the ADDs/SUBs. Then if
1823 "PUTL t_ESP, %ESP" happens, we call the helper with the known delta. We
1824 also cope with "MOVL t_ESP, tX", making tX the new t_ESP. If any other
1825 instruction clobbers t_ESP, we don't track it anymore, and fall back to
1826 the delta-is-unknown case. That case is also used when the delta is not
1827 a nice small amount, or an unknown amount.
1828*/
njn9b007f62003-04-07 14:40:25 +00001829static
1830UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1831{
1832 UCodeBlock* cb;
1833 UInstr* u;
1834 Int delta = 0;
1835 UInt t_ESP = INVALID_TEMPREG;
sewardj05bcdcb2003-05-18 10:05:38 +00001836 Int i;
njn9b007f62003-04-07 14:40:25 +00001837
1838 cb = VG_(setup_UCodeBlock)(cb_in);
1839
1840 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1841 u = VG_(get_instr)(cb_in, i);
1842
1843 if (GET == u->opcode && R_ESP == u->val1) {
1844 t_ESP = u->val2;
1845 delta = 0;
1846
1847 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1848
fitzhardinge98abfc72003-12-16 02:05:15 +00001849# define DO_GENERIC \
1850 if (VG_(defined_new_mem_stack)() || \
1851 VG_(defined_die_mem_stack)()) { \
1852 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1853 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1854 1, 1, False); \
njn9b007f62003-04-07 14:40:25 +00001855 }
1856
fitzhardinge98abfc72003-12-16 02:05:15 +00001857# define DO(kind, size) \
1858 if (VG_(defined_##kind##_mem_stack_##size)()) { \
1859 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1860 uCCall(cb, (Addr) VG_(tool_interface).track_##kind##_mem_stack_##size, \
1861 1, 1, False); \
1862 \
1863 } else \
1864 DO_GENERIC \
njn9b007f62003-04-07 14:40:25 +00001865 break
1866
1867 if (u->val1 == t_ESP) {
1868 /* Known delta, common cases handled specially. */
1869 switch (delta) {
njned619712003-10-01 16:45:04 +00001870 case 0: break;
njn9b007f62003-04-07 14:40:25 +00001871 case 4: DO(die, 4);
1872 case -4: DO(new, 4);
1873 case 8: DO(die, 8);
1874 case -8: DO(new, 8);
1875 case 12: DO(die, 12);
1876 case -12: DO(new, 12);
1877 case 16: DO(die, 16);
1878 case -16: DO(new, 16);
1879 case 32: DO(die, 32);
1880 case -32: DO(new, 32);
1881 default: DO_GENERIC; break;
1882 }
1883 } else {
1884 /* Unknown delta */
1885 DO_GENERIC;
njned619712003-10-01 16:45:04 +00001886
daywalker972a7592003-10-01 10:19:08 +00001887 /* now we know the temp that points to %ESP */
njned619712003-10-01 16:45:04 +00001888 t_ESP = u->val1;
njn9b007f62003-04-07 14:40:25 +00001889 }
1890 delta = 0;
1891
1892# undef DO
1893# undef DO_GENERIC
1894
njned619712003-10-01 16:45:04 +00001895 } else if (ADD == u->opcode && Literal == u->tag1 && t_ESP == u->val2) {
1896 delta += u->lit32;
1897
1898 } else if (SUB == u->opcode && Literal == u->tag1 && t_ESP == u->val2) {
1899 delta -= u->lit32;
njn9b007f62003-04-07 14:40:25 +00001900
1901 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1902 TempReg == u->tag2) {
njned619712003-10-01 16:45:04 +00001903 // t_ESP is transferred
njn9b007f62003-04-07 14:40:25 +00001904 t_ESP = u->val2;
njned619712003-10-01 16:45:04 +00001905
1906 } else {
1907 // Stop tracking t_ESP if it's clobbered by this instruction.
1908 Int tempUse [VG_MAX_REGS_USED];
1909 Bool isWrites[VG_MAX_REGS_USED];
1910 Int j, n = VG_(get_reg_usage)(u, TempReg, tempUse, isWrites);
1911
1912 for (j = 0; j < n; j++) {
1913 if (tempUse[j] == t_ESP && isWrites[j])
1914 t_ESP = INVALID_TEMPREG;
1915 }
njn9b007f62003-04-07 14:40:25 +00001916 }
1917 VG_(copy_UInstr) ( cb, u );
1918 }
1919
1920 VG_(free_UCodeBlock)(cb_in);
1921 return cb;
1922}
sewardjde4a1d02002-03-22 01:27:54 +00001923
1924/*------------------------------------------------------------*/
1925/*--- The new register allocator. ---*/
1926/*------------------------------------------------------------*/
1927
1928typedef
1929 struct {
1930 /* Becomes live for the first time after this insn ... */
1931 Int live_after;
1932 /* Becomes dead for the last time after this insn ... */
1933 Int dead_before;
1934 /* The "home" spill slot, if needed. Never changes. */
1935 Int spill_no;
1936 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1937 Int real_no;
1938 }
1939 TempInfo;
1940
1941
1942/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1943 them in spill locations, and add spill code, if there are not
1944 enough real regs. The usual register allocation deal, in short.
1945
1946 Important redundancy of representation:
1947
1948 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1949 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1950
1951 The .real_no field of a TempInfo gives the current RRR for
1952 this TempReg, or VG_NOVALUE if the TempReg is currently
1953 in memory, in which case it is in the SpillNo denoted by
1954 spillno.
1955
1956 These pieces of information (a fwds-bwds mapping, really) must
1957 be kept consistent!
1958
1959 This allocator uses the so-called Second Chance Bin Packing
1960 algorithm, as described in "Quality and Speed in Linear-scan
1961 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1962 pp142-151). It is simple and fast and remarkably good at
1963 minimising the amount of spill code introduced.
1964*/
1965
1966static
1967UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1968{
1969 TempInfo* temp_info;
njned619712003-10-01 16:45:04 +00001970 Int real_to_temp [VG_MAX_REALREGS];
sewardjde4a1d02002-03-22 01:27:54 +00001971 Bool is_spill_cand[VG_MAX_REALREGS];
1972 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1973 Int i, j, k, m, r, tno, max_ss_no;
1974 Bool wr, defer, isRead, spill_reqd;
njned619712003-10-01 16:45:04 +00001975 UInt realUse [VG_MAX_REGS_USED];
1976 Int tempUse [VG_MAX_REGS_USED];
njnf4ce3d32003-02-10 10:17:26 +00001977 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001978 UCodeBlock* c2;
1979
1980 /* Used to denote ... well, "no value" in this fn. */
1981# define VG_NOTHING (-2)
1982
1983 /* Initialise the TempReg info. */
1984 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001985 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1986 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001987 else
1988 temp_info = NULL;
1989
1990 for (i = 0; i < c1->nextTemp; i++) {
1991 temp_info[i].live_after = VG_NOTHING;
1992 temp_info[i].dead_before = VG_NOTHING;
1993 temp_info[i].spill_no = VG_NOTHING;
1994 /* temp_info[i].real_no is not yet relevant. */
1995 }
1996
1997 spill_reqd = False;
1998
1999 /* Scan fwds to establish live ranges. */
2000
2001 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00002002 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2003 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002004 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002005
2006 /* For each temp usage ... fwds in program order */
2007 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00002008 tno = tempUse[j];
2009 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00002010 if (wr) {
2011 /* Writes hold a reg live until after this insn. */
2012 if (temp_info[tno].live_after == VG_NOTHING)
2013 temp_info[tno].live_after = i;
2014 if (temp_info[tno].dead_before < i + 1)
2015 temp_info[tno].dead_before = i + 1;
2016 } else {
2017 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00002018 if (temp_info[tno].live_after == VG_NOTHING) {
2019 VG_(printf)("At instr %d...\n", i);
2020 VG_(core_panic)("First use of tmp not a write,"
2021 " probably a skin instrumentation error");
2022 }
sewardjde4a1d02002-03-22 01:27:54 +00002023 /* Reads only hold it live until before this insn. */
2024 if (temp_info[tno].dead_before < i)
2025 temp_info[tno].dead_before = i;
2026 }
2027 }
2028 }
2029
2030# if 0
2031 /* Sanity check on live ranges. Expensive but correct. */
2032 for (i = 0; i < c1->nextTemp; i++) {
2033 vg_assert( (temp_info[i].live_after == VG_NOTHING
2034 && temp_info[i].dead_before == VG_NOTHING)
2035 || (temp_info[i].live_after != VG_NOTHING
2036 && temp_info[i].dead_before != VG_NOTHING) );
2037 }
2038# endif
2039
2040 /* Do a rank-based allocation of TempRegs to spill slot numbers.
2041 We put as few as possible values in spill slots, but
2042 nevertheless need to have an assignment to them just in case. */
2043
2044 max_ss_no = -1;
2045
2046 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
2047 ss_busy_until_before[i] = 0;
2048
2049 for (i = 0; i < c1->nextTemp; i++) {
2050
2051 /* True iff this temp is unused. */
2052 if (temp_info[i].live_after == VG_NOTHING)
2053 continue;
2054
2055 /* Find the lowest-numbered spill slot which is available at the
2056 start point of this interval, and assign the interval to
2057 it. */
2058 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
2059 if (ss_busy_until_before[j] <= temp_info[i].live_after)
2060 break;
2061 if (j == VG_MAX_SPILLSLOTS) {
2062 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00002063 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00002064 }
2065 ss_busy_until_before[j] = temp_info[i].dead_before;
2066 temp_info[i].spill_no = j;
2067 if (j > max_ss_no)
2068 max_ss_no = j;
2069 }
2070
2071 VG_(total_reg_rank) += (max_ss_no+1);
2072
2073 /* Show live ranges and assigned spill slot nos. */
2074
njn25e49d8e72002-09-23 09:36:25 +00002075 if (dis) {
2076 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00002077
2078 for (i = 0; i < c1->nextTemp; i++) {
2079 if (temp_info[i].live_after == VG_NOTHING)
2080 continue;
2081 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002082 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00002083 i,
2084 temp_info[i].live_after,
2085 temp_info[i].dead_before,
2086 temp_info[i].spill_no
2087 );
2088 }
njn25e49d8e72002-09-23 09:36:25 +00002089 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00002090 }
2091
2092 /* Now that we've established a spill slot number for each used
2093 temporary, we can go ahead and do the core of the "Second-chance
2094 binpacking" allocation algorithm. */
2095
njn25e49d8e72002-09-23 09:36:25 +00002096 if (dis) VG_(printf)("Register allocated UCode:\n");
2097
2098
sewardjde4a1d02002-03-22 01:27:54 +00002099 /* Resulting code goes here. We generate it all in a forwards
2100 pass. */
njn4ba5a792002-09-30 10:23:54 +00002101 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002102 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00002103
2104 /* At the start, no TempRegs are assigned to any real register.
2105 Correspondingly, all temps claim to be currently resident in
2106 their spill slots, as computed by the previous two passes. */
2107 for (i = 0; i < VG_MAX_REALREGS; i++)
2108 real_to_temp[i] = VG_NOTHING;
2109 for (i = 0; i < c1->nextTemp; i++)
2110 temp_info[i].real_no = VG_NOTHING;
2111
sewardjde4a1d02002-03-22 01:27:54 +00002112 /* Process each insn in turn. */
2113 for (i = 0; i < c1->used; i++) {
2114
2115 if (c1->instrs[i].opcode == NOP) continue;
2116 VG_(uinstrs_prealloc)++;
2117
2118# if 0
2119 /* Check map consistency. Expensive but correct. */
2120 for (r = 0; r < VG_MAX_REALREGS; r++) {
2121 if (real_to_temp[r] != VG_NOTHING) {
2122 tno = real_to_temp[r];
2123 vg_assert(tno >= 0 && tno < c1->nextTemp);
2124 vg_assert(temp_info[tno].real_no == r);
2125 }
2126 }
2127 for (tno = 0; tno < c1->nextTemp; tno++) {
2128 if (temp_info[tno].real_no != VG_NOTHING) {
2129 r = temp_info[tno].real_no;
2130 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
2131 vg_assert(real_to_temp[r] == tno);
2132 }
2133 }
2134# endif
2135
njn25e49d8e72002-09-23 09:36:25 +00002136 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002137 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00002138
2139 /* First, free up enough real regs for this insn. This may
2140 generate spill stores since we may have to evict some TempRegs
2141 currently in real regs. Also generates spill loads. */
2142
njn810086f2002-11-14 12:42:47 +00002143 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2144 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002145 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002146
2147 /* For each ***different*** temp mentioned in the insn .... */
2148 for (j = 0; j < k; j++) {
2149
2150 /* First check if the temp is mentioned again later; if so,
2151 ignore this mention. We only want to process each temp
2152 used by the insn once, even if it is mentioned more than
2153 once. */
2154 defer = False;
njn810086f2002-11-14 12:42:47 +00002155 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00002156 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002157 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00002158 defer = True;
2159 if (defer)
2160 continue;
2161
njn810086f2002-11-14 12:42:47 +00002162 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00002163 First of all, if it already has a register assigned, we
2164 don't need to do anything more. */
2165 if (temp_info[tno].real_no != VG_NOTHING)
2166 continue;
2167
2168 /* No luck. The next thing to do is see if there is a
2169 currently unassigned register available. If so, bag it. */
2170 for (r = 0; r < VG_MAX_REALREGS; r++) {
2171 if (real_to_temp[r] == VG_NOTHING)
2172 break;
2173 }
2174 if (r < VG_MAX_REALREGS) {
2175 real_to_temp[r] = tno;
2176 temp_info[tno].real_no = r;
2177 continue;
2178 }
2179
2180 /* Unfortunately, that didn't pan out either. So we'll have
2181 to eject some other unfortunate TempReg into a spill slot
2182 in order to free up a register. Of course, we need to be
2183 careful not to eject some other TempReg needed by this
2184 insn.
2185
2186 Select r in 0 .. VG_MAX_REALREGS-1 such that
2187 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00002188 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002189 wrong to eject some other TempReg which we need to use in
2190 this insn.
2191
2192 It is here that it is important to make a good choice of
2193 register to spill. */
2194
2195 /* First, mark those regs which are not spill candidates. */
2196 for (r = 0; r < VG_MAX_REALREGS; r++) {
2197 is_spill_cand[r] = True;
2198 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002199 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002200 is_spill_cand[r] = False;
2201 break;
2202 }
2203 }
2204 }
2205
2206 /* We can choose any r satisfying is_spill_cand[r]. However,
2207 try to make a good choice. First, try and find r such
2208 that the associated TempReg is already dead. */
2209 for (r = 0; r < VG_MAX_REALREGS; r++) {
2210 if (is_spill_cand[r] &&
2211 temp_info[real_to_temp[r]].dead_before <= i)
2212 goto have_spill_cand;
2213 }
2214
2215 /* No spill cand is mapped to a dead TempReg. Now we really
2216 _do_ have to generate spill code. Choose r so that the
2217 next use of its associated TempReg is as far ahead as
2218 possible, in the hope that this will minimise the number of
2219 consequent reloads required. This is a bit expensive, but
2220 we don't have to do it very often. */
2221 {
2222 Int furthest_r = VG_MAX_REALREGS;
2223 Int furthest = 0;
2224 for (r = 0; r < VG_MAX_REALREGS; r++) {
2225 if (!is_spill_cand[r]) continue;
2226 for (m = i+1; m < c1->used; m++)
2227 if (uInstrMentionsTempReg(&c1->instrs[m],
2228 real_to_temp[r]))
2229 break;
2230 if (m > furthest) {
2231 furthest = m;
2232 furthest_r = r;
2233 }
2234 }
2235 r = furthest_r;
2236 goto have_spill_cand;
2237 }
2238
2239 have_spill_cand:
2240 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002241 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002242
2243 /* Eject r. Important refinement: don't bother if the
2244 associated TempReg is now dead. */
2245 vg_assert(real_to_temp[r] != VG_NOTHING);
2246 vg_assert(real_to_temp[r] != tno);
2247 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2248 if (temp_info[real_to_temp[r]].dead_before > i) {
2249 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002250 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002251 SpillNo, temp_info[real_to_temp[r]].spill_no);
2252 VG_(uinstrs_spill)++;
2253 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002254 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002255 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002256 }
2257
2258 /* Decide if tno is read. */
2259 isRead = False;
2260 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002261 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002262 isRead = True;
2263
2264 /* If so, generate a spill load. */
2265 if (isRead) {
2266 uInstr2(c2, GET, 4,
2267 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002268 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002269 VG_(uinstrs_spill)++;
2270 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002271 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002272 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002273 }
2274
2275 /* Update the forwards and backwards maps. */
2276 real_to_temp[r] = tno;
2277 temp_info[tno].real_no = r;
2278 }
2279
2280 /* By this point, all TempRegs mentioned by the insn have been
2281 bought into real regs. We now copy the insn to the output
2282 and use patchUInstr to convert its rTempRegs into
2283 realregs. */
2284 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002285 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002286 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002287 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002288
njn25e49d8e72002-09-23 09:36:25 +00002289 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002290 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002291 VG_(printf)("\n");
2292 }
2293 }
2294
2295 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002296 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002297
njn4ba5a792002-09-30 10:23:54 +00002298 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002299
2300 if (spill_reqd)
2301 VG_(translations_needing_spill)++;
2302
2303 return c2;
2304
2305# undef VG_NOTHING
2306
2307}
sewardj7c4b6042003-06-14 15:47:15 +00002308
njn25e49d8e72002-09-23 09:36:25 +00002309/* Analysis records liveness of all general-use RealRegs in the UCode. */
2310static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2311{
2312 Int i, j, k;
2313 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002314 Int regUse[VG_MAX_REGS_USED];
2315 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002316 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002317
njn25e49d8e72002-09-23 09:36:25 +00002318 /* All regs are dead at the end of the block */
2319 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002320
sewardjde4a1d02002-03-22 01:27:54 +00002321 for (i = cb->used-1; i >= 0; i--) {
2322 u = &cb->instrs[i];
2323
njn25e49d8e72002-09-23 09:36:25 +00002324 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002325
njn810086f2002-11-14 12:42:47 +00002326 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002327
njn25e49d8e72002-09-23 09:36:25 +00002328 /* For each reg usage ... bwds in program order. Variable is live
2329 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002330 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002331 convert it to our rank number. */
2332 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002333 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002334 rregs_live,
njn810086f2002-11-14 12:42:47 +00002335 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002336 }
2337 }
sewardjde4a1d02002-03-22 01:27:54 +00002338}
2339
sewardjde4a1d02002-03-22 01:27:54 +00002340/*------------------------------------------------------------*/
2341/*--- Main entry point for the JITter. ---*/
2342/*------------------------------------------------------------*/
2343
2344/* Translate the basic block beginning at orig_addr, placing the
2345 translation in a vg_malloc'd block, the address and size of which
2346 are returned in trans_addr and trans_size. Length of the original
2347 block is also returned in orig_size. If the latter three are NULL,
2348 this call is being done for debugging purposes, in which case (a)
2349 throw away the translation once it is made, and (b) produce a load
2350 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002351
2352 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002353*/
njn72718642003-07-24 08:45:32 +00002354void VG_(translate) ( /*IN*/ ThreadId tid,
njn25e49d8e72002-09-23 09:36:25 +00002355 /*IN*/ Addr orig_addr,
2356 /*OUT*/ UInt* orig_size,
2357 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002358 /*OUT*/ UInt* trans_size,
2359 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002360{
fitzhardinge98abfc72003-12-16 02:05:15 +00002361 Int n_disassembled_bytes, final_code_size;
sewardjde4a1d02002-03-22 01:27:54 +00002362 Bool debugging_translation;
2363 UChar* final_code;
2364 UCodeBlock* cb;
sewardja60be0e2003-05-26 08:47:27 +00002365 Bool notrace_until_done;
sewardj1e86b8b2003-06-16 23:34:12 +00002366 UInt notrace_until_limit = 0;
fitzhardinge98abfc72003-12-16 02:05:15 +00002367 Segment *seg;
2368 Addr redir;
sewardjde4a1d02002-03-22 01:27:54 +00002369
2370 VGP_PUSHCC(VgpTranslate);
2371 debugging_translation
2372 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2373
sewardj25c7c3a2003-07-10 00:17:58 +00002374 /* Look in the code redirect table to see if we should
2375 translate an alternative address for orig_addr. */
fitzhardinge98abfc72003-12-16 02:05:15 +00002376 redir = VG_(code_redirect)(orig_addr);
2377
2378 if (redir != orig_addr && VG_(clo_verbosity) >= 2)
2379 VG_(message)(Vg_UserMsg,
2380 "TRANSLATE: %p redirected to %p",
2381 orig_addr,
2382 redir );
2383 orig_addr = redir;
sewardj25c7c3a2003-07-10 00:17:58 +00002384
sewardja60be0e2003-05-26 08:47:27 +00002385 /* If codegen tracing, don't start tracing until
2386 notrace_until_limit blocks have gone by. This avoids printing
2387 huge amounts of useless junk when all we want to see is the last
2388 few blocks translated prior to a failure. Set
2389 notrace_until_limit to be the number of translations to be made
2390 before --trace-codegen= style printing takes effect. */
2391 notrace_until_done
fitzhardinge15117d22003-12-19 17:16:54 +00002392 = VG_(overall_in_count) >= notrace_until_limit;
sewardja60be0e2003-05-26 08:47:27 +00002393
fitzhardinge98abfc72003-12-16 02:05:15 +00002394 seg = VG_(find_segment)(orig_addr);
2395
njn25e49d8e72002-09-23 09:36:25 +00002396 if (!debugging_translation)
njn72718642003-07-24 08:45:32 +00002397 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tid, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002398
fitzhardinge98abfc72003-12-16 02:05:15 +00002399 if (seg == NULL ||
2400 !VG_(seg_contains)(seg, orig_addr, 1) ||
2401 (seg->prot & (VKI_PROT_READ|VKI_PROT_EXEC)) == 0) {
fitzhardinge98abfc72003-12-16 02:05:15 +00002402 /* Code address is bad - deliver a signal instead */
2403 vg_assert(!VG_(is_addressable)(orig_addr, 1));
2404
fitzhardinge98abfc72003-12-16 02:05:15 +00002405 if (seg != NULL && VG_(seg_contains)(seg, orig_addr, 1)) {
2406 vg_assert((seg->prot & VKI_PROT_EXEC) == 0);
fitzhardingef1beb252004-03-16 09:49:08 +00002407 VG_(synth_fault_perms)(tid, orig_addr);
fitzhardinge98abfc72003-12-16 02:05:15 +00002408 } else
fitzhardingef1beb252004-03-16 09:49:08 +00002409 VG_(synth_fault_mapping)(tid, orig_addr);
jsgf855d93d2003-10-13 22:26:55 +00002410
jsgf855d93d2003-10-13 22:26:55 +00002411 return;
fitzhardinge98abfc72003-12-16 02:05:15 +00002412 } else
2413 seg->flags |= SF_CODE; /* contains cached code */
jsgf855d93d2003-10-13 22:26:55 +00002414
njn4ba5a792002-09-30 10:23:54 +00002415 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002416 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002417
njn25e49d8e72002-09-23 09:36:25 +00002418 /* If doing any code printing, print a basic block start marker */
sewardja60be0e2003-05-26 08:47:27 +00002419 if (VG_(clo_trace_codegen) && notrace_until_done) {
njn25e49d8e72002-09-23 09:36:25 +00002420 Char fnname[64] = "";
2421 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2422 VG_(printf)(
njne0205ff2003-04-08 00:56:14 +00002423 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %llu ====\n\n",
njn25e49d8e72002-09-23 09:36:25 +00002424 VG_(overall_in_count), fnname, orig_addr,
2425 VG_(overall_in_osize), VG_(overall_in_tsize),
2426 VG_(bbs_done));
2427 }
2428
2429 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
sewardja60be0e2003-05-26 08:47:27 +00002430# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2431 ( debugging_translation \
2432 || (notrace_until_done \
2433 && (VG_(clo_trace_codegen) & (1 << (n-1))) ))
njn25e49d8e72002-09-23 09:36:25 +00002434
sewardjde4a1d02002-03-22 01:27:54 +00002435 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002436 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2437 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002438 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002439 VGP_POPCC(VgpToUCode);
2440
sewardjde4a1d02002-03-22 01:27:54 +00002441 /* Try and improve the code a bit. */
2442 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002443 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2444 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002445 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002446 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002447 }
2448
njn25e49d8e72002-09-23 09:36:25 +00002449 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2450 SK_(instrument) looks at it. */
2451 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2452 VGP_PUSHCC(VgpInstrument);
2453 cb = SK_(instrument) ( cb, orig_addr );
2454 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002455 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002456 VG_(saneUCodeBlock)( cb );
2457 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002458
njn9b007f62003-04-07 14:40:25 +00002459 /* Add %ESP-update hooks if the skin requires them */
2460 /* Nb: We don't print out this phase, because it doesn't do much */
2461 if (VG_(need_to_handle_esp_assignment)()) {
2462 VGP_PUSHCC(VgpESPUpdate);
2463 cb = vg_ESP_update_pass ( cb );
2464 VGP_POPCC(VgpESPUpdate);
2465 }
2466
sewardjde4a1d02002-03-22 01:27:54 +00002467 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002468 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2469 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002470 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002471 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002472
njn25e49d8e72002-09-23 09:36:25 +00002473 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2474 * anything; results can be seen when emitting final code). */
2475 VGP_PUSHCC(VgpLiveness);
2476 vg_realreg_liveness_analysis ( cb );
2477 VGP_POPCC(VgpLiveness);
2478
2479 /* Emit final code */
2480 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2481
2482 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002483 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002484 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002485 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002486
njn25e49d8e72002-09-23 09:36:25 +00002487#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2488
sewardjde4a1d02002-03-22 01:27:54 +00002489 if (debugging_translation) {
2490 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002491 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002492 } else {
2493 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002494 *orig_size = n_disassembled_bytes;
2495 *trans_addr = (Addr)final_code;
2496 *trans_size = final_code_size;
2497 }
njn25e49d8e72002-09-23 09:36:25 +00002498 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002499}
2500
2501/*--------------------------------------------------------------------*/
2502/*--- end vg_translate.c ---*/
2503/*--------------------------------------------------------------------*/
njned619712003-10-01 16:45:04 +00002504