blob: 5667970daad7a0672d3c20491c4c75f91aa5faf8 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
njn0e1b5142003-04-15 14:58:06 +000011 Copyright (C) 2000-2003 Julian Seward
sewardjde4a1d02002-03-22 01:27:54 +000012 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
njn25e49d8e72002-09-23 09:36:25 +000038#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000058 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000059 cb->used = cb->size = 0;
60 cb->nextTemp = cb_in->nextTemp;
61 cb->instrs = NULL;
62 return cb;
63}
sewardjde4a1d02002-03-22 01:27:54 +000064
njn4ba5a792002-09-30 10:23:54 +000065void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000066{
njn25e49d8e72002-09-23 09:36:25 +000067 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
68 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000069}
70
71
72/* Ensure there's enough space in a block to add one uinstr. */
73static __inline__
74void ensureUInstr ( UCodeBlock* cb )
75{
76 if (cb->used == cb->size) {
77 if (cb->instrs == NULL) {
78 vg_assert(cb->size == 0);
79 vg_assert(cb->used == 0);
80 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000081 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000082 } else {
83 Int i;
njn25e49d8e72002-09-23 09:36:25 +000084 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000085 2 * sizeof(UInstr) * cb->size);
86 for (i = 0; i < cb->used; i++)
87 instrs2[i] = cb->instrs[i];
88 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000089 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000090 cb->instrs = instrs2;
91 }
92 }
93
94 vg_assert(cb->used < cb->size);
95}
96
97
98__inline__
njn4ba5a792002-09-30 10:23:54 +000099void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000100{
101 u->val1 = u->val2 = u->val3 = 0;
102 u->tag1 = u->tag2 = u->tag3 = NoValue;
103 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000104 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000105 u->signed_widen = u->has_ret_val = False;
106 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000107 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000108 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000109 u->size = 0;
110 u->cond = 0;
111 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000112 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000113}
114
115
116/* Add an instruction to a ucode block, and return the index of the
117 instruction. */
118__inline__
njn4ba5a792002-09-30 10:23:54 +0000119void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000120 Tag tag1, UInt val1,
121 Tag tag2, UInt val2,
122 Tag tag3, UInt val3 )
123{
124 UInstr* ui;
125 ensureUInstr(cb);
126 ui = & cb->instrs[cb->used];
127 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000128 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000129 ui->val1 = val1;
130 ui->val2 = val2;
131 ui->val3 = val3;
132 ui->opcode = opcode;
133 ui->tag1 = tag1;
134 ui->tag2 = tag2;
135 ui->tag3 = tag3;
136 ui->size = sz;
137 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
138 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
139 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
140}
141
142
143__inline__
njn4ba5a792002-09-30 10:23:54 +0000144void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000145 Tag tag1, UInt val1,
146 Tag tag2, UInt val2 )
147{
148 UInstr* ui;
149 ensureUInstr(cb);
150 ui = & cb->instrs[cb->used];
151 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000152 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000153 ui->val1 = val1;
154 ui->val2 = val2;
155 ui->opcode = opcode;
156 ui->tag1 = tag1;
157 ui->tag2 = tag2;
158 ui->size = sz;
159 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
160 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
161}
162
163
164__inline__
njn4ba5a792002-09-30 10:23:54 +0000165void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000166 Tag tag1, UInt val1 )
167{
168 UInstr* ui;
169 ensureUInstr(cb);
170 ui = & cb->instrs[cb->used];
171 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000172 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000173 ui->val1 = val1;
174 ui->opcode = opcode;
175 ui->tag1 = tag1;
176 ui->size = sz;
177 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
178}
179
180
181__inline__
njn4ba5a792002-09-30 10:23:54 +0000182void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000183{
184 UInstr* ui;
185 ensureUInstr(cb);
186 ui = & cb->instrs[cb->used];
187 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000188 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000189 ui->opcode = opcode;
190 ui->size = sz;
191}
192
sewardjde4a1d02002-03-22 01:27:54 +0000193/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000194__inline__
njn4ba5a792002-09-30 10:23:54 +0000195void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000196{
197 ensureUInstr(cb);
198 cb->instrs[cb->used] = *instr;
199 cb->used++;
200}
201
sewardjde4a1d02002-03-22 01:27:54 +0000202/* Copy auxiliary info from one uinstr to another. */
203static __inline__
204void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
205{
njn25e49d8e72002-09-23 09:36:25 +0000206 dst->cond = src->cond;
207 dst->extra4b = src->extra4b;
208 dst->signed_widen = src->signed_widen;
209 dst->jmpkind = src->jmpkind;
210 dst->flags_r = src->flags_r;
211 dst->flags_w = src->flags_w;
212 dst->argc = src->argc;
213 dst->regparms_n = src->regparms_n;
214 dst->has_ret_val = src->has_ret_val;
215 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000216}
217
218
sewardjde4a1d02002-03-22 01:27:54 +0000219/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000220void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000221{
222 LAST_UINSTR(cb).lit32 = lit32;
223}
224
225
njn25e49d8e72002-09-23 09:36:25 +0000226/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000227void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
228 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000229{
230 vg_assert(argc < 4);
231 vg_assert(regparms_n <= argc);
232 LAST_UINSTR(cb).lit32 = fn;
233 LAST_UINSTR(cb).argc = argc;
234 LAST_UINSTR(cb).regparms_n = regparms_n;
235 LAST_UINSTR(cb).has_ret_val = has_ret_val;
236}
237
njn810086f2002-11-14 12:42:47 +0000238/* For the last uinsn inserted into cb, set the read, written and
239 undefined flags. Undefined flags are counted as written, but it
240 seems worthwhile to distinguish them.
241*/
242__inline__
243void VG_(set_flag_fields) ( UCodeBlock* cb,
244 FlagSet rr, FlagSet ww, FlagSet uu )
245{
246 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
247
248 vg_assert(rr == (rr & FlagsALL));
249 vg_assert(uw == (uw & FlagsALL));
250 LAST_UINSTR(cb).flags_r = rr;
251 LAST_UINSTR(cb).flags_w = uw;
252}
253
254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
njn25e49d8e72002-09-23 09:36:25 +0000261#if 1
262# define BEST_ALLOC_ORDER
263#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
330/* This seems as good a place as any to record some important stuff
331 about ucode semantics.
332
333 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
334 TempReg are defined to zero-extend the loaded value to 32 bits.
335 This is needed to make the translation of movzbl et al work
336 properly.
337
338 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
339
340 * Arithmetic on TempRegs is at the specified size. For example,
341 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
342 being emitted -- not a 32 bit one.
343
344 * On some insns we allow the cc bit to be set. If so, the
345 intention is that the simulated machine's %eflags register
346 is copied into that of the real machine before the insn,
347 and copied back again afterwards. This means that the
348 code generated for that insn must be very careful only to
349 update %eflags in the intended way. This is particularly
350 important for the routines referenced by CALL insns.
351*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
   Before liveness analysis, the regs_live_after field should be
   ALL_RREGS_LIVE (this is checked for every core uinstr except CCALL).
   Afterwards, it may hold any value.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
njn25e49d8e72002-09-23 09:36:25 +0000402Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000403{
njn25e49d8e72002-09-23 09:36:25 +0000404# define LIT0 (u->lit32 == 0)
sewardjb31b06d2003-06-13 00:26:02 +0000405# define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
njn25e49d8e72002-09-23 09:36:25 +0000406# define LIT1 (!(LIT0))
407# define LITm (u->tag1 == Literal ? True : LIT0 )
sewardj3d7c9c82003-03-26 21:08:13 +0000408# define SZ8 (u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000409# define SZ4 (u->size == 4)
410# define SZ2 (u->size == 2)
411# define SZ1 (u->size == 1)
412# define SZ0 (u->size == 0)
413# define SZ42 (u->size == 4 || u->size == 2)
sewardjd7971012003-04-04 00:21:58 +0000414# define SZ48 (u->size == 4 || u->size == 8)
sewardjfebaa3b2003-05-25 01:07:34 +0000415# define SZ416 (u->size == 4 || u->size == 16)
sewardjde8aecf2003-05-27 00:46:28 +0000416# define SZsse (u->size == 4 || u->size == 8 || u->size == 16)
njn25e49d8e72002-09-23 09:36:25 +0000417# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
418# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
419 || u->size == 10 || u->size == 28 || u->size == 108)
420# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
421 ? (u->size == 4) : True)
422
423/* For these ones, two cases:
424 *
425 * 1. They are transliterations of the corresponding x86 instruction, in
426 * which case they should have its flags (except that redundant write
427 * flags can be annulled by the optimisation pass).
428 *
429 * 2. They are being used generally for other purposes, eg. helping with a
430 * 'rep'-prefixed instruction, in which case should have empty flags .
431 */
432# define emptyR (u->flags_r == FlagsEmpty)
433# define emptyW (u->flags_w == FlagsEmpty)
434# define CC0 (emptyR && emptyW)
435# define CCr (u->flags_r == FlagsALL && emptyW)
436# define CCw (emptyR && u->flags_w == FlagsALL)
437# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
438# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
439# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
440# define CCb ((u->flags_r==FlagC || emptyR) && \
441 (u->flags_w==FlagsOSZACP || emptyW))
442# define CCd ((u->flags_r==FlagC || emptyR) && \
443 (u->flags_w==FlagsOC || emptyW))
sewardjc232b212002-12-10 22:24:03 +0000444# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
445 || (u->flags_r==FlagsZCP && emptyW))
njn25e49d8e72002-09-23 09:36:25 +0000446# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
447# define CCj (u->cond==CondAlways ? CC0 : CCg)
448
sewardjde4a1d02002-03-22 01:27:54 +0000449# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
450# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
451# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
452# define A1 (u->tag1 == ArchReg)
453# define A2 (u->tag2 == ArchReg)
454# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
455# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
456# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
457# define L1 (u->tag1 == Literal && u->val1 == 0)
458# define L2 (u->tag2 == Literal && u->val2 == 0)
459# define Ls1 (u->tag1 == Lit16)
sewardjfebaa3b2003-05-25 01:07:34 +0000460# define Ls2 (u->tag2 == Lit16)
sewardjde4a1d02002-03-22 01:27:54 +0000461# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000462# define TRL1 (TR1 || L1)
463# define TRAL1 (TR1 || A1 || L1)
sewardjde4a1d02002-03-22 01:27:54 +0000464# define N1 (u->tag1 == NoValue)
465# define N2 (u->tag2 == NoValue)
466# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000467# define Se1 (u->tag1 == ArchRegS)
468# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000469
njn25e49d8e72002-09-23 09:36:25 +0000470# define COND0 (u->cond == 0)
471# define EXTRA4b0 (u->extra4b == 0)
472# define SG_WD0 (u->signed_widen == 0)
473# define JMPKIND0 (u->jmpkind == 0)
474# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
475 ( beforeLiveness \
476 ? u->regs_live_after == ALL_RREGS_LIVE \
477 : True ))
478
479# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
480# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
481# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
482# define XJMP ( SG_WD0 && CCALL0)
483# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
484# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
485
486 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000487 Int n_lits = 0;
488 if (u->tag1 == Literal) n_lits++;
489 if (u->tag2 == Literal) n_lits++;
490 if (u->tag3 == Literal) n_lits++;
491 if (n_lits > 1)
492 return False;
493
njn25e49d8e72002-09-23 09:36:25 +0000494 /* Fields not checked: val1, val2, val3 */
495
sewardjde4a1d02002-03-22 01:27:54 +0000496 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000497
498 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000499 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
500 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
501 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000502 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000503 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000504 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
505 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
506 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
507 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
508 case LOAD:
509 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
510 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
511 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
512 case WIDEN: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XWIDEN;
513 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
514 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
515 case CALLM_S:
516 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
517 case PUSH:
518 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
519 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
520 case AND:
521 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
522 case ADD:
523 case XOR:
524 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
525 case SBB:
526 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
527 case SHL:
528 case SHR:
529 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
530 case ROL:
531 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
532 case RCL:
533 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
534 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
535 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
536 case INC:
537 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
538 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
539 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
540 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
541 case FPU_R:
542 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
543 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
544 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
545 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
546 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
547 case CCALL: return LIT1 && SZ0 && CC0 &&
548 (u->argc > 0 ? TR1 : N1) &&
549 (u->argc > 1 ? TR2 : N2) &&
550 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
551 u->regparms_n <= u->argc && XCCALL;
sewardj3d7c9c82003-03-26 21:08:13 +0000552 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
553 case MMX1:
sewardj4fbe6e92003-06-15 21:54:34 +0000554 case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
555 case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
556 case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
557 case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
558 case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
559 case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000560
561 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
562 case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj1e86b8b2003-06-16 23:34:12 +0000563 case SSE2a_MemRd: return LIT0 && SZ416 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
sewardj9dd209f2003-06-18 23:30:52 +0000564 case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjde8aecf2003-05-27 00:46:28 +0000565 case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj1e86b8b2003-06-16 23:34:12 +0000566 case SSE3a_MemRd: return LIT0 && SZsse && CCa && Ls1 && Ls2 && TR3 && XOTHER;
sewardj4fbe6e92003-06-15 21:54:34 +0000567 case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjabf8bf82003-06-15 22:28:05 +0000568 case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj02af6bc2003-06-12 00:56:06 +0000569 case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjb31b06d2003-06-13 00:26:02 +0000570 case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj4fbe6e92003-06-15 21:54:34 +0000571 case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardja60be0e2003-05-26 08:47:27 +0000572 case SSE3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
sewardj1e86b8b2003-06-16 23:34:12 +0000573 case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
sewardja453fb02003-06-14 13:22:36 +0000574 case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
sewardje3891fa2003-06-15 03:13:48 +0000575 case SSE3ag_MemRd_RegWr:
576 return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000577 default:
578 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000579 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000580 else {
581 VG_(printf)("unhandled opcode: %u. Perhaps "
582 "VG_(needs).extended_UCode should be set?",
583 u->opcode);
njne427a662002-10-02 11:08:25 +0000584 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000585 }
sewardjde4a1d02002-03-22 01:27:54 +0000586 }
njn25e49d8e72002-09-23 09:36:25 +0000587# undef LIT0
588# undef LIT1
sewardjb31b06d2003-06-13 00:26:02 +0000589# undef LIT8
njn25e49d8e72002-09-23 09:36:25 +0000590# undef LITm
sewardj3d7c9c82003-03-26 21:08:13 +0000591# undef SZ8
sewardjde4a1d02002-03-22 01:27:54 +0000592# undef SZ4
593# undef SZ2
594# undef SZ1
595# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000596# undef SZ42
sewardjd7971012003-04-04 00:21:58 +0000597# undef SZ48
sewardjfebaa3b2003-05-25 01:07:34 +0000598# undef SZ416
sewardjde8aecf2003-05-27 00:46:28 +0000599# undef SZsse
njn25e49d8e72002-09-23 09:36:25 +0000600# undef SZi
601# undef SZf
602# undef SZ4m
603# undef emptyR
604# undef emptyW
605# undef CC0
606# undef CCr
607# undef CCw
608# undef CCa
609# undef CCb
610# undef CCc
611# undef CCd
612# undef CCe
613# undef CCf
614# undef CCg
615# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000616# undef TR1
617# undef TR2
618# undef TR3
619# undef A1
620# undef A2
621# undef AS1
622# undef AS2
623# undef AS3
624# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000625# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000626# undef Ls1
sewardjfebaa3b2003-05-25 01:07:34 +0000627# undef Ls2
sewardjde4a1d02002-03-22 01:27:54 +0000628# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000629# undef TRL1
630# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000631# undef N1
632# undef N2
633# undef N3
sewardje1042472002-09-30 12:33:11 +0000634# undef Se2
635# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000636# undef COND0
637# undef EXTRA4b0
638# undef SG_WD0
639# undef JMPKIND0
640# undef CCALL0
641# undef Xextra4b
642# undef XWIDEN
643# undef XJMP
644# undef XCCALL
645# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000646}
647
njn25e49d8e72002-09-23 09:36:25 +0000648void VG_(saneUCodeBlock) ( UCodeBlock* cb )
649{
650 Int i;
651
652 for (i = 0; i < cb->used; i++) {
653 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
654 if (!sane) {
655 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000656 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000657 }
658 vg_assert(sane);
659 }
660}
sewardjde4a1d02002-03-22 01:27:54 +0000661
662/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000663Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000664{
665 Int callm = 0;
666 Int callm_s = 0;
667 Int callm_e = 0;
668 Int callm_ptr, calls_ptr;
669 Int i, j, t;
670 Bool incall = False;
671
672 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
673
674 for (i = 0; i < cb->used; i++) {
675 switch (cb->instrs[i].opcode) {
676 case CALLM:
677 if (!incall) return False;
678 callm++;
679 break;
680 case CALLM_S:
681 if (incall) return False;
682 incall = True;
683 callm_s++;
684 break;
685 case CALLM_E:
686 if (!incall) return False;
687 incall = False;
688 callm_e++;
689 break;
690 case PUSH: case POP: case CLEAR:
691 if (!incall) return False;
692 break;
693 default:
694 break;
695 }
696 }
697 if (incall) return False;
698 if (callm != callm_s || callm != callm_e) return False;
699
700 /* Check the sections between CALLM_S and CALLM's. Ensure that no
701 PUSH uinsn pushes any TempReg that any other PUSH in the same
702 section pushes. Ie, check that the TempReg args to PUSHes in
703 the section are unique. If not, the instrumenter generates
704 incorrect code for CALLM insns. */
705
706 callm_ptr = 0;
707
708 find_next_CALLM:
709 /* Search for the next interval, making calls_ptr .. callm_ptr
710 bracket it. */
711 while (callm_ptr < cb->used
712 && cb->instrs[callm_ptr].opcode != CALLM)
713 callm_ptr++;
714 if (callm_ptr == cb->used)
715 return True;
716 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
717
718 calls_ptr = callm_ptr - 1;
719 while (cb->instrs[calls_ptr].opcode != CALLM_S)
720 calls_ptr--;
721 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
722 vg_assert(calls_ptr >= 0);
723
724 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
725
726 /* For each PUSH insn in the interval ... */
727 for (i = calls_ptr + 1; i < callm_ptr; i++) {
728 if (cb->instrs[i].opcode != PUSH) continue;
729 t = cb->instrs[i].val1;
730 /* Ensure no later PUSH insns up to callm_ptr push the same
731 TempReg. Return False if any such are found. */
732 for (j = i+1; j < callm_ptr; j++) {
733 if (cb->instrs[j].opcode == PUSH &&
734 cb->instrs[j].val1 == t)
735 return False;
736 }
737 }
738
739 /* This interval is clean. Keep going ... */
740 callm_ptr++;
741 goto find_next_CALLM;
742}
743
744
745/*------------------------------------------------------------*/
746/*--- Printing uinstrs. ---*/
747/*------------------------------------------------------------*/
748
njn25e49d8e72002-09-23 09:36:25 +0000749/* Global that dictates whether to print generated code at all stages */
750Bool VG_(print_codegen);
751
njn563f96f2003-02-03 11:17:46 +0000752Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000753{
754 switch (cond) {
755 case CondO: return "o";
756 case CondNO: return "no";
757 case CondB: return "b";
758 case CondNB: return "nb";
759 case CondZ: return "z";
760 case CondNZ: return "nz";
761 case CondBE: return "be";
762 case CondNBE: return "nbe";
763 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000764 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000765 case CondP: return "p";
766 case CondNP: return "np";
767 case CondL: return "l";
768 case CondNL: return "nl";
769 case CondLE: return "le";
770 case CondNLE: return "nle";
771 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000772 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000773 }
774}
775
776
777static void vg_ppFlagSet ( Char* prefix, FlagSet set )
778{
779 VG_(printf)("%s", prefix);
780 if (set & FlagD) VG_(printf)("D");
781 if (set & FlagO) VG_(printf)("O");
782 if (set & FlagS) VG_(printf)("S");
783 if (set & FlagZ) VG_(printf)("Z");
784 if (set & FlagA) VG_(printf)("A");
785 if (set & FlagC) VG_(printf)("C");
786 if (set & FlagP) VG_(printf)("P");
787}
788
789
790static void ppTempReg ( Int tt )
791{
792 if ((tt & 1) == 0)
793 VG_(printf)("t%d", tt);
794 else
795 VG_(printf)("q%d", tt-1);
796}
797
798
njn4ba5a792002-09-30 10:23:54 +0000799void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000800{
801 UInt tag, val;
802 switch (operandNo) {
803 case 1: tag = u->tag1; val = u->val1; break;
804 case 2: tag = u->tag2; val = u->val2; break;
805 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000806 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000807 }
808 if (tag == Literal) val = u->lit32;
809
810 if (parens) VG_(printf)("(");
811 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000812 case TempReg: ppTempReg(val); break;
813 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
814 case Literal: VG_(printf)("$0x%x", val); break;
815 case Lit16: VG_(printf)("$0x%x", val); break;
816 case NoValue: VG_(printf)("NoValue"); break;
817 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
818 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
819 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000820 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000821 }
822 if (parens) VG_(printf)(")");
823}
824
825
njn4ba5a792002-09-30 10:23:54 +0000826Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000827{
828 switch (opc) {
829 case ADD: return (upper ? "ADD" : "add");
830 case ADC: return (upper ? "ADC" : "adc");
831 case AND: return (upper ? "AND" : "and");
832 case OR: return (upper ? "OR" : "or");
833 case XOR: return (upper ? "XOR" : "xor");
834 case SUB: return (upper ? "SUB" : "sub");
835 case SBB: return (upper ? "SBB" : "sbb");
836 case SHL: return (upper ? "SHL" : "shl");
837 case SHR: return (upper ? "SHR" : "shr");
838 case SAR: return (upper ? "SAR" : "sar");
839 case ROL: return (upper ? "ROL" : "rol");
840 case ROR: return (upper ? "ROR" : "ror");
841 case RCL: return (upper ? "RCL" : "rcl");
842 case RCR: return (upper ? "RCR" : "rcr");
843 case NOT: return (upper ? "NOT" : "not");
844 case NEG: return (upper ? "NEG" : "neg");
845 case INC: return (upper ? "INC" : "inc");
846 case DEC: return (upper ? "DEC" : "dec");
847 case BSWAP: return (upper ? "BSWAP" : "bswap");
848 default: break;
849 }
njne427a662002-10-02 11:08:25 +0000850 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000851 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000852 case CALLM_S: return "CALLM_S";
853 case CALLM_E: return "CALLM_E";
854 case INCEIP: return "INCEIP";
855 case LEA1: return "LEA1";
856 case LEA2: return "LEA2";
857 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000858 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000859 case GET: return "GET";
860 case PUT: return "PUT";
861 case GETF: return "GETF";
862 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000863 case GETSEG: return "GETSEG";
864 case PUTSEG: return "PUTSEG";
865 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000866 case LOAD: return "LD" ;
867 case STORE: return "ST" ;
868 case MOV: return "MOV";
869 case CMOV: return "CMOV";
870 case WIDEN: return "WIDEN";
871 case JMP: return "J" ;
872 case JIFZ: return "JIFZ" ;
873 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000874 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000875 case PUSH: return "PUSH" ;
876 case POP: return "POP" ;
877 case CLEAR: return "CLEAR";
878 case CC2VAL: return "CC2VAL";
879 case FPU_R: return "FPU_R";
880 case FPU_W: return "FPU_W";
881 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000882 case MMX1: return "MMX1" ;
883 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000884 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000885 case MMX2_MemRd: return "MMX2_MRd" ;
886 case MMX2_MemWr: return "MMX2_MWr" ;
sewardj4fbe6e92003-06-15 21:54:34 +0000887 case MMX2_ERegRd: return "MMX2_eRRd" ;
888 case MMX2_ERegWr: return "MMX2_eRWr" ;
sewardjfebaa3b2003-05-25 01:07:34 +0000889 case SSE2a_MemWr: return "SSE2a_MWr";
890 case SSE2a_MemRd: return "SSE2a_MRd";
sewardj9dd209f2003-06-18 23:30:52 +0000891 case SSE2a1_MemRd: return "SSE2a1_MRd";
sewardj4fbe6e92003-06-15 21:54:34 +0000892 case SSE3e_RegRd: return "SSE3e_RRd";
sewardjabf8bf82003-06-15 22:28:05 +0000893 case SSE3e_RegWr: return "SSE3e_RWr";
sewardj02af6bc2003-06-12 00:56:06 +0000894 case SSE3g_RegWr: return "SSE3g_RWr";
sewardjb31b06d2003-06-13 00:26:02 +0000895 case SSE3g1_RegWr: return "SSE3g1_RWr";
sewardj4fbe6e92003-06-15 21:54:34 +0000896 case SSE3e1_RegRd: return "SSE3e1_RRd";
sewardja60be0e2003-05-26 08:47:27 +0000897 case SSE3: return "SSE3";
sewardjfebaa3b2003-05-25 01:07:34 +0000898 case SSE4: return "SSE4";
sewardja453fb02003-06-14 13:22:36 +0000899 case SSE5: return "SSE5";
sewardjfebaa3b2003-05-25 01:07:34 +0000900 case SSE3a_MemWr: return "SSE3a_MWr";
901 case SSE3a_MemRd: return "SSE3a_MRd";
sewardje3891fa2003-06-15 03:13:48 +0000902 case SSE3ag_MemRd_RegWr: return "SSE3ag_MemRd_RegWr";
njn25e49d8e72002-09-23 09:36:25 +0000903 default:
904 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000905 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000906 else {
907 VG_(printf)("unhandled opcode: %u. Perhaps "
908 "VG_(needs).extended_UCode should be set?",
909 opc);
njne427a662002-10-02 11:08:25 +0000910 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000911 }
sewardjde4a1d02002-03-22 01:27:54 +0000912 }
913}
914
sewardja38e0922002-10-01 00:50:47 +0000915static
njn4ba5a792002-09-30 10:23:54 +0000916void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000917{
918# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000919 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000920 u->regs_live_after) \
921 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000922
njn25e49d8e72002-09-23 09:36:25 +0000923 VG_(printf)("[");
924 PRINT_RREG_LIVENESS(R_EAX, "a");
925 PRINT_RREG_LIVENESS(R_EBX, "b");
926 PRINT_RREG_LIVENESS(R_ECX, "c");
927 PRINT_RREG_LIVENESS(R_EDX, "d");
928 PRINT_RREG_LIVENESS(R_ESI, "S");
929 PRINT_RREG_LIVENESS(R_EDI, "D");
930 VG_(printf)("]");
931
932# undef PRINT_RREG_LIVENESS
933}
934
935/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000936void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000937{
njn4ba5a792002-09-30 10:23:54 +0000938 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000939
940 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000941 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000942 VG_(printf)("size: %d\n", u->size);
943 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
944 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000945 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
946 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
947 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
948 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000949 VG_(printf)("signed_widen: %d\n", u->signed_widen);
950 VG_(printf)("jmpkind: %d\n", u->jmpkind);
951 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
952 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
953 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000954 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000955 VG_(printf)("\n");
956}
957
sewardja38e0922002-10-01 00:50:47 +0000958static
njn4ba5a792002-09-30 10:23:54 +0000959void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000960{
961 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000962 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000963 if (u->opcode == JMP || u->opcode == CC2VAL)
njn563f96f2003-02-03 11:17:46 +0000964 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000965
966 switch (u->size) {
967 case 0: VG_(printf)("o"); break;
968 case 1: VG_(printf)("B"); break;
969 case 2: VG_(printf)("W"); break;
970 case 4: VG_(printf)("L"); break;
971 case 8: VG_(printf)("Q"); break;
sewardjfebaa3b2003-05-25 01:07:34 +0000972 case 16: VG_(printf)("QQ"); break;
sewardjde4a1d02002-03-22 01:27:54 +0000973 default: VG_(printf)("%d", (Int)u->size); break;
974 }
975
sewardjfebaa3b2003-05-25 01:07:34 +0000976 VG_(printf)(" \t");
977
sewardjde4a1d02002-03-22 01:27:54 +0000978 switch (u->opcode) {
979
sewardjde4a1d02002-03-22 01:27:54 +0000980 case CALLM_S: case CALLM_E:
981 break;
982
983 case INCEIP:
sewardjfebaa3b2003-05-25 01:07:34 +0000984 VG_(printf)("$%d", u->val1);
sewardjde4a1d02002-03-22 01:27:54 +0000985 break;
986
987 case LEA2:
sewardjfebaa3b2003-05-25 01:07:34 +0000988 VG_(printf)("%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000989 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000990 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +0000991 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000992 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +0000993 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000994 break;
995
996 case LEA1:
sewardjfebaa3b2003-05-25 01:07:34 +0000997 VG_(printf)("%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000998 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000999 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001000 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001001 break;
1002
sewardj7a5ebcf2002-11-13 22:42:13 +00001003 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001004 break;
1005
1006 case FPU_W:
sewardjfebaa3b2003-05-25 01:07:34 +00001007 VG_(printf)("0x%x:0x%x, ",
sewardjde4a1d02002-03-22 01:27:54 +00001008 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +00001009 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001010 break;
1011
1012 case FPU_R:
sewardjfebaa3b2003-05-25 01:07:34 +00001013 VG_(printf)("");
njn4ba5a792002-09-30 10:23:54 +00001014 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001015 VG_(printf)(", 0x%x:0x%x",
1016 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1017 break;
1018
1019 case FPU:
sewardjfebaa3b2003-05-25 01:07:34 +00001020 VG_(printf)("0x%x:0x%x",
sewardjde4a1d02002-03-22 01:27:54 +00001021 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1022 break;
1023
sewardj3d7c9c82003-03-26 21:08:13 +00001024 case MMX1:
sewardjfebaa3b2003-05-25 01:07:34 +00001025 VG_(printf)("0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001026 u->val1 & 0xFF );
1027 break;
1028
1029 case MMX2:
sewardjfebaa3b2003-05-25 01:07:34 +00001030 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001031 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1032 break;
1033
sewardjca860012003-03-27 23:52:58 +00001034 case MMX3:
sewardjfebaa3b2003-05-25 01:07:34 +00001035 VG_(printf)("0x%x:0x%x:0x%x",
sewardjca860012003-03-27 23:52:58 +00001036 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1037 break;
1038
sewardj4fbe6e92003-06-15 21:54:34 +00001039 case MMX2_ERegWr:
1040 case MMX2_ERegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001041 VG_(printf)("0x%x:0x%x, ",
sewardjca860012003-03-27 23:52:58 +00001042 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1043 VG_(pp_UOperand)(u, 2, 4, False);
1044 break;
1045
sewardj3d7c9c82003-03-26 21:08:13 +00001046 case MMX2_MemWr:
1047 case MMX2_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001048 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001049 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1050 VG_(pp_UOperand)(u, 2, 4, True);
1051 break;
1052
sewardjfebaa3b2003-05-25 01:07:34 +00001053 case SSE2a_MemWr:
1054 case SSE2a_MemRd:
1055 VG_(printf)("0x%x:0x%x:0x%x",
1056 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1057 VG_(pp_UOperand)(u, 3, 4, True);
1058 break;
1059
sewardj9dd209f2003-06-18 23:30:52 +00001060 case SSE2a1_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001061 case SSE3a_MemWr:
1062 case SSE3a_MemRd:
1063 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1064 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
sewardjde8aecf2003-05-27 00:46:28 +00001065 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
sewardjfebaa3b2003-05-25 01:07:34 +00001066 VG_(pp_UOperand)(u, 3, 4, True);
1067 break;
1068
sewardjabf8bf82003-06-15 22:28:05 +00001069 case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001070 case SSE3e_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001071 case SSE3g_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001072 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1073 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1074 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1075 VG_(pp_UOperand)(u, 3, 4, True);
1076 break;
1077
sewardjb31b06d2003-06-13 00:26:02 +00001078 case SSE3g1_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001079 case SSE3e1_RegRd:
sewardjb31b06d2003-06-13 00:26:02 +00001080 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1081 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1082 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1083 u->lit32 );
1084 VG_(pp_UOperand)(u, 3, 4, True);
1085 break;
1086
sewardja60be0e2003-05-26 08:47:27 +00001087 case SSE3:
1088 VG_(printf)("0x%x:0x%x:0x%x",
1089 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1090 u->val2 & 0xFF );
1091 break;
1092
sewardjfebaa3b2003-05-25 01:07:34 +00001093 case SSE4:
1094 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1095 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1096 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1097 break;
1098
sewardja453fb02003-06-14 13:22:36 +00001099 case SSE5:
1100 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1101 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1102 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1103 u->val3 & 0xFF );
1104 break;
1105
sewardje3891fa2003-06-15 03:13:48 +00001106 case SSE3ag_MemRd_RegWr:
1107 VG_(printf)("0x%x(addr=", u->lit32 );
1108 VG_(pp_UOperand)(u, 1, 4, False);
1109 VG_(printf)(", dst=");
1110 VG_(pp_UOperand)(u, 2, 4, False);
1111 VG_(printf)(")");
1112 break;
1113
sewardjde4a1d02002-03-22 01:27:54 +00001114 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001115 case GETSEG: case PUTSEG:
njn4ba5a792002-09-30 10:23:54 +00001116 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001117 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001118 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001119 break;
1120
1121 case JMP:
1122 switch (u->jmpkind) {
1123 case JmpCall: VG_(printf)("-c"); break;
1124 case JmpRet: VG_(printf)("-r"); break;
1125 case JmpSyscall: VG_(printf)("-sys"); break;
1126 case JmpClientReq: VG_(printf)("-cli"); break;
1127 default: break;
1128 }
njn4ba5a792002-09-30 10:23:54 +00001129 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001130 if (CondAlways == u->cond) {
1131 /* Print x86 instruction size if filled in */
1132 if (0 != u->extra4b)
1133 VG_(printf)(" ($%u)", u->extra4b);
1134 }
sewardjde4a1d02002-03-22 01:27:54 +00001135 break;
1136
1137 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001138 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1139 case NOT: case NEG: case INC: case DEC: case BSWAP:
njn4ba5a792002-09-30 10:23:54 +00001140 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001141 break;
1142
njn25e49d8e72002-09-23 09:36:25 +00001143 /* Print a "(s)" after args passed on stack */
1144 case CCALL:
njn25e49d8e72002-09-23 09:36:25 +00001145 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001146 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001147 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001148 }
njn25e49d8e72002-09-23 09:36:25 +00001149 VG_(printf)("%p(", u->lit32);
1150 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001151 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001152 if (u->regparms_n < 1)
1153 VG_(printf)("(s)");
1154 }
1155 if (u->argc > 1) {
1156 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001157 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001158 if (u->regparms_n < 2)
1159 VG_(printf)("(s)");
1160 }
1161 if (u->argc > 2) {
1162 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001163 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001164 if (u->regparms_n < 3)
1165 VG_(printf)("(s)");
1166 }
1167 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001168 break;
1169
sewardje1042472002-09-30 12:33:11 +00001170 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001171 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001172 case ADD: case ADC: case AND: case OR:
1173 case XOR: case SUB: case SBB:
1174 case SHL: case SHR: case SAR:
1175 case ROL: case ROR: case RCL: case RCR:
njn4ba5a792002-09-30 10:23:54 +00001176 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001177 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001178 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001179 break;
1180
1181 case WIDEN:
1182 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1183 u->signed_widen?'s':'z');
njn4ba5a792002-09-30 10:23:54 +00001184 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001185 break;
1186
njn25e49d8e72002-09-23 09:36:25 +00001187 default:
1188 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001189 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001190 else {
1191 VG_(printf)("unhandled opcode: %u. Perhaps "
1192 "VG_(needs).extended_UCode should be set?",
1193 u->opcode);
njne427a662002-10-02 11:08:25 +00001194 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001195 }
sewardjde4a1d02002-03-22 01:27:54 +00001196 }
sewardjde4a1d02002-03-22 01:27:54 +00001197 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1198 VG_(printf)(" (");
1199 if (u->flags_r != FlagsEmpty)
1200 vg_ppFlagSet("-r", u->flags_r);
1201 if (u->flags_w != FlagsEmpty)
1202 vg_ppFlagSet("-w", u->flags_w);
1203 VG_(printf)(")");
1204 }
njn25e49d8e72002-09-23 09:36:25 +00001205
1206 if (ppRegsLiveness) {
1207 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001208 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001209 }
1210
sewardjde4a1d02002-03-22 01:27:54 +00001211 VG_(printf)("\n");
1212}
1213
njn4ba5a792002-09-30 10:23:54 +00001214void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001215{
njn4ba5a792002-09-30 10:23:54 +00001216 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001217}
1218
njn4ba5a792002-09-30 10:23:54 +00001219void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001220{
njn4ba5a792002-09-30 10:23:54 +00001221 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001222}
sewardjde4a1d02002-03-22 01:27:54 +00001223
njn4ba5a792002-09-30 10:23:54 +00001224void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001225{
1226 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001227 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001228 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001229 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001230 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001231 VG_(printf)("\n");
1232}
1233
1234
1235/*------------------------------------------------------------*/
1236/*--- uinstr helpers for register allocation ---*/
1237/*--- and code improvement. ---*/
1238/*------------------------------------------------------------*/
1239
njn25e49d8e72002-09-23 09:36:25 +00001240/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001241 the caller (regs), which is assumed to be big enough. Return the number
1242 of entries. Written regs are indicated in parallel array isWrites.
1243 Insns which read _and_ write a register wind up mentioning it twice.
1244 Entries are placed in the array in program order, so that if a reg is
1245 read-modified-written, it appears first as a read and then as a write.
1246 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001247*/
njn25e49d8e72002-09-23 09:36:25 +00001248__inline__
njn810086f2002-11-14 12:42:47 +00001249Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001250{
njn810086f2002-11-14 12:42:47 +00001251# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1252# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001253
1254 Int n = 0;
1255 switch (u->opcode) {
1256 case LEA1: RD(1); WR(2); break;
1257 case LEA2: RD(1); RD(2); WR(3); break;
1258
sewardj9dd209f2003-06-18 23:30:52 +00001259 case SSE2a1_MemRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001260 case SSE3e_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001261 case SSE3a_MemWr:
1262 case SSE3a_MemRd:
1263 case SSE2a_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001264 case SSE3e1_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001265 case SSE2a_MemRd: RD(3); break;
1266
sewardjabf8bf82003-06-15 22:28:05 +00001267 case SSE3e_RegWr:
sewardjb31b06d2003-06-13 00:26:02 +00001268 case SSE3g1_RegWr:
sewardj02af6bc2003-06-12 00:56:06 +00001269 case SSE3g_RegWr: WR(3); break;
sewardjfebaa3b2003-05-25 01:07:34 +00001270
sewardje3891fa2003-06-15 03:13:48 +00001271 case SSE3ag_MemRd_RegWr: RD(1); WR(2); break;
1272
sewardj4fbe6e92003-06-15 21:54:34 +00001273 case MMX2_ERegRd: RD(2); break;
1274 case MMX2_ERegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001275
sewardja453fb02003-06-14 13:22:36 +00001276 case SSE4: case SSE3: case SSE5:
sewardjca860012003-03-27 23:52:58 +00001277 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001278 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001279 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001280
1281 case CCALL:
1282 if (u->argc > 0) RD(1);
1283 if (u->argc > 1) RD(2);
1284 if (u->argc > 2) RD(3);
1285 if (u->has_ret_val) WR(3);
1286 break;
1287
sewardj3d7c9c82003-03-26 21:08:13 +00001288 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001289 case FPU_R: case FPU_W: RD(2); break;
1290
sewardje1042472002-09-30 12:33:11 +00001291 case GETSEG: WR(2); break;
1292 case PUTSEG: RD(1); break;
1293
sewardjde4a1d02002-03-22 01:27:54 +00001294 case GETF: WR(1); break;
1295 case PUTF: RD(1); break;
1296
1297 case GET: WR(2); break;
1298 case PUT: RD(1); break;
1299 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001300 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001301 case MOV: RD(1); WR(2); break;
1302
1303 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001304
njn25e49d8e72002-09-23 09:36:25 +00001305 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001306 case POP: WR(1); break;
1307
sewardje1042472002-09-30 12:33:11 +00001308 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001309 case CMOV:
1310 case ADD: case ADC: case AND: case OR:
1311 case XOR: case SUB: case SBB:
1312 RD(1); RD(2); WR(2); break;
1313
1314 case SHL: case SHR: case SAR:
1315 case ROL: case ROR: case RCL: case RCR:
1316 RD(1); RD(2); WR(2); break;
1317
njn25e49d8e72002-09-23 09:36:25 +00001318 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001319 RD(1); WR(1); break;
1320
1321 case WIDEN: RD(1); WR(1); break;
1322
1323 case CC2VAL: WR(1); break;
1324 case JIFZ: RD(1); break;
1325
njn25e49d8e72002-09-23 09:36:25 +00001326 default:
1327 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001328 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001329 else {
1330 VG_(printf)("unhandled opcode: %u. Perhaps "
1331 "VG_(needs).extended_UCode should be set?",
1332 u->opcode);
njne427a662002-10-02 11:08:25 +00001333 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001334 }
sewardjde4a1d02002-03-22 01:27:54 +00001335 }
1336 return n;
1337
1338# undef RD
1339# undef WR
1340}
1341
1342
njn25e49d8e72002-09-23 09:36:25 +00001343/* Change temp regs in u into real regs, as directed by the
1344 * temps[i]-->reals[i] mapping. */
1345static __inline__
njn810086f2002-11-14 12:42:47 +00001346void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001347{
1348 Int i;
1349 if (u->tag1 == TempReg) {
1350 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001351 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001352 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001353 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001354 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001355 }
1356 if (u->tag2 == TempReg) {
1357 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001358 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001359 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001360 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001361 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001362 }
1363 if (u->tag3 == TempReg) {
1364 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001365 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001366 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001367 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001368 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001369 }
1370}
1371
1372
1373/* Tedious x86-specific hack which compensates for the fact that the
1374 register numbers for %ah .. %dh do not correspond to those for %eax
1375 .. %edx. It maps a (reg size, reg no) pair to the number of the
1376 containing 32-bit reg. */
1377static __inline__
1378Int containingArchRegOf ( Int sz, Int aregno )
1379{
1380 switch (sz) {
1381 case 4: return aregno;
1382 case 2: return aregno;
1383 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001384 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001385 }
1386}
1387
1388
1389/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001390 reg. Otherwise return -1. Used in redundant-PUT elimination.
1391 Note that this is not required for skins extending UCode because
1392 this happens before instrumentation. */
sewardjde4a1d02002-03-22 01:27:54 +00001393static __inline__
1394Int maybe_uinstrReadsArchReg ( UInstr* u )
1395{
1396 switch (u->opcode) {
1397 case GET:
1398 case ADD: case ADC: case AND: case OR:
1399 case XOR: case SUB: case SBB:
1400 case SHL: case SHR: case SAR: case ROL:
1401 case ROR: case RCL: case RCR:
1402 if (u->tag1 == ArchReg)
1403 return containingArchRegOf ( u->size, u->val1 );
1404 else
1405 return -1;
1406
1407 case GETF: case PUTF:
1408 case CALLM_S: case CALLM_E:
1409 case INCEIP:
1410 case LEA1:
1411 case LEA2:
1412 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001413 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001414 case PUT:
1415 case LOAD:
1416 case STORE:
1417 case MOV:
1418 case CMOV:
1419 case JMP:
1420 case CALLM: case CLEAR: case PUSH: case POP:
1421 case NOT: case NEG: case INC: case DEC: case BSWAP:
1422 case CC2VAL:
1423 case JIFZ:
1424 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001425 case MMX1: case MMX2: case MMX3:
sewardj3d7c9c82003-03-26 21:08:13 +00001426 case MMX2_MemRd: case MMX2_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001427 case MMX2_ERegRd: case MMX2_ERegWr:
sewardj9dd209f2003-06-18 23:30:52 +00001428 case SSE2a_MemWr: case SSE2a_MemRd: case SSE2a1_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001429 case SSE3a_MemWr: case SSE3a_MemRd:
sewardjabf8bf82003-06-15 22:28:05 +00001430 case SSE3e_RegRd: case SSE3g_RegWr: case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001431 case SSE3g1_RegWr: case SSE3e1_RegRd:
sewardje3891fa2003-06-15 03:13:48 +00001432 case SSE4: case SSE3: case SSE5: case SSE3ag_MemRd_RegWr:
sewardjde4a1d02002-03-22 01:27:54 +00001433 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001434 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1435 case GETSEG: case PUTSEG:
1436 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001437 return -1;
1438
1439 default:
njn4ba5a792002-09-30 10:23:54 +00001440 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001441 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001442 }
1443}
1444
1445static __inline__
1446Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1447{
1448 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001449 Int tempUse[VG_MAX_REGS_USED];
1450 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001451
1452 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001453 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001454 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001455 return True;
1456 return False;
1457}
1458
1459
1460/*------------------------------------------------------------*/
1461/*--- ucode improvement. ---*/
1462/*------------------------------------------------------------*/
1463
1464/* Improve the code in cb by doing
1465 -- Redundant ArchReg-fetch elimination
1466 -- Redundant PUT elimination
1467 -- Redundant cond-code restore/save elimination
1468 The overall effect of these is to allow target registers to be
1469 cached in host registers over multiple target insns.
1470*/
1471static void vg_improve ( UCodeBlock* cb )
1472{
1473 Int i, j, k, m, n, ar, tr, told, actual_areg;
1474 Int areg_map[8];
1475 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001476 Int tempUse[VG_MAX_REGS_USED];
1477 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001478 UInstr* u;
1479 Bool wr;
1480 Int* last_live_before;
1481 FlagSet future_dead_flags;
1482
sewardj2ca39a12003-06-14 12:03:35 +00001483# if 0
1484 /* DEBUGGING HOOK */
1485 {
1486 static int n_done=0;
1487 if (VG_(clo_stop_after) > 1000000000) {
1488 if (n_done > (VG_(clo_stop_after) - 1000000000)) {
1489 dis=False;
1490 VG_(clo_trace_codegen) = 0;
1491 return;
1492 }
1493 if (n_done == (VG_(clo_stop_after) - 1000000000)) {
1494 VG_(printf)("\n");
1495 VG_(pp_UCodeBlock) ( cb, "Incoming:" );
1496 dis = True;
1497 VG_(clo_trace_codegen) = 31;
1498 }
1499 n_done++;
1500 }
1501 }
1502 /* end DEBUGGING HOOK */
1503# endif /* 0 */
1504
njn25e49d8e72002-09-23 09:36:25 +00001505 if (dis)
1506 VG_(printf) ("Improvements:\n");
1507
sewardjde4a1d02002-03-22 01:27:54 +00001508 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001509 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1510 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001511 else
1512 last_live_before = NULL;
1513
1514
1515 /* PASS 1: redundant GET elimination. (Actually, more general than
1516 that -- eliminates redundant fetches of ArchRegs). */
1517
1518 /* Find the live-range-ends for all temporaries. Duplicates code
1519 in the register allocator :-( */
1520
1521 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1522
1523 for (i = cb->used-1; i >= 0; i--) {
1524 u = &cb->instrs[i];
1525
njn810086f2002-11-14 12:42:47 +00001526 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001527
1528 /* For each temp usage ... bwds in program order. */
1529 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001530 tr = tempUse[j];
1531 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001532 if (last_live_before[tr] == -1) {
1533 vg_assert(tr >= 0 && tr < cb->nextTemp);
1534 last_live_before[tr] = wr ? (i+1) : i;
1535 }
1536 }
1537
1538 }
1539
1540# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1541 { Int q; \
1542 /* Invalidate any old binding(s) to tempreg. */ \
1543 for (q = 0; q < 8; q++) \
1544 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1545 /* Add the new binding. */ \
1546 areg_map[archreg] = (tempreg); \
1547 }
1548
1549 /* Set up the A-reg map. */
1550 for (i = 0; i < 8; i++) areg_map[i] = -1;
1551
1552 /* Scan insns. */
1553 for (i = 0; i < cb->used; i++) {
1554 u = &cb->instrs[i];
1555 if (u->opcode == GET && u->size == 4) {
1556 /* GET; see if it can be annulled. */
1557 vg_assert(u->tag1 == ArchReg);
1558 vg_assert(u->tag2 == TempReg);
1559 ar = u->val1;
1560 tr = u->val2;
1561 told = areg_map[ar];
1562 if (told != -1 && last_live_before[told] <= i) {
1563 /* ar already has an old mapping to told, but that runs
1564 out here. Annul this GET, rename tr to told for the
1565 rest of the block, and extend told's live range to that
1566 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001567 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001568 n = last_live_before[tr] + 1;
1569 if (n > cb->used) n = cb->used;
1570 last_live_before[told] = last_live_before[tr];
1571 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001572 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001573 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001574 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001575 i, tr, told,i+1, n-1);
1576 for (m = i+1; m < n; m++) {
1577 if (cb->instrs[m].tag1 == TempReg
1578 && cb->instrs[m].val1 == tr)
1579 cb->instrs[m].val1 = told;
1580 if (cb->instrs[m].tag2 == TempReg
1581 && cb->instrs[m].val2 == tr)
1582 cb->instrs[m].val2 = told;
sewardjfebaa3b2003-05-25 01:07:34 +00001583 if (cb->instrs[m].tag3 == TempReg
1584 && cb->instrs[m].val3 == tr)
1585 cb->instrs[m].val3 = told;
sewardjde4a1d02002-03-22 01:27:54 +00001586 }
1587 BIND_ARCH_TO_TEMP(ar,told);
1588 }
1589 else
1590 BIND_ARCH_TO_TEMP(ar,tr);
1591 }
1592 else if (u->opcode == GET && u->size != 4) {
1593 /* Invalidate any mapping for this archreg. */
1594 actual_areg = containingArchRegOf ( u->size, u->val1 );
1595 areg_map[actual_areg] = -1;
1596 }
1597 else if (u->opcode == PUT && u->size == 4) {
1598 /* PUT; re-establish t -> a binding */
1599 vg_assert(u->tag1 == TempReg);
1600 vg_assert(u->tag2 == ArchReg);
1601 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1602 }
1603 else if (u->opcode == PUT && u->size != 4) {
1604 /* Invalidate any mapping for this archreg. */
1605 actual_areg = containingArchRegOf ( u->size, u->val2 );
1606 areg_map[actual_areg] = -1;
1607 } else {
1608
1609 /* see if insn has an archreg as a read operand; if so try to
1610 map it. */
1611 if (u->tag1 == ArchReg && u->size == 4
1612 && areg_map[u->val1] != -1) {
1613 switch (u->opcode) {
1614 case ADD: case SUB: case AND: case OR: case XOR:
1615 case ADC: case SBB:
1616 case SHL: case SHR: case SAR: case ROL: case ROR:
1617 case RCL: case RCR:
njn25e49d8e72002-09-23 09:36:25 +00001618 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001619 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001620 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001621 i, nameIReg(4,u->val1), areg_map[u->val1]);
1622 u->tag1 = TempReg;
1623 u->val1 = areg_map[u->val1];
1624 /* Remember to extend the live range of the TempReg,
1625 if necessary. */
1626 if (last_live_before[u->val1] < i)
1627 last_live_before[u->val1] = i;
1628 break;
1629 default:
1630 break;
1631 }
1632 }
1633
1634 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001635 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001636
1637 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001638 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001639 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001640 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001641 for (m = 0; m < 8; m++)
1642 if (areg_map[m] == tr) areg_map[m] = -1;
1643 }
1644 }
1645
1646 }
1647
1648# undef BIND_ARCH_TO_TEMP
1649
sewardj05f1aa12002-04-30 00:29:36 +00001650 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1651 %ESP, since the memory check machinery always requires the
1652 in-memory value of %ESP to be up to date. Although this isn't
1653 actually required by other analyses (cache simulation), it's
1654 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001655 for (j = 0; j < 8; j++)
1656 annul_put[j] = False;
1657
1658 for (i = cb->used-1; i >= 0; i--) {
1659 u = &cb->instrs[i];
1660 if (u->opcode == NOP) continue;
1661
1662 if (u->opcode == PUT && u->size == 4) {
1663 vg_assert(u->tag2 == ArchReg);
1664 actual_areg = containingArchRegOf ( 4, u->val2 );
1665 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001666 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001667 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001668 if (dis)
1669 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001670 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001671 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001672 annul_put[actual_areg] = True;
1673 }
1674 }
1675 else if (u->opcode == PUT && u->size != 4) {
1676 actual_areg = containingArchRegOf ( u->size, u->val2 );
1677 annul_put[actual_areg] = False;
1678 }
1679 else if (u->opcode == JMP || u->opcode == JIFZ
1680 || u->opcode == CALLM) {
1681 for (j = 0; j < 8; j++)
1682 annul_put[j] = False;
1683 }
1684 else {
1685 /* If an instruction reads an ArchReg, the immediately
1686 preceding PUT cannot be annulled. */
1687 actual_areg = maybe_uinstrReadsArchReg ( u );
1688 if (actual_areg != -1)
1689 annul_put[actual_areg] = False;
1690 }
1691 }
1692
1693 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1694 dead after this point, annul the MOV insn and rename t2 to t1.
1695 Further modifies the last_live_before map. */
1696
1697# if 0
njn4ba5a792002-09-30 10:23:54 +00001698 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001699 for (i = 0; i < cb->nextTemp; i++)
1700 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1701 VG_(printf)("\n");
1702# endif
1703
1704 for (i = 0; i < cb->used-1; i++) {
1705 u = &cb->instrs[i];
1706 if (u->opcode != MOV) continue;
1707 if (u->tag1 == Literal) continue;
1708 vg_assert(u->tag1 == TempReg);
1709 vg_assert(u->tag2 == TempReg);
1710 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001711 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001712 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001713 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001714 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1715 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1716 if (cb->instrs[j].tag1 == TempReg
1717 && cb->instrs[j].val1 == u->val2)
1718 cb->instrs[j].val1 = u->val1;
1719 if (cb->instrs[j].tag2 == TempReg
1720 && cb->instrs[j].val2 == u->val2)
1721 cb->instrs[j].val2 = u->val1;
sewardjfebaa3b2003-05-25 01:07:34 +00001722 if (cb->instrs[j].tag3 == TempReg
1723 && cb->instrs[j].val3 == u->val2)
1724 cb->instrs[j].val3 = u->val1;
sewardjde4a1d02002-03-22 01:27:54 +00001725 }
1726 last_live_before[u->val1] = last_live_before[u->val2];
1727 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001728 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001729 }
1730 }
1731
1732 /* PASS 3: redundant condition-code restore/save elimination.
1733 Scan backwards from the end. future_dead_flags records the set
1734 of flags which are dead at this point, that is, will be written
1735 before they are next read. Earlier uinsns which write flags
1736 already in future_dead_flags can have their writes annulled.
1737 */
1738 future_dead_flags = FlagsEmpty;
1739
1740 for (i = cb->used-1; i >= 0; i--) {
1741 u = &cb->instrs[i];
1742
1743 /* We might never make it to insns beyond this one, so be
1744 conservative. */
1745 if (u->opcode == JIFZ || u->opcode == JMP) {
1746 future_dead_flags = FlagsEmpty;
1747 continue;
1748 }
1749
sewardjfbb6cda2002-07-24 09:33:52 +00001750 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1751 For example people try to mess with bit 21 to see if CPUID
1752 works. The setting may or may not actually take hold. So we
1753 play safe here. */
1754 if (u->opcode == PUTF) {
1755 future_dead_flags = FlagsEmpty;
1756 continue;
1757 }
1758
sewardjde4a1d02002-03-22 01:27:54 +00001759 /* We can annul the flags written by this insn if it writes a
1760 subset (or eq) of the set of flags known to be dead after
1761 this insn. If not, just record the flags also written by
1762 this insn.*/
1763 if (u->flags_w != FlagsEmpty
1764 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001765 if (dis) {
1766 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001767 vg_ppFlagSet("", u->flags_w);
1768 VG_(printf)(" due to later ");
1769 vg_ppFlagSet("", future_dead_flags);
1770 VG_(printf)("\n");
1771 }
1772 u->flags_w = FlagsEmpty;
1773 } else {
1774 future_dead_flags
1775 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1776 }
1777
1778 /* If this insn also reads flags, empty out future_dead_flags so
1779 as to force preceding writes not to be annulled. */
1780 if (u->flags_r != FlagsEmpty)
1781 future_dead_flags = FlagsEmpty;
1782 }
1783
1784 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001785 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1786
1787 if (dis) {
1788 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001789 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001790 }
sewardjde4a1d02002-03-22 01:27:54 +00001791}
1792
njn9b007f62003-04-07 14:40:25 +00001793/*------------------------------------------------------------*/
1794/*--- %ESP-update pass ---*/
1795/*------------------------------------------------------------*/
1796
1797/* For skins that want to know about %ESP changes, this pass adds
1798 in the appropriate hooks. We have to do it after the skin's
1799 instrumentation, so the skin doesn't have to worry about the CCALLs
1800 it adds in, and we must do it before register allocation because
1801 spilled temps make it much harder to work out the %esp deltas.
1802 Thus we have it as an extra phase between the two. */
1803static
1804UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1805{
1806 UCodeBlock* cb;
1807 UInstr* u;
1808 Int delta = 0;
1809 UInt t_ESP = INVALID_TEMPREG;
sewardj05bcdcb2003-05-18 10:05:38 +00001810 Int i;
njn9b007f62003-04-07 14:40:25 +00001811
1812 cb = VG_(setup_UCodeBlock)(cb_in);
1813
1814 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1815 u = VG_(get_instr)(cb_in, i);
1816
1817 if (GET == u->opcode && R_ESP == u->val1) {
1818 t_ESP = u->val2;
1819 delta = 0;
1820
1821 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1822
1823# define DO_GENERIC \
1824 if (VG_(track_events).new_mem_stack || \
1825 VG_(track_events).die_mem_stack) { \
1826 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1827 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1828 1, 1, False); \
1829 }
1830
1831# define DO(kind, size) \
1832 if (VG_(track_events).kind##_mem_stack_##size) { \
1833 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1834 uCCall(cb, (Addr) VG_(track_events).kind##_mem_stack_##size,\
1835 1, 1, False); \
1836 \
1837 } else \
1838 DO_GENERIC \
1839 break
1840
1841 if (u->val1 == t_ESP) {
1842 /* Known delta, common cases handled specially. */
1843 switch (delta) {
1844 case 4: DO(die, 4);
1845 case -4: DO(new, 4);
1846 case 8: DO(die, 8);
1847 case -8: DO(new, 8);
1848 case 12: DO(die, 12);
1849 case -12: DO(new, 12);
1850 case 16: DO(die, 16);
1851 case -16: DO(new, 16);
1852 case 32: DO(die, 32);
1853 case -32: DO(new, 32);
1854 default: DO_GENERIC; break;
1855 }
1856 } else {
1857 /* Unknown delta */
1858 DO_GENERIC;
1859 }
1860 delta = 0;
1861
1862# undef DO
1863# undef DO_GENERIC
1864
1865 } else if (Literal == u->tag1 && t_ESP == u->val2) {
1866 if (ADD == u->opcode) delta += u->lit32;
1867 if (SUB == u->opcode) delta -= u->lit32;
1868
1869 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1870 TempReg == u->tag2) {
1871 t_ESP = u->val2;
1872 }
1873 VG_(copy_UInstr) ( cb, u );
1874 }
1875
1876 VG_(free_UCodeBlock)(cb_in);
1877 return cb;
1878}
sewardjde4a1d02002-03-22 01:27:54 +00001879
1880/*------------------------------------------------------------*/
1881/*--- The new register allocator. ---*/
1882/*------------------------------------------------------------*/
1883
1884typedef
1885 struct {
1886 /* Becomes live for the first time after this insn ... */
1887 Int live_after;
1888 /* Becomes dead for the last time after this insn ... */
1889 Int dead_before;
1890 /* The "home" spill slot, if needed. Never changes. */
1891 Int spill_no;
1892 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1893 Int real_no;
1894 }
1895 TempInfo;
1896
1897
1898/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1899 them in spill locations, and add spill code, if there are not
1900 enough real regs. The usual register allocation deal, in short.
1901
1902 Important redundancy of representation:
1903
1904 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1905 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1906
1907 The .real_no field of a TempInfo gives the current RRR for
1908 this TempReg, or VG_NOVALUE if the TempReg is currently
1909 in memory, in which case it is in the SpillNo denoted by
1910 spillno.
1911
1912 These pieces of information (a fwds-bwds mapping, really) must
1913 be kept consistent!
1914
1915 This allocator uses the so-called Second Chance Bin Packing
1916 algorithm, as described in "Quality and Speed in Linear-scan
1917 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1918 pp142-151). It is simple and fast and remarkably good at
1919 minimising the amount of spill code introduced.
1920*/
1921
1922static
1923UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1924{
1925 TempInfo* temp_info;
1926 Int real_to_temp[VG_MAX_REALREGS];
1927 Bool is_spill_cand[VG_MAX_REALREGS];
1928 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1929 Int i, j, k, m, r, tno, max_ss_no;
1930 Bool wr, defer, isRead, spill_reqd;
njnf4ce3d32003-02-10 10:17:26 +00001931 UInt realUse[VG_MAX_REGS_USED];
1932 Int tempUse[VG_MAX_REGS_USED];
1933 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001934 UCodeBlock* c2;
1935
1936 /* Used to denote ... well, "no value" in this fn. */
1937# define VG_NOTHING (-2)
1938
1939 /* Initialise the TempReg info. */
1940 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001941 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1942 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001943 else
1944 temp_info = NULL;
1945
1946 for (i = 0; i < c1->nextTemp; i++) {
1947 temp_info[i].live_after = VG_NOTHING;
1948 temp_info[i].dead_before = VG_NOTHING;
1949 temp_info[i].spill_no = VG_NOTHING;
1950 /* temp_info[i].real_no is not yet relevant. */
1951 }
1952
1953 spill_reqd = False;
1954
1955 /* Scan fwds to establish live ranges. */
1956
1957 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00001958 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1959 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00001960 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00001961
1962 /* For each temp usage ... fwds in program order */
1963 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001964 tno = tempUse[j];
1965 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001966 if (wr) {
1967 /* Writes hold a reg live until after this insn. */
1968 if (temp_info[tno].live_after == VG_NOTHING)
1969 temp_info[tno].live_after = i;
1970 if (temp_info[tno].dead_before < i + 1)
1971 temp_info[tno].dead_before = i + 1;
1972 } else {
1973 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00001974 if (temp_info[tno].live_after == VG_NOTHING) {
1975 VG_(printf)("At instr %d...\n", i);
1976 VG_(core_panic)("First use of tmp not a write,"
1977 " probably a skin instrumentation error");
1978 }
sewardjde4a1d02002-03-22 01:27:54 +00001979 /* Reads only hold it live until before this insn. */
1980 if (temp_info[tno].dead_before < i)
1981 temp_info[tno].dead_before = i;
1982 }
1983 }
1984 }
1985
1986# if 0
1987 /* Sanity check on live ranges. Expensive but correct. */
1988 for (i = 0; i < c1->nextTemp; i++) {
1989 vg_assert( (temp_info[i].live_after == VG_NOTHING
1990 && temp_info[i].dead_before == VG_NOTHING)
1991 || (temp_info[i].live_after != VG_NOTHING
1992 && temp_info[i].dead_before != VG_NOTHING) );
1993 }
1994# endif
1995
1996 /* Do a rank-based allocation of TempRegs to spill slot numbers.
1997 We put as few as possible values in spill slots, but
1998 nevertheless need to have an assignment to them just in case. */
1999
2000 max_ss_no = -1;
2001
2002 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
2003 ss_busy_until_before[i] = 0;
2004
2005 for (i = 0; i < c1->nextTemp; i++) {
2006
2007 /* True iff this temp is unused. */
2008 if (temp_info[i].live_after == VG_NOTHING)
2009 continue;
2010
2011 /* Find the lowest-numbered spill slot which is available at the
2012 start point of this interval, and assign the interval to
2013 it. */
2014 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
2015 if (ss_busy_until_before[j] <= temp_info[i].live_after)
2016 break;
2017 if (j == VG_MAX_SPILLSLOTS) {
2018 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00002019 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00002020 }
2021 ss_busy_until_before[j] = temp_info[i].dead_before;
2022 temp_info[i].spill_no = j;
2023 if (j > max_ss_no)
2024 max_ss_no = j;
2025 }
2026
2027 VG_(total_reg_rank) += (max_ss_no+1);
2028
2029 /* Show live ranges and assigned spill slot nos. */
2030
njn25e49d8e72002-09-23 09:36:25 +00002031 if (dis) {
2032 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00002033
2034 for (i = 0; i < c1->nextTemp; i++) {
2035 if (temp_info[i].live_after == VG_NOTHING)
2036 continue;
2037 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002038 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00002039 i,
2040 temp_info[i].live_after,
2041 temp_info[i].dead_before,
2042 temp_info[i].spill_no
2043 );
2044 }
njn25e49d8e72002-09-23 09:36:25 +00002045 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00002046 }
2047
2048 /* Now that we've established a spill slot number for each used
2049 temporary, we can go ahead and do the core of the "Second-chance
2050 binpacking" allocation algorithm. */
2051
njn25e49d8e72002-09-23 09:36:25 +00002052 if (dis) VG_(printf)("Register allocated UCode:\n");
2053
2054
sewardjde4a1d02002-03-22 01:27:54 +00002055 /* Resulting code goes here. We generate it all in a forwards
2056 pass. */
njn4ba5a792002-09-30 10:23:54 +00002057 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002058 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00002059
2060 /* At the start, no TempRegs are assigned to any real register.
2061 Correspondingly, all temps claim to be currently resident in
2062 their spill slots, as computed by the previous two passes. */
2063 for (i = 0; i < VG_MAX_REALREGS; i++)
2064 real_to_temp[i] = VG_NOTHING;
2065 for (i = 0; i < c1->nextTemp; i++)
2066 temp_info[i].real_no = VG_NOTHING;
2067
sewardjde4a1d02002-03-22 01:27:54 +00002068 /* Process each insn in turn. */
2069 for (i = 0; i < c1->used; i++) {
2070
2071 if (c1->instrs[i].opcode == NOP) continue;
2072 VG_(uinstrs_prealloc)++;
2073
2074# if 0
2075 /* Check map consistency. Expensive but correct. */
2076 for (r = 0; r < VG_MAX_REALREGS; r++) {
2077 if (real_to_temp[r] != VG_NOTHING) {
2078 tno = real_to_temp[r];
2079 vg_assert(tno >= 0 && tno < c1->nextTemp);
2080 vg_assert(temp_info[tno].real_no == r);
2081 }
2082 }
2083 for (tno = 0; tno < c1->nextTemp; tno++) {
2084 if (temp_info[tno].real_no != VG_NOTHING) {
2085 r = temp_info[tno].real_no;
2086 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
2087 vg_assert(real_to_temp[r] == tno);
2088 }
2089 }
2090# endif
2091
njn25e49d8e72002-09-23 09:36:25 +00002092 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002093 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00002094
2095 /* First, free up enough real regs for this insn. This may
2096 generate spill stores since we may have to evict some TempRegs
2097 currently in real regs. Also generates spill loads. */
2098
njn810086f2002-11-14 12:42:47 +00002099 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2100 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002101 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002102
2103 /* For each ***different*** temp mentioned in the insn .... */
2104 for (j = 0; j < k; j++) {
2105
2106 /* First check if the temp is mentioned again later; if so,
2107 ignore this mention. We only want to process each temp
2108 used by the insn once, even if it is mentioned more than
2109 once. */
2110 defer = False;
njn810086f2002-11-14 12:42:47 +00002111 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00002112 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002113 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00002114 defer = True;
2115 if (defer)
2116 continue;
2117
njn810086f2002-11-14 12:42:47 +00002118 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00002119 First of all, if it already has a register assigned, we
2120 don't need to do anything more. */
2121 if (temp_info[tno].real_no != VG_NOTHING)
2122 continue;
2123
2124 /* No luck. The next thing to do is see if there is a
2125 currently unassigned register available. If so, bag it. */
2126 for (r = 0; r < VG_MAX_REALREGS; r++) {
2127 if (real_to_temp[r] == VG_NOTHING)
2128 break;
2129 }
2130 if (r < VG_MAX_REALREGS) {
2131 real_to_temp[r] = tno;
2132 temp_info[tno].real_no = r;
2133 continue;
2134 }
2135
2136 /* Unfortunately, that didn't pan out either. So we'll have
2137 to eject some other unfortunate TempReg into a spill slot
2138 in order to free up a register. Of course, we need to be
2139 careful not to eject some other TempReg needed by this
2140 insn.
2141
2142 Select r in 0 .. VG_MAX_REALREGS-1 such that
2143 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00002144 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002145 wrong to eject some other TempReg which we need to use in
2146 this insn.
2147
2148 It is here that it is important to make a good choice of
2149 register to spill. */
2150
2151 /* First, mark those regs which are not spill candidates. */
2152 for (r = 0; r < VG_MAX_REALREGS; r++) {
2153 is_spill_cand[r] = True;
2154 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002155 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002156 is_spill_cand[r] = False;
2157 break;
2158 }
2159 }
2160 }
2161
2162 /* We can choose any r satisfying is_spill_cand[r]. However,
2163 try to make a good choice. First, try and find r such
2164 that the associated TempReg is already dead. */
2165 for (r = 0; r < VG_MAX_REALREGS; r++) {
2166 if (is_spill_cand[r] &&
2167 temp_info[real_to_temp[r]].dead_before <= i)
2168 goto have_spill_cand;
2169 }
2170
2171 /* No spill cand is mapped to a dead TempReg. Now we really
2172 _do_ have to generate spill code. Choose r so that the
2173 next use of its associated TempReg is as far ahead as
2174 possible, in the hope that this will minimise the number of
2175 consequent reloads required. This is a bit expensive, but
2176 we don't have to do it very often. */
2177 {
2178 Int furthest_r = VG_MAX_REALREGS;
2179 Int furthest = 0;
2180 for (r = 0; r < VG_MAX_REALREGS; r++) {
2181 if (!is_spill_cand[r]) continue;
2182 for (m = i+1; m < c1->used; m++)
2183 if (uInstrMentionsTempReg(&c1->instrs[m],
2184 real_to_temp[r]))
2185 break;
2186 if (m > furthest) {
2187 furthest = m;
2188 furthest_r = r;
2189 }
2190 }
2191 r = furthest_r;
2192 goto have_spill_cand;
2193 }
2194
2195 have_spill_cand:
2196 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002197 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002198
2199 /* Eject r. Important refinement: don't bother if the
2200 associated TempReg is now dead. */
2201 vg_assert(real_to_temp[r] != VG_NOTHING);
2202 vg_assert(real_to_temp[r] != tno);
2203 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2204 if (temp_info[real_to_temp[r]].dead_before > i) {
2205 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002206 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002207 SpillNo, temp_info[real_to_temp[r]].spill_no);
2208 VG_(uinstrs_spill)++;
2209 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002210 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002211 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002212 }
2213
2214 /* Decide if tno is read. */
2215 isRead = False;
2216 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002217 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002218 isRead = True;
2219
2220 /* If so, generate a spill load. */
2221 if (isRead) {
2222 uInstr2(c2, GET, 4,
2223 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002224 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002225 VG_(uinstrs_spill)++;
2226 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002227 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002228 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002229 }
2230
2231 /* Update the forwards and backwards maps. */
2232 real_to_temp[r] = tno;
2233 temp_info[tno].real_no = r;
2234 }
2235
2236 /* By this point, all TempRegs mentioned by the insn have been
2237 bought into real regs. We now copy the insn to the output
2238 and use patchUInstr to convert its rTempRegs into
2239 realregs. */
2240 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002241 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002242 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002243 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002244
njn25e49d8e72002-09-23 09:36:25 +00002245 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002246 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002247 VG_(printf)("\n");
2248 }
2249 }
2250
2251 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002252 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002253
njn4ba5a792002-09-30 10:23:54 +00002254 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002255
2256 if (spill_reqd)
2257 VG_(translations_needing_spill)++;
2258
2259 return c2;
2260
2261# undef VG_NOTHING
2262
2263}
sewardj7c4b6042003-06-14 15:47:15 +00002264
njn25e49d8e72002-09-23 09:36:25 +00002265/* Analysis records liveness of all general-use RealRegs in the UCode. */
2266static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2267{
2268 Int i, j, k;
2269 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002270 Int regUse[VG_MAX_REGS_USED];
2271 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002272 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002273
njn25e49d8e72002-09-23 09:36:25 +00002274 /* All regs are dead at the end of the block */
2275 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002276
sewardjde4a1d02002-03-22 01:27:54 +00002277 for (i = cb->used-1; i >= 0; i--) {
2278 u = &cb->instrs[i];
2279
njn25e49d8e72002-09-23 09:36:25 +00002280 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002281
njn810086f2002-11-14 12:42:47 +00002282 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002283
njn25e49d8e72002-09-23 09:36:25 +00002284 /* For each reg usage ... bwds in program order. Variable is live
2285 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002286 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002287 convert it to our rank number. */
2288 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002289 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002290 rregs_live,
njn810086f2002-11-14 12:42:47 +00002291 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002292 }
2293 }
sewardjde4a1d02002-03-22 01:27:54 +00002294}
2295
sewardjde4a1d02002-03-22 01:27:54 +00002296/*------------------------------------------------------------*/
2297/*--- Main entry point for the JITter. ---*/
2298/*------------------------------------------------------------*/
2299
2300/* Translate the basic block beginning at orig_addr, placing the
2301 translation in a vg_malloc'd block, the address and size of which
2302 are returned in trans_addr and trans_size. Length of the original
2303 block is also returned in orig_size. If the latter three are NULL,
2304 this call is being done for debugging purposes, in which case (a)
2305 throw away the translation once it is made, and (b) produce a load
2306 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002307
2308 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002309*/
njn25e49d8e72002-09-23 09:36:25 +00002310void VG_(translate) ( /*IN*/ ThreadState* tst,
2311 /*IN*/ Addr orig_addr,
2312 /*OUT*/ UInt* orig_size,
2313 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002314 /*OUT*/ UInt* trans_size,
2315 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002316{
sewardj25c7c3a2003-07-10 00:17:58 +00002317 Int n_disassembled_bytes, final_code_size, i;
sewardjde4a1d02002-03-22 01:27:54 +00002318 Bool debugging_translation;
2319 UChar* final_code;
2320 UCodeBlock* cb;
sewardja60be0e2003-05-26 08:47:27 +00002321 Bool notrace_until_done;
sewardj1e86b8b2003-06-16 23:34:12 +00002322 UInt notrace_until_limit = 0;
sewardjde4a1d02002-03-22 01:27:54 +00002323
2324 VGP_PUSHCC(VgpTranslate);
2325 debugging_translation
2326 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2327
sewardj25c7c3a2003-07-10 00:17:58 +00002328 /* Look in the code redirect table to see if we should
2329 translate an alternative address for orig_addr. */
2330 for (i = 0; VG_(code_redirect_table)[i].entry_pt_orig != 0; i++) {
2331 if (orig_addr == VG_(code_redirect_table)[i].entry_pt_orig) {
2332 if (VG_(clo_verbosity) >= 2)
2333 VG_(message)(Vg_UserMsg,
2334 "TRANSLATE: %p redirected to %p",
2335 orig_addr,
2336 VG_(code_redirect_table)[i].entry_pt_subst );
2337 orig_addr = VG_(code_redirect_table)[i].entry_pt_subst;
2338 break;
2339 }
2340 }
2341
sewardja60be0e2003-05-26 08:47:27 +00002342 /* If codegen tracing, don't start tracing until
2343 notrace_until_limit blocks have gone by. This avoids printing
2344 huge amounts of useless junk when all we want to see is the last
2345 few blocks translated prior to a failure. Set
2346 notrace_until_limit to be the number of translations to be made
2347 before --trace-codegen= style printing takes effect. */
2348 notrace_until_done
2349 = VG_(overall_in_count) > notrace_until_limit;
2350
njn25e49d8e72002-09-23 09:36:25 +00002351 if (!debugging_translation)
2352 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002353
njn4ba5a792002-09-30 10:23:54 +00002354 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002355 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002356
njn25e49d8e72002-09-23 09:36:25 +00002357 /* If doing any code printing, print a basic block start marker */
sewardja60be0e2003-05-26 08:47:27 +00002358 if (VG_(clo_trace_codegen) && notrace_until_done) {
njn25e49d8e72002-09-23 09:36:25 +00002359 Char fnname[64] = "";
2360 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2361 VG_(printf)(
njne0205ff2003-04-08 00:56:14 +00002362 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %llu ====\n\n",
njn25e49d8e72002-09-23 09:36:25 +00002363 VG_(overall_in_count), fnname, orig_addr,
2364 VG_(overall_in_osize), VG_(overall_in_tsize),
2365 VG_(bbs_done));
2366 }
2367
2368 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
sewardja60be0e2003-05-26 08:47:27 +00002369# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2370 ( debugging_translation \
2371 || (notrace_until_done \
2372 && (VG_(clo_trace_codegen) & (1 << (n-1))) ))
njn25e49d8e72002-09-23 09:36:25 +00002373
sewardjde4a1d02002-03-22 01:27:54 +00002374 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002375 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2376 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002377 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002378 VGP_POPCC(VgpToUCode);
2379
sewardjde4a1d02002-03-22 01:27:54 +00002380 /* Try and improve the code a bit. */
2381 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002382 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2383 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002384 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002385 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002386 }
2387
njn25e49d8e72002-09-23 09:36:25 +00002388 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2389 SK_(instrument) looks at it. */
2390 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2391 VGP_PUSHCC(VgpInstrument);
2392 cb = SK_(instrument) ( cb, orig_addr );
2393 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002394 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002395 VG_(saneUCodeBlock)( cb );
2396 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002397
njn9b007f62003-04-07 14:40:25 +00002398 /* Add %ESP-update hooks if the skin requires them */
2399 /* Nb: We don't print out this phase, because it doesn't do much */
2400 if (VG_(need_to_handle_esp_assignment)()) {
2401 VGP_PUSHCC(VgpESPUpdate);
2402 cb = vg_ESP_update_pass ( cb );
2403 VGP_POPCC(VgpESPUpdate);
2404 }
2405
sewardjde4a1d02002-03-22 01:27:54 +00002406 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002407 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2408 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002409 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002410 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002411
njn25e49d8e72002-09-23 09:36:25 +00002412 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2413 * anything; results can be seen when emitting final code). */
2414 VGP_PUSHCC(VgpLiveness);
2415 vg_realreg_liveness_analysis ( cb );
2416 VGP_POPCC(VgpLiveness);
2417
2418 /* Emit final code */
2419 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2420
2421 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002422 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002423 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002424 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002425
njn25e49d8e72002-09-23 09:36:25 +00002426#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2427
sewardjde4a1d02002-03-22 01:27:54 +00002428 if (debugging_translation) {
2429 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002430 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002431 } else {
2432 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002433 *orig_size = n_disassembled_bytes;
2434 *trans_addr = (Addr)final_code;
2435 *trans_size = final_code_size;
2436 }
njn25e49d8e72002-09-23 09:36:25 +00002437 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002438}
2439
/*--------------------------------------------------------------------*/
/*--- end                                           vg_translate.c ---*/
/*--------------------------------------------------------------------*/