blob: 42fafed3e5110cf157b71dfded098d40df16f534 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
njn0e1b5142003-04-15 14:58:06 +000011 Copyright (C) 2000-2003 Julian Seward
sewardjde4a1d02002-03-22 01:27:54 +000012 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
njn25e49d8e72002-09-23 09:36:25 +000038#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000058 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000059 cb->used = cb->size = 0;
60 cb->nextTemp = cb_in->nextTemp;
61 cb->instrs = NULL;
62 return cb;
63}
sewardjde4a1d02002-03-22 01:27:54 +000064
njn4ba5a792002-09-30 10:23:54 +000065void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000066{
njn25e49d8e72002-09-23 09:36:25 +000067 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
68 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000069}
70
71
72/* Ensure there's enough space in a block to add one uinstr. */
daywalkerb18d2532003-09-27 20:15:01 +000073static
sewardjde4a1d02002-03-22 01:27:54 +000074void ensureUInstr ( UCodeBlock* cb )
75{
76 if (cb->used == cb->size) {
77 if (cb->instrs == NULL) {
78 vg_assert(cb->size == 0);
79 vg_assert(cb->used == 0);
80 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000081 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000082 } else {
83 Int i;
njn25e49d8e72002-09-23 09:36:25 +000084 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000085 2 * sizeof(UInstr) * cb->size);
86 for (i = 0; i < cb->used; i++)
87 instrs2[i] = cb->instrs[i];
88 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000089 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000090 cb->instrs = instrs2;
91 }
92 }
93
94 vg_assert(cb->used < cb->size);
95}
96
97
98__inline__
njn4ba5a792002-09-30 10:23:54 +000099void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000100{
101 u->val1 = u->val2 = u->val3 = 0;
102 u->tag1 = u->tag2 = u->tag3 = NoValue;
103 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000104 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000105 u->signed_widen = u->has_ret_val = False;
106 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000107 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000108 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000109 u->size = 0;
110 u->cond = 0;
111 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000112 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000113}
114
115
116/* Add an instruction to a ucode block, and return the index of the
117 instruction. */
118__inline__
njn4ba5a792002-09-30 10:23:54 +0000119void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000120 Tag tag1, UInt val1,
121 Tag tag2, UInt val2,
122 Tag tag3, UInt val3 )
123{
124 UInstr* ui;
125 ensureUInstr(cb);
126 ui = & cb->instrs[cb->used];
127 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000128 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000129 ui->val1 = val1;
130 ui->val2 = val2;
131 ui->val3 = val3;
132 ui->opcode = opcode;
133 ui->tag1 = tag1;
134 ui->tag2 = tag2;
135 ui->tag3 = tag3;
136 ui->size = sz;
137 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
138 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
139 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
140}
141
142
143__inline__
njn4ba5a792002-09-30 10:23:54 +0000144void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000145 Tag tag1, UInt val1,
146 Tag tag2, UInt val2 )
147{
148 UInstr* ui;
149 ensureUInstr(cb);
150 ui = & cb->instrs[cb->used];
151 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000152 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000153 ui->val1 = val1;
154 ui->val2 = val2;
155 ui->opcode = opcode;
156 ui->tag1 = tag1;
157 ui->tag2 = tag2;
158 ui->size = sz;
159 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
160 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
161}
162
163
164__inline__
njn4ba5a792002-09-30 10:23:54 +0000165void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000166 Tag tag1, UInt val1 )
167{
168 UInstr* ui;
169 ensureUInstr(cb);
170 ui = & cb->instrs[cb->used];
171 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000172 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000173 ui->val1 = val1;
174 ui->opcode = opcode;
175 ui->tag1 = tag1;
176 ui->size = sz;
177 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
178}
179
180
181__inline__
njn4ba5a792002-09-30 10:23:54 +0000182void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000183{
184 UInstr* ui;
185 ensureUInstr(cb);
186 ui = & cb->instrs[cb->used];
187 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000188 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000189 ui->opcode = opcode;
190 ui->size = sz;
191}
192
sewardjde4a1d02002-03-22 01:27:54 +0000193/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000194__inline__
njn4ba5a792002-09-30 10:23:54 +0000195void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000196{
197 ensureUInstr(cb);
198 cb->instrs[cb->used] = *instr;
199 cb->used++;
200}
201
sewardjde4a1d02002-03-22 01:27:54 +0000202/* Copy auxiliary info from one uinstr to another. */
203static __inline__
204void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
205{
njn25e49d8e72002-09-23 09:36:25 +0000206 dst->cond = src->cond;
207 dst->extra4b = src->extra4b;
208 dst->signed_widen = src->signed_widen;
209 dst->jmpkind = src->jmpkind;
210 dst->flags_r = src->flags_r;
211 dst->flags_w = src->flags_w;
212 dst->argc = src->argc;
213 dst->regparms_n = src->regparms_n;
214 dst->has_ret_val = src->has_ret_val;
215 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000216}
217
218
sewardjde4a1d02002-03-22 01:27:54 +0000219/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000220void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000221{
222 LAST_UINSTR(cb).lit32 = lit32;
223}
224
225
njn25e49d8e72002-09-23 09:36:25 +0000226/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000227void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
228 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000229{
230 vg_assert(argc < 4);
231 vg_assert(regparms_n <= argc);
232 LAST_UINSTR(cb).lit32 = fn;
233 LAST_UINSTR(cb).argc = argc;
234 LAST_UINSTR(cb).regparms_n = regparms_n;
235 LAST_UINSTR(cb).has_ret_val = has_ret_val;
236}
237
njn810086f2002-11-14 12:42:47 +0000238/* For the last uinsn inserted into cb, set the read, written and
239 undefined flags. Undefined flags are counted as written, but it
240 seems worthwhile to distinguish them.
241*/
242__inline__
243void VG_(set_flag_fields) ( UCodeBlock* cb,
244 FlagSet rr, FlagSet ww, FlagSet uu )
245{
246 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
247
248 vg_assert(rr == (rr & FlagsALL));
249 vg_assert(uw == (uw & FlagsALL));
250 LAST_UINSTR(cb).flags_r = rr;
251 LAST_UINSTR(cb).flags_w = uw;
252}
253
254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
njn25e49d8e72002-09-23 09:36:25 +0000261#if 1
262# define BEST_ALLOC_ORDER
263#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
330/* This seems as good a place as any to record some important stuff
331 about ucode semantics.
332
333 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
334 TempReg are defined to zero-extend the loaded value to 32 bits.
335 This is needed to make the translation of movzbl et al work
336 properly.
337
338 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
339
340 * Arithmetic on TempRegs is at the specified size. For example,
341 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
342 being emitted -- not a 32 bit one.
343
344 * On some insns we allow the cc bit to be set. If so, the
345 intention is that the simulated machine's %eflags register
346 is copied into that of the real machine before the insn,
347 and copied back again afterwards. This means that the
348 code generated for that insn must be very careful only to
349 update %eflags in the intended way. This is particularly
350 important for the routines referenced by CALL insns.
351*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
   Before liveness analysis, the regs_live_after field should be
   ALL_RREGS_LIVE on every uinstr whose check includes CCALL0.
   Afterwards, it may hold any value.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
njn25e49d8e72002-09-23 09:36:25 +0000402Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000403{
njn25e49d8e72002-09-23 09:36:25 +0000404# define LIT0 (u->lit32 == 0)
sewardjb31b06d2003-06-13 00:26:02 +0000405# define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
njn25e49d8e72002-09-23 09:36:25 +0000406# define LIT1 (!(LIT0))
407# define LITm (u->tag1 == Literal ? True : LIT0 )
sewardj77d30a22003-10-19 08:18:52 +0000408# define SZ16 (u->size == 16)
sewardj3d7c9c82003-03-26 21:08:13 +0000409# define SZ8 (u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000410# define SZ4 (u->size == 4)
411# define SZ2 (u->size == 2)
412# define SZ1 (u->size == 1)
413# define SZ0 (u->size == 0)
414# define SZ42 (u->size == 4 || u->size == 2)
sewardjd7971012003-04-04 00:21:58 +0000415# define SZ48 (u->size == 4 || u->size == 8)
sewardjfebaa3b2003-05-25 01:07:34 +0000416# define SZ416 (u->size == 4 || u->size == 16)
sewardjde8aecf2003-05-27 00:46:28 +0000417# define SZsse (u->size == 4 || u->size == 8 || u->size == 16)
njn25e49d8e72002-09-23 09:36:25 +0000418# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
419# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
420 || u->size == 10 || u->size == 28 || u->size == 108)
421# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
422 ? (u->size == 4) : True)
423
424/* For these ones, two cases:
425 *
426 * 1. They are transliterations of the corresponding x86 instruction, in
427 * which case they should have its flags (except that redundant write
428 * flags can be annulled by the optimisation pass).
429 *
430 * 2. They are being used generally for other purposes, eg. helping with a
431 * 'rep'-prefixed instruction, in which case should have empty flags .
432 */
433# define emptyR (u->flags_r == FlagsEmpty)
434# define emptyW (u->flags_w == FlagsEmpty)
435# define CC0 (emptyR && emptyW)
436# define CCr (u->flags_r == FlagsALL && emptyW)
437# define CCw (emptyR && u->flags_w == FlagsALL)
438# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
439# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
440# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
441# define CCb ((u->flags_r==FlagC || emptyR) && \
442 (u->flags_w==FlagsOSZACP || emptyW))
443# define CCd ((u->flags_r==FlagC || emptyR) && \
444 (u->flags_w==FlagsOC || emptyW))
sewardjc232b212002-12-10 22:24:03 +0000445# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
446 || (u->flags_r==FlagsZCP && emptyW))
njn25e49d8e72002-09-23 09:36:25 +0000447# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
448# define CCj (u->cond==CondAlways ? CC0 : CCg)
449
sewardjde4a1d02002-03-22 01:27:54 +0000450# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
451# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
452# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
453# define A1 (u->tag1 == ArchReg)
454# define A2 (u->tag2 == ArchReg)
455# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
456# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
457# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
458# define L1 (u->tag1 == Literal && u->val1 == 0)
459# define L2 (u->tag2 == Literal && u->val2 == 0)
460# define Ls1 (u->tag1 == Lit16)
sewardjfebaa3b2003-05-25 01:07:34 +0000461# define Ls2 (u->tag2 == Lit16)
sewardjde4a1d02002-03-22 01:27:54 +0000462# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000463# define TRL1 (TR1 || L1)
464# define TRAL1 (TR1 || A1 || L1)
jsgf5efa4fd2003-10-14 21:49:11 +0000465# define TRA1 (TR1 || A1)
466# define TRA2 (TR2 || A2)
sewardjde4a1d02002-03-22 01:27:54 +0000467# define N1 (u->tag1 == NoValue)
468# define N2 (u->tag2 == NoValue)
469# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000470# define Se1 (u->tag1 == ArchRegS)
471# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000472
njn25e49d8e72002-09-23 09:36:25 +0000473# define COND0 (u->cond == 0)
474# define EXTRA4b0 (u->extra4b == 0)
475# define SG_WD0 (u->signed_widen == 0)
476# define JMPKIND0 (u->jmpkind == 0)
477# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
478 ( beforeLiveness \
479 ? u->regs_live_after == ALL_RREGS_LIVE \
480 : True ))
481
482# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
483# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
484# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
485# define XJMP ( SG_WD0 && CCALL0)
486# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
487# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
488
489 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000490 Int n_lits = 0;
491 if (u->tag1 == Literal) n_lits++;
492 if (u->tag2 == Literal) n_lits++;
493 if (u->tag3 == Literal) n_lits++;
494 if (n_lits > 1)
495 return False;
496
njn25e49d8e72002-09-23 09:36:25 +0000497 /* Fields not checked: val1, val2, val3 */
498
sewardjde4a1d02002-03-22 01:27:54 +0000499 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000500
501 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000502 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
503 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
504 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000505 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000506 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000507 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
508 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
509 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
510 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
511 case LOAD:
512 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
513 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
514 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
njn95bc3862003-09-30 13:22:30 +0000515 case WIDEN: return LIT0 && SZ42 && CC0 && TR1 && N2 && N3 && XWIDEN;
njn25e49d8e72002-09-23 09:36:25 +0000516 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
517 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
518 case CALLM_S:
519 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
520 case PUSH:
521 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
522 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
523 case AND:
524 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
jsgf5efa4fd2003-10-14 21:49:11 +0000525 case MUL: return LIT0 && SZ42 && CCa && TRA1 &&TRA2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000526 case ADD:
527 case XOR:
528 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
529 case SBB:
530 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
531 case SHL:
532 case SHR:
533 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
534 case ROL:
535 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
536 case RCL:
537 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
538 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
539 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
540 case INC:
541 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
542 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
543 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
544 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
545 case FPU_R:
546 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
547 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
548 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
549 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
550 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
551 case CCALL: return LIT1 && SZ0 && CC0 &&
552 (u->argc > 0 ? TR1 : N1) &&
553 (u->argc > 1 ? TR2 : N2) &&
554 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
555 u->regparms_n <= u->argc && XCCALL;
sewardj3d7c9c82003-03-26 21:08:13 +0000556 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
557 case MMX1:
sewardj4fbe6e92003-06-15 21:54:34 +0000558 case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
559 case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
560 case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
561 case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
562 case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
563 case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000564
565 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
566 case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj1e86b8b2003-06-16 23:34:12 +0000567 case SSE2a_MemRd: return LIT0 && SZ416 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
sewardj9dd209f2003-06-18 23:30:52 +0000568 case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjde8aecf2003-05-27 00:46:28 +0000569 case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj1e86b8b2003-06-16 23:34:12 +0000570 case SSE3a_MemRd: return LIT0 && SZsse && CCa && Ls1 && Ls2 && TR3 && XOTHER;
sewardj4fbe6e92003-06-15 21:54:34 +0000571 case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjabf8bf82003-06-15 22:28:05 +0000572 case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj77d30a22003-10-19 08:18:52 +0000573 case SSE3a1_MemRd: return LIT8 && SZ16 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj02af6bc2003-06-12 00:56:06 +0000574 case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjb31b06d2003-06-13 00:26:02 +0000575 case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj4fbe6e92003-06-15 21:54:34 +0000576 case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj77d30a22003-10-19 08:18:52 +0000577 case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
sewardj1e86b8b2003-06-16 23:34:12 +0000578 case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
sewardja453fb02003-06-14 13:22:36 +0000579 case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
sewardje3891fa2003-06-15 03:13:48 +0000580 case SSE3ag_MemRd_RegWr:
581 return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000582 default:
583 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000584 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000585 else {
586 VG_(printf)("unhandled opcode: %u. Perhaps "
587 "VG_(needs).extended_UCode should be set?",
588 u->opcode);
njne427a662002-10-02 11:08:25 +0000589 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000590 }
sewardjde4a1d02002-03-22 01:27:54 +0000591 }
njn25e49d8e72002-09-23 09:36:25 +0000592# undef LIT0
593# undef LIT1
sewardjb31b06d2003-06-13 00:26:02 +0000594# undef LIT8
njn25e49d8e72002-09-23 09:36:25 +0000595# undef LITm
sewardj77d30a22003-10-19 08:18:52 +0000596# undef SZ16
sewardj3d7c9c82003-03-26 21:08:13 +0000597# undef SZ8
sewardjde4a1d02002-03-22 01:27:54 +0000598# undef SZ4
599# undef SZ2
600# undef SZ1
601# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000602# undef SZ42
sewardjd7971012003-04-04 00:21:58 +0000603# undef SZ48
sewardjfebaa3b2003-05-25 01:07:34 +0000604# undef SZ416
sewardjde8aecf2003-05-27 00:46:28 +0000605# undef SZsse
njn25e49d8e72002-09-23 09:36:25 +0000606# undef SZi
607# undef SZf
608# undef SZ4m
609# undef emptyR
610# undef emptyW
611# undef CC0
612# undef CCr
613# undef CCw
614# undef CCa
615# undef CCb
616# undef CCc
617# undef CCd
618# undef CCe
619# undef CCf
620# undef CCg
621# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000622# undef TR1
623# undef TR2
624# undef TR3
625# undef A1
626# undef A2
627# undef AS1
628# undef AS2
629# undef AS3
630# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000631# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000632# undef Ls1
sewardjfebaa3b2003-05-25 01:07:34 +0000633# undef Ls2
sewardjde4a1d02002-03-22 01:27:54 +0000634# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000635# undef TRL1
636# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000637# undef N1
638# undef N2
639# undef N3
sewardje1042472002-09-30 12:33:11 +0000640# undef Se2
641# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000642# undef COND0
643# undef EXTRA4b0
644# undef SG_WD0
645# undef JMPKIND0
646# undef CCALL0
647# undef Xextra4b
648# undef XWIDEN
649# undef XJMP
650# undef XCCALL
651# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000652}
653
njn25e49d8e72002-09-23 09:36:25 +0000654void VG_(saneUCodeBlock) ( UCodeBlock* cb )
655{
656 Int i;
657
658 for (i = 0; i < cb->used; i++) {
659 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
660 if (!sane) {
661 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000662 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000663 }
664 vg_assert(sane);
665 }
666}
sewardjde4a1d02002-03-22 01:27:54 +0000667
668/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000669Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000670{
671 Int callm = 0;
672 Int callm_s = 0;
673 Int callm_e = 0;
674 Int callm_ptr, calls_ptr;
675 Int i, j, t;
676 Bool incall = False;
677
678 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
679
680 for (i = 0; i < cb->used; i++) {
681 switch (cb->instrs[i].opcode) {
682 case CALLM:
683 if (!incall) return False;
684 callm++;
685 break;
686 case CALLM_S:
687 if (incall) return False;
688 incall = True;
689 callm_s++;
690 break;
691 case CALLM_E:
692 if (!incall) return False;
693 incall = False;
694 callm_e++;
695 break;
696 case PUSH: case POP: case CLEAR:
697 if (!incall) return False;
698 break;
699 default:
700 break;
701 }
702 }
703 if (incall) return False;
704 if (callm != callm_s || callm != callm_e) return False;
705
706 /* Check the sections between CALLM_S and CALLM's. Ensure that no
707 PUSH uinsn pushes any TempReg that any other PUSH in the same
708 section pushes. Ie, check that the TempReg args to PUSHes in
709 the section are unique. If not, the instrumenter generates
710 incorrect code for CALLM insns. */
711
712 callm_ptr = 0;
713
714 find_next_CALLM:
715 /* Search for the next interval, making calls_ptr .. callm_ptr
716 bracket it. */
717 while (callm_ptr < cb->used
718 && cb->instrs[callm_ptr].opcode != CALLM)
719 callm_ptr++;
720 if (callm_ptr == cb->used)
721 return True;
722 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
723
724 calls_ptr = callm_ptr - 1;
725 while (cb->instrs[calls_ptr].opcode != CALLM_S)
726 calls_ptr--;
727 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
728 vg_assert(calls_ptr >= 0);
729
730 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
731
732 /* For each PUSH insn in the interval ... */
733 for (i = calls_ptr + 1; i < callm_ptr; i++) {
734 if (cb->instrs[i].opcode != PUSH) continue;
735 t = cb->instrs[i].val1;
736 /* Ensure no later PUSH insns up to callm_ptr push the same
737 TempReg. Return False if any such are found. */
738 for (j = i+1; j < callm_ptr; j++) {
739 if (cb->instrs[j].opcode == PUSH &&
740 cb->instrs[j].val1 == t)
741 return False;
742 }
743 }
744
745 /* This interval is clean. Keep going ... */
746 callm_ptr++;
747 goto find_next_CALLM;
748}
749
750
751/*------------------------------------------------------------*/
752/*--- Printing uinstrs. ---*/
753/*------------------------------------------------------------*/
754
njn25e49d8e72002-09-23 09:36:25 +0000755/* Global that dictates whether to print generated code at all stages */
756Bool VG_(print_codegen);
757
njn563f96f2003-02-03 11:17:46 +0000758Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000759{
760 switch (cond) {
761 case CondO: return "o";
762 case CondNO: return "no";
763 case CondB: return "b";
764 case CondNB: return "nb";
765 case CondZ: return "z";
766 case CondNZ: return "nz";
767 case CondBE: return "be";
768 case CondNBE: return "nbe";
769 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000770 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000771 case CondP: return "p";
772 case CondNP: return "np";
773 case CondL: return "l";
774 case CondNL: return "nl";
775 case CondLE: return "le";
776 case CondNLE: return "nle";
777 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000778 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000779 }
780}
781
782
783static void vg_ppFlagSet ( Char* prefix, FlagSet set )
784{
785 VG_(printf)("%s", prefix);
786 if (set & FlagD) VG_(printf)("D");
787 if (set & FlagO) VG_(printf)("O");
788 if (set & FlagS) VG_(printf)("S");
789 if (set & FlagZ) VG_(printf)("Z");
790 if (set & FlagA) VG_(printf)("A");
791 if (set & FlagC) VG_(printf)("C");
792 if (set & FlagP) VG_(printf)("P");
793}
794
795
796static void ppTempReg ( Int tt )
797{
798 if ((tt & 1) == 0)
799 VG_(printf)("t%d", tt);
800 else
801 VG_(printf)("q%d", tt-1);
802}
803
804
njn4ba5a792002-09-30 10:23:54 +0000805void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000806{
807 UInt tag, val;
808 switch (operandNo) {
809 case 1: tag = u->tag1; val = u->val1; break;
810 case 2: tag = u->tag2; val = u->val2; break;
811 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000812 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000813 }
814 if (tag == Literal) val = u->lit32;
815
816 if (parens) VG_(printf)("(");
817 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000818 case TempReg: ppTempReg(val); break;
819 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
820 case Literal: VG_(printf)("$0x%x", val); break;
821 case Lit16: VG_(printf)("$0x%x", val); break;
822 case NoValue: VG_(printf)("NoValue"); break;
823 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
824 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
825 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000826 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000827 }
828 if (parens) VG_(printf)(")");
829}
830
831
njn4ba5a792002-09-30 10:23:54 +0000832Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000833{
834 switch (opc) {
835 case ADD: return (upper ? "ADD" : "add");
836 case ADC: return (upper ? "ADC" : "adc");
837 case AND: return (upper ? "AND" : "and");
838 case OR: return (upper ? "OR" : "or");
839 case XOR: return (upper ? "XOR" : "xor");
840 case SUB: return (upper ? "SUB" : "sub");
841 case SBB: return (upper ? "SBB" : "sbb");
842 case SHL: return (upper ? "SHL" : "shl");
843 case SHR: return (upper ? "SHR" : "shr");
844 case SAR: return (upper ? "SAR" : "sar");
845 case ROL: return (upper ? "ROL" : "rol");
846 case ROR: return (upper ? "ROR" : "ror");
847 case RCL: return (upper ? "RCL" : "rcl");
848 case RCR: return (upper ? "RCR" : "rcr");
jsgf5efa4fd2003-10-14 21:49:11 +0000849 case MUL: return (upper ? "MUL" : "mul");
sewardjde4a1d02002-03-22 01:27:54 +0000850 case NOT: return (upper ? "NOT" : "not");
851 case NEG: return (upper ? "NEG" : "neg");
852 case INC: return (upper ? "INC" : "inc");
853 case DEC: return (upper ? "DEC" : "dec");
854 case BSWAP: return (upper ? "BSWAP" : "bswap");
855 default: break;
856 }
njne427a662002-10-02 11:08:25 +0000857 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000858 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000859 case CALLM_S: return "CALLM_S";
860 case CALLM_E: return "CALLM_E";
861 case INCEIP: return "INCEIP";
862 case LEA1: return "LEA1";
863 case LEA2: return "LEA2";
864 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000865 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000866 case GET: return "GET";
867 case PUT: return "PUT";
868 case GETF: return "GETF";
869 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000870 case GETSEG: return "GETSEG";
871 case PUTSEG: return "PUTSEG";
872 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000873 case LOAD: return "LD" ;
874 case STORE: return "ST" ;
875 case MOV: return "MOV";
876 case CMOV: return "CMOV";
877 case WIDEN: return "WIDEN";
878 case JMP: return "J" ;
879 case JIFZ: return "JIFZ" ;
880 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000881 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000882 case PUSH: return "PUSH" ;
883 case POP: return "POP" ;
884 case CLEAR: return "CLEAR";
885 case CC2VAL: return "CC2VAL";
886 case FPU_R: return "FPU_R";
887 case FPU_W: return "FPU_W";
888 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000889 case MMX1: return "MMX1" ;
890 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000891 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000892 case MMX2_MemRd: return "MMX2_MRd" ;
893 case MMX2_MemWr: return "MMX2_MWr" ;
sewardj4fbe6e92003-06-15 21:54:34 +0000894 case MMX2_ERegRd: return "MMX2_eRRd" ;
895 case MMX2_ERegWr: return "MMX2_eRWr" ;
sewardjfebaa3b2003-05-25 01:07:34 +0000896 case SSE2a_MemWr: return "SSE2a_MWr";
897 case SSE2a_MemRd: return "SSE2a_MRd";
sewardj9dd209f2003-06-18 23:30:52 +0000898 case SSE2a1_MemRd: return "SSE2a1_MRd";
sewardj4fbe6e92003-06-15 21:54:34 +0000899 case SSE3e_RegRd: return "SSE3e_RRd";
sewardjabf8bf82003-06-15 22:28:05 +0000900 case SSE3e_RegWr: return "SSE3e_RWr";
sewardj02af6bc2003-06-12 00:56:06 +0000901 case SSE3g_RegWr: return "SSE3g_RWr";
sewardj77d30a22003-10-19 08:18:52 +0000902 case SSE3a1_MemRd: return "SSE3a1_MRd";
sewardjb31b06d2003-06-13 00:26:02 +0000903 case SSE3g1_RegWr: return "SSE3g1_RWr";
sewardj4fbe6e92003-06-15 21:54:34 +0000904 case SSE3e1_RegRd: return "SSE3e1_RRd";
sewardja60be0e2003-05-26 08:47:27 +0000905 case SSE3: return "SSE3";
sewardjfebaa3b2003-05-25 01:07:34 +0000906 case SSE4: return "SSE4";
sewardja453fb02003-06-14 13:22:36 +0000907 case SSE5: return "SSE5";
sewardjfebaa3b2003-05-25 01:07:34 +0000908 case SSE3a_MemWr: return "SSE3a_MWr";
909 case SSE3a_MemRd: return "SSE3a_MRd";
sewardje3891fa2003-06-15 03:13:48 +0000910 case SSE3ag_MemRd_RegWr: return "SSE3ag_MemRd_RegWr";
njn25e49d8e72002-09-23 09:36:25 +0000911 default:
912 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000913 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000914 else {
915 VG_(printf)("unhandled opcode: %u. Perhaps "
916 "VG_(needs).extended_UCode should be set?",
917 opc);
njne427a662002-10-02 11:08:25 +0000918 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000919 }
sewardjde4a1d02002-03-22 01:27:54 +0000920 }
921}
922
sewardja38e0922002-10-01 00:50:47 +0000923static
njn4ba5a792002-09-30 10:23:54 +0000924void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000925{
926# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000927 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000928 u->regs_live_after) \
929 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000930
njn25e49d8e72002-09-23 09:36:25 +0000931 VG_(printf)("[");
932 PRINT_RREG_LIVENESS(R_EAX, "a");
933 PRINT_RREG_LIVENESS(R_EBX, "b");
934 PRINT_RREG_LIVENESS(R_ECX, "c");
935 PRINT_RREG_LIVENESS(R_EDX, "d");
936 PRINT_RREG_LIVENESS(R_ESI, "S");
937 PRINT_RREG_LIVENESS(R_EDI, "D");
938 VG_(printf)("]");
939
940# undef PRINT_RREG_LIVENESS
941}
942
943/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000944void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000945{
njn4ba5a792002-09-30 10:23:54 +0000946 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000947
948 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000949 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000950 VG_(printf)("size: %d\n", u->size);
951 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
952 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000953 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
954 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
955 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
956 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000957 VG_(printf)("signed_widen: %d\n", u->signed_widen);
958 VG_(printf)("jmpkind: %d\n", u->jmpkind);
959 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
960 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
961 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000962 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000963 VG_(printf)("\n");
964}
965
sewardja38e0922002-10-01 00:50:47 +0000966static
njn4ba5a792002-09-30 10:23:54 +0000967void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000968{
969 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000970 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000971 if (u->opcode == JMP || u->opcode == CC2VAL)
njn563f96f2003-02-03 11:17:46 +0000972 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000973
974 switch (u->size) {
975 case 0: VG_(printf)("o"); break;
976 case 1: VG_(printf)("B"); break;
977 case 2: VG_(printf)("W"); break;
978 case 4: VG_(printf)("L"); break;
979 case 8: VG_(printf)("Q"); break;
sewardjfebaa3b2003-05-25 01:07:34 +0000980 case 16: VG_(printf)("QQ"); break;
sewardjde4a1d02002-03-22 01:27:54 +0000981 default: VG_(printf)("%d", (Int)u->size); break;
982 }
983
sewardjfebaa3b2003-05-25 01:07:34 +0000984 VG_(printf)(" \t");
985
sewardjde4a1d02002-03-22 01:27:54 +0000986 switch (u->opcode) {
987
sewardjde4a1d02002-03-22 01:27:54 +0000988 case CALLM_S: case CALLM_E:
989 break;
990
991 case INCEIP:
sewardjfebaa3b2003-05-25 01:07:34 +0000992 VG_(printf)("$%d", u->val1);
sewardjde4a1d02002-03-22 01:27:54 +0000993 break;
994
995 case LEA2:
sewardjfebaa3b2003-05-25 01:07:34 +0000996 VG_(printf)("%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000997 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000998 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +0000999 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001000 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +00001001 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001002 break;
1003
1004 case LEA1:
sewardjfebaa3b2003-05-25 01:07:34 +00001005 VG_(printf)("%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +00001006 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001007 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001008 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001009 break;
1010
sewardj7a5ebcf2002-11-13 22:42:13 +00001011 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001012 break;
1013
1014 case FPU_W:
sewardjfebaa3b2003-05-25 01:07:34 +00001015 VG_(printf)("0x%x:0x%x, ",
sewardjde4a1d02002-03-22 01:27:54 +00001016 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +00001017 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001018 break;
1019
1020 case FPU_R:
sewardjfebaa3b2003-05-25 01:07:34 +00001021 VG_(printf)("");
njn4ba5a792002-09-30 10:23:54 +00001022 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001023 VG_(printf)(", 0x%x:0x%x",
1024 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1025 break;
1026
1027 case FPU:
sewardjfebaa3b2003-05-25 01:07:34 +00001028 VG_(printf)("0x%x:0x%x",
sewardjde4a1d02002-03-22 01:27:54 +00001029 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1030 break;
1031
sewardj3d7c9c82003-03-26 21:08:13 +00001032 case MMX1:
sewardjfebaa3b2003-05-25 01:07:34 +00001033 VG_(printf)("0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001034 u->val1 & 0xFF );
1035 break;
1036
1037 case MMX2:
sewardjfebaa3b2003-05-25 01:07:34 +00001038 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001039 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1040 break;
1041
sewardjca860012003-03-27 23:52:58 +00001042 case MMX3:
sewardjfebaa3b2003-05-25 01:07:34 +00001043 VG_(printf)("0x%x:0x%x:0x%x",
sewardjca860012003-03-27 23:52:58 +00001044 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1045 break;
1046
sewardj4fbe6e92003-06-15 21:54:34 +00001047 case MMX2_ERegWr:
1048 case MMX2_ERegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001049 VG_(printf)("0x%x:0x%x, ",
sewardjca860012003-03-27 23:52:58 +00001050 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1051 VG_(pp_UOperand)(u, 2, 4, False);
1052 break;
1053
sewardj3d7c9c82003-03-26 21:08:13 +00001054 case MMX2_MemWr:
1055 case MMX2_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001056 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001057 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1058 VG_(pp_UOperand)(u, 2, 4, True);
1059 break;
1060
sewardjfebaa3b2003-05-25 01:07:34 +00001061 case SSE2a_MemWr:
1062 case SSE2a_MemRd:
1063 VG_(printf)("0x%x:0x%x:0x%x",
1064 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1065 VG_(pp_UOperand)(u, 3, 4, True);
1066 break;
1067
sewardj9dd209f2003-06-18 23:30:52 +00001068 case SSE2a1_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001069 case SSE3a_MemWr:
1070 case SSE3a_MemRd:
1071 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1072 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
sewardjde8aecf2003-05-27 00:46:28 +00001073 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
sewardjfebaa3b2003-05-25 01:07:34 +00001074 VG_(pp_UOperand)(u, 3, 4, True);
1075 break;
1076
sewardjabf8bf82003-06-15 22:28:05 +00001077 case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001078 case SSE3e_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001079 case SSE3g_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001080 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1081 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1082 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1083 VG_(pp_UOperand)(u, 3, 4, True);
1084 break;
1085
sewardjb31b06d2003-06-13 00:26:02 +00001086 case SSE3g1_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001087 case SSE3e1_RegRd:
sewardj77d30a22003-10-19 08:18:52 +00001088 case SSE3a1_MemRd:
sewardjb31b06d2003-06-13 00:26:02 +00001089 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1090 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1091 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1092 u->lit32 );
1093 VG_(pp_UOperand)(u, 3, 4, True);
1094 break;
1095
sewardja60be0e2003-05-26 08:47:27 +00001096 case SSE3:
1097 VG_(printf)("0x%x:0x%x:0x%x",
1098 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1099 u->val2 & 0xFF );
1100 break;
1101
sewardjfebaa3b2003-05-25 01:07:34 +00001102 case SSE4:
1103 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1104 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1105 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1106 break;
1107
sewardja453fb02003-06-14 13:22:36 +00001108 case SSE5:
1109 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1110 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1111 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1112 u->val3 & 0xFF );
1113 break;
1114
sewardje3891fa2003-06-15 03:13:48 +00001115 case SSE3ag_MemRd_RegWr:
1116 VG_(printf)("0x%x(addr=", u->lit32 );
1117 VG_(pp_UOperand)(u, 1, 4, False);
1118 VG_(printf)(", dst=");
1119 VG_(pp_UOperand)(u, 2, 4, False);
1120 VG_(printf)(")");
1121 break;
1122
sewardjde4a1d02002-03-22 01:27:54 +00001123 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001124 case GETSEG: case PUTSEG:
njn4ba5a792002-09-30 10:23:54 +00001125 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001126 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001127 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001128 break;
1129
1130 case JMP:
1131 switch (u->jmpkind) {
1132 case JmpCall: VG_(printf)("-c"); break;
1133 case JmpRet: VG_(printf)("-r"); break;
1134 case JmpSyscall: VG_(printf)("-sys"); break;
1135 case JmpClientReq: VG_(printf)("-cli"); break;
1136 default: break;
1137 }
njn4ba5a792002-09-30 10:23:54 +00001138 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001139 if (CondAlways == u->cond) {
1140 /* Print x86 instruction size if filled in */
1141 if (0 != u->extra4b)
1142 VG_(printf)(" ($%u)", u->extra4b);
1143 }
sewardjde4a1d02002-03-22 01:27:54 +00001144 break;
1145
1146 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001147 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1148 case NOT: case NEG: case INC: case DEC: case BSWAP:
njn4ba5a792002-09-30 10:23:54 +00001149 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001150 break;
1151
njn25e49d8e72002-09-23 09:36:25 +00001152 /* Print a "(s)" after args passed on stack */
1153 case CCALL:
njn25e49d8e72002-09-23 09:36:25 +00001154 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001155 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001156 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001157 }
njn25e49d8e72002-09-23 09:36:25 +00001158 VG_(printf)("%p(", u->lit32);
1159 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001160 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001161 if (u->regparms_n < 1)
1162 VG_(printf)("(s)");
1163 }
1164 if (u->argc > 1) {
1165 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001166 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001167 if (u->regparms_n < 2)
1168 VG_(printf)("(s)");
1169 }
1170 if (u->argc > 2) {
1171 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001172 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001173 if (u->regparms_n < 3)
1174 VG_(printf)("(s)");
1175 }
1176 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001177 break;
1178
sewardje1042472002-09-30 12:33:11 +00001179 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001180 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001181 case ADD: case ADC: case AND: case OR:
1182 case XOR: case SUB: case SBB:
1183 case SHL: case SHR: case SAR:
jsgf5efa4fd2003-10-14 21:49:11 +00001184 case ROL: case ROR: case RCL: case RCR:
1185 case MUL:
njn4ba5a792002-09-30 10:23:54 +00001186 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001187 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001188 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001189 break;
1190
1191 case WIDEN:
1192 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1193 u->signed_widen?'s':'z');
njn4ba5a792002-09-30 10:23:54 +00001194 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001195 break;
1196
njn25e49d8e72002-09-23 09:36:25 +00001197 default:
1198 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001199 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001200 else {
1201 VG_(printf)("unhandled opcode: %u. Perhaps "
1202 "VG_(needs).extended_UCode should be set?",
1203 u->opcode);
njne427a662002-10-02 11:08:25 +00001204 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001205 }
sewardjde4a1d02002-03-22 01:27:54 +00001206 }
sewardjde4a1d02002-03-22 01:27:54 +00001207 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1208 VG_(printf)(" (");
1209 if (u->flags_r != FlagsEmpty)
1210 vg_ppFlagSet("-r", u->flags_r);
1211 if (u->flags_w != FlagsEmpty)
1212 vg_ppFlagSet("-w", u->flags_w);
1213 VG_(printf)(")");
1214 }
njn25e49d8e72002-09-23 09:36:25 +00001215
1216 if (ppRegsLiveness) {
1217 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001218 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001219 }
1220
sewardjde4a1d02002-03-22 01:27:54 +00001221 VG_(printf)("\n");
1222}
1223
njn4ba5a792002-09-30 10:23:54 +00001224void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001225{
njn4ba5a792002-09-30 10:23:54 +00001226 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001227}
1228
njn4ba5a792002-09-30 10:23:54 +00001229void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001230{
njn4ba5a792002-09-30 10:23:54 +00001231 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001232}
sewardjde4a1d02002-03-22 01:27:54 +00001233
njn4ba5a792002-09-30 10:23:54 +00001234void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001235{
1236 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001237 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001238 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001239 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001240 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001241 VG_(printf)("\n");
1242}
1243
1244
1245/*------------------------------------------------------------*/
1246/*--- uinstr helpers for register allocation ---*/
1247/*--- and code improvement. ---*/
1248/*------------------------------------------------------------*/
1249
njn25e49d8e72002-09-23 09:36:25 +00001250/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001251 the caller (regs), which is assumed to be big enough. Return the number
1252 of entries. Written regs are indicated in parallel array isWrites.
1253 Insns which read _and_ write a register wind up mentioning it twice.
1254 Entries are placed in the array in program order, so that if a reg is
1255 read-modified-written, it appears first as a read and then as a write.
1256 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001257*/
njn810086f2002-11-14 12:42:47 +00001258Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001259{
njn810086f2002-11-14 12:42:47 +00001260# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1261# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001262
1263 Int n = 0;
1264 switch (u->opcode) {
1265 case LEA1: RD(1); WR(2); break;
1266 case LEA2: RD(1); RD(2); WR(3); break;
1267
sewardj77d30a22003-10-19 08:18:52 +00001268 case SSE3a1_MemRd:
sewardj9dd209f2003-06-18 23:30:52 +00001269 case SSE2a1_MemRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001270 case SSE3e_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001271 case SSE3a_MemWr:
1272 case SSE3a_MemRd:
1273 case SSE2a_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001274 case SSE3e1_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001275 case SSE2a_MemRd: RD(3); break;
1276
sewardjabf8bf82003-06-15 22:28:05 +00001277 case SSE3e_RegWr:
sewardjb31b06d2003-06-13 00:26:02 +00001278 case SSE3g1_RegWr:
sewardj02af6bc2003-06-12 00:56:06 +00001279 case SSE3g_RegWr: WR(3); break;
sewardjfebaa3b2003-05-25 01:07:34 +00001280
sewardje3891fa2003-06-15 03:13:48 +00001281 case SSE3ag_MemRd_RegWr: RD(1); WR(2); break;
1282
sewardj4fbe6e92003-06-15 21:54:34 +00001283 case MMX2_ERegRd: RD(2); break;
1284 case MMX2_ERegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001285
sewardja453fb02003-06-14 13:22:36 +00001286 case SSE4: case SSE3: case SSE5:
sewardjca860012003-03-27 23:52:58 +00001287 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001288 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001289 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001290
1291 case CCALL:
1292 if (u->argc > 0) RD(1);
1293 if (u->argc > 1) RD(2);
1294 if (u->argc > 2) RD(3);
1295 if (u->has_ret_val) WR(3);
1296 break;
1297
sewardj3d7c9c82003-03-26 21:08:13 +00001298 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001299 case FPU_R: case FPU_W: RD(2); break;
1300
sewardje1042472002-09-30 12:33:11 +00001301 case GETSEG: WR(2); break;
1302 case PUTSEG: RD(1); break;
1303
sewardjde4a1d02002-03-22 01:27:54 +00001304 case GETF: WR(1); break;
1305 case PUTF: RD(1); break;
1306
1307 case GET: WR(2); break;
1308 case PUT: RD(1); break;
1309 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001310 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001311 case MOV: RD(1); WR(2); break;
1312
1313 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001314
njn25e49d8e72002-09-23 09:36:25 +00001315 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001316 case POP: WR(1); break;
1317
sewardje1042472002-09-30 12:33:11 +00001318 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001319 case CMOV:
1320 case ADD: case ADC: case AND: case OR:
1321 case XOR: case SUB: case SBB:
jsgf5efa4fd2003-10-14 21:49:11 +00001322 case MUL:
sewardjde4a1d02002-03-22 01:27:54 +00001323 RD(1); RD(2); WR(2); break;
1324
1325 case SHL: case SHR: case SAR:
1326 case ROL: case ROR: case RCL: case RCR:
1327 RD(1); RD(2); WR(2); break;
1328
njn25e49d8e72002-09-23 09:36:25 +00001329 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001330 RD(1); WR(1); break;
1331
1332 case WIDEN: RD(1); WR(1); break;
1333
1334 case CC2VAL: WR(1); break;
1335 case JIFZ: RD(1); break;
1336
njn25e49d8e72002-09-23 09:36:25 +00001337 default:
1338 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001339 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001340 else {
1341 VG_(printf)("unhandled opcode: %u. Perhaps "
1342 "VG_(needs).extended_UCode should be set?",
1343 u->opcode);
njne427a662002-10-02 11:08:25 +00001344 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001345 }
sewardjde4a1d02002-03-22 01:27:54 +00001346 }
1347 return n;
1348
1349# undef RD
1350# undef WR
1351}
1352
1353
njn25e49d8e72002-09-23 09:36:25 +00001354/* Change temp regs in u into real regs, as directed by the
1355 * temps[i]-->reals[i] mapping. */
sewardj56867352003-10-12 10:27:06 +00001356static
njn810086f2002-11-14 12:42:47 +00001357void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001358{
1359 Int i;
1360 if (u->tag1 == TempReg) {
1361 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001362 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001363 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001364 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001365 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001366 }
1367 if (u->tag2 == TempReg) {
1368 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001369 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001370 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001371 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001372 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001373 }
1374 if (u->tag3 == TempReg) {
1375 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001376 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001377 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001378 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001379 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001380 }
1381}
1382
1383
1384/* Tedious x86-specific hack which compensates for the fact that the
1385 register numbers for %ah .. %dh do not correspond to those for %eax
1386 .. %edx. It maps a (reg size, reg no) pair to the number of the
1387 containing 32-bit reg. */
1388static __inline__
1389Int containingArchRegOf ( Int sz, Int aregno )
1390{
1391 switch (sz) {
1392 case 4: return aregno;
1393 case 2: return aregno;
1394 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001395 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001396 }
1397}
1398
1399
1400/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001401 reg. Otherwise return -1. Used in redundant-PUT elimination.
1402 Note that this is not required for skins extending UCode because
1403 this happens before instrumentation. */
sewardj56867352003-10-12 10:27:06 +00001404static
sewardjde4a1d02002-03-22 01:27:54 +00001405Int maybe_uinstrReadsArchReg ( UInstr* u )
1406{
1407 switch (u->opcode) {
1408 case GET:
1409 case ADD: case ADC: case AND: case OR:
1410 case XOR: case SUB: case SBB:
1411 case SHL: case SHR: case SAR: case ROL:
1412 case ROR: case RCL: case RCR:
jsgf5efa4fd2003-10-14 21:49:11 +00001413 case MUL:
sewardjde4a1d02002-03-22 01:27:54 +00001414 if (u->tag1 == ArchReg)
1415 return containingArchRegOf ( u->size, u->val1 );
1416 else
1417 return -1;
1418
1419 case GETF: case PUTF:
1420 case CALLM_S: case CALLM_E:
1421 case INCEIP:
1422 case LEA1:
1423 case LEA2:
1424 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001425 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001426 case PUT:
1427 case LOAD:
1428 case STORE:
1429 case MOV:
1430 case CMOV:
1431 case JMP:
1432 case CALLM: case CLEAR: case PUSH: case POP:
1433 case NOT: case NEG: case INC: case DEC: case BSWAP:
1434 case CC2VAL:
1435 case JIFZ:
1436 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001437 case MMX1: case MMX2: case MMX3:
sewardj3d7c9c82003-03-26 21:08:13 +00001438 case MMX2_MemRd: case MMX2_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001439 case MMX2_ERegRd: case MMX2_ERegWr:
sewardj9dd209f2003-06-18 23:30:52 +00001440 case SSE2a_MemWr: case SSE2a_MemRd: case SSE2a1_MemRd:
sewardj77d30a22003-10-19 08:18:52 +00001441 case SSE3a_MemWr: case SSE3a_MemRd: case SSE3a1_MemRd:
sewardjabf8bf82003-06-15 22:28:05 +00001442 case SSE3e_RegRd: case SSE3g_RegWr: case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001443 case SSE3g1_RegWr: case SSE3e1_RegRd:
sewardje3891fa2003-06-15 03:13:48 +00001444 case SSE4: case SSE3: case SSE5: case SSE3ag_MemRd_RegWr:
sewardjde4a1d02002-03-22 01:27:54 +00001445 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001446 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1447 case GETSEG: case PUTSEG:
1448 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001449 return -1;
1450
1451 default:
njn4ba5a792002-09-30 10:23:54 +00001452 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001453 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001454 }
1455}
1456
1457static __inline__
1458Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1459{
1460 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001461 Int tempUse[VG_MAX_REGS_USED];
1462 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001463
1464 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001465 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001466 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001467 return True;
1468 return False;
1469}
1470
1471
1472/*------------------------------------------------------------*/
1473/*--- ucode improvement. ---*/
1474/*------------------------------------------------------------*/
1475
1476/* Improve the code in cb by doing
1477 -- Redundant ArchReg-fetch elimination
1478 -- Redundant PUT elimination
1479 -- Redundant cond-code restore/save elimination
1480 The overall effect of these is to allow target registers to be
1481 cached in host registers over multiple target insns.
1482*/
1483static void vg_improve ( UCodeBlock* cb )
1484{
1485 Int i, j, k, m, n, ar, tr, told, actual_areg;
1486 Int areg_map[8];
1487 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001488 Int tempUse[VG_MAX_REGS_USED];
1489 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001490 UInstr* u;
1491 Bool wr;
1492 Int* last_live_before;
1493 FlagSet future_dead_flags;
1494
sewardj2ca39a12003-06-14 12:03:35 +00001495# if 0
1496 /* DEBUGGING HOOK */
1497 {
1498 static int n_done=0;
1499 if (VG_(clo_stop_after) > 1000000000) {
1500 if (n_done > (VG_(clo_stop_after) - 1000000000)) {
1501 dis=False;
1502 VG_(clo_trace_codegen) = 0;
1503 return;
1504 }
1505 if (n_done == (VG_(clo_stop_after) - 1000000000)) {
1506 VG_(printf)("\n");
1507 VG_(pp_UCodeBlock) ( cb, "Incoming:" );
1508 dis = True;
1509 VG_(clo_trace_codegen) = 31;
1510 }
1511 n_done++;
1512 }
1513 }
1514 /* end DEBUGGING HOOK */
1515# endif /* 0 */
1516
njn25e49d8e72002-09-23 09:36:25 +00001517 if (dis)
1518 VG_(printf) ("Improvements:\n");
1519
sewardjde4a1d02002-03-22 01:27:54 +00001520 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001521 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1522 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001523 else
1524 last_live_before = NULL;
1525
1526
1527 /* PASS 1: redundant GET elimination. (Actually, more general than
1528 that -- eliminates redundant fetches of ArchRegs). */
1529
1530 /* Find the live-range-ends for all temporaries. Duplicates code
1531 in the register allocator :-( */
1532
1533 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1534
1535 for (i = cb->used-1; i >= 0; i--) {
1536 u = &cb->instrs[i];
1537
njn810086f2002-11-14 12:42:47 +00001538 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001539
1540 /* For each temp usage ... bwds in program order. */
1541 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001542 tr = tempUse[j];
1543 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001544 if (last_live_before[tr] == -1) {
1545 vg_assert(tr >= 0 && tr < cb->nextTemp);
1546 last_live_before[tr] = wr ? (i+1) : i;
1547 }
1548 }
1549
1550 }
1551
1552# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1553 { Int q; \
1554 /* Invalidate any old binding(s) to tempreg. */ \
1555 for (q = 0; q < 8; q++) \
1556 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1557 /* Add the new binding. */ \
1558 areg_map[archreg] = (tempreg); \
1559 }
1560
1561 /* Set up the A-reg map. */
1562 for (i = 0; i < 8; i++) areg_map[i] = -1;
1563
1564 /* Scan insns. */
1565 for (i = 0; i < cb->used; i++) {
1566 u = &cb->instrs[i];
1567 if (u->opcode == GET && u->size == 4) {
1568 /* GET; see if it can be annulled. */
1569 vg_assert(u->tag1 == ArchReg);
1570 vg_assert(u->tag2 == TempReg);
1571 ar = u->val1;
1572 tr = u->val2;
1573 told = areg_map[ar];
1574 if (told != -1 && last_live_before[told] <= i) {
1575 /* ar already has an old mapping to told, but that runs
1576 out here. Annul this GET, rename tr to told for the
1577 rest of the block, and extend told's live range to that
1578 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001579 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001580 n = last_live_before[tr] + 1;
1581 if (n > cb->used) n = cb->used;
1582 last_live_before[told] = last_live_before[tr];
1583 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001584 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001585 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001586 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001587 i, tr, told,i+1, n-1);
1588 for (m = i+1; m < n; m++) {
1589 if (cb->instrs[m].tag1 == TempReg
1590 && cb->instrs[m].val1 == tr)
1591 cb->instrs[m].val1 = told;
1592 if (cb->instrs[m].tag2 == TempReg
1593 && cb->instrs[m].val2 == tr)
1594 cb->instrs[m].val2 = told;
sewardjfebaa3b2003-05-25 01:07:34 +00001595 if (cb->instrs[m].tag3 == TempReg
1596 && cb->instrs[m].val3 == tr)
1597 cb->instrs[m].val3 = told;
sewardjde4a1d02002-03-22 01:27:54 +00001598 }
1599 BIND_ARCH_TO_TEMP(ar,told);
1600 }
1601 else
1602 BIND_ARCH_TO_TEMP(ar,tr);
1603 }
1604 else if (u->opcode == GET && u->size != 4) {
1605 /* Invalidate any mapping for this archreg. */
1606 actual_areg = containingArchRegOf ( u->size, u->val1 );
1607 areg_map[actual_areg] = -1;
1608 }
1609 else if (u->opcode == PUT && u->size == 4) {
1610 /* PUT; re-establish t -> a binding */
1611 vg_assert(u->tag1 == TempReg);
1612 vg_assert(u->tag2 == ArchReg);
1613 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1614 }
1615 else if (u->opcode == PUT && u->size != 4) {
1616 /* Invalidate any mapping for this archreg. */
1617 actual_areg = containingArchRegOf ( u->size, u->val2 );
1618 areg_map[actual_areg] = -1;
1619 } else {
1620
1621 /* see if insn has an archreg as a read operand; if so try to
1622 map it. */
1623 if (u->tag1 == ArchReg && u->size == 4
1624 && areg_map[u->val1] != -1) {
1625 switch (u->opcode) {
1626 case ADD: case SUB: case AND: case OR: case XOR:
1627 case ADC: case SBB:
1628 case SHL: case SHR: case SAR: case ROL: case ROR:
1629 case RCL: case RCR:
jsgf5efa4fd2003-10-14 21:49:11 +00001630 case MUL:
njn25e49d8e72002-09-23 09:36:25 +00001631 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001632 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001633 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001634 i, nameIReg(4,u->val1), areg_map[u->val1]);
1635 u->tag1 = TempReg;
1636 u->val1 = areg_map[u->val1];
1637 /* Remember to extend the live range of the TempReg,
1638 if necessary. */
1639 if (last_live_before[u->val1] < i)
1640 last_live_before[u->val1] = i;
1641 break;
1642 default:
1643 break;
1644 }
1645 }
1646
1647 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001648 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001649
1650 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001651 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001652 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001653 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001654 for (m = 0; m < 8; m++)
1655 if (areg_map[m] == tr) areg_map[m] = -1;
1656 }
1657 }
1658
1659 }
1660
1661# undef BIND_ARCH_TO_TEMP
1662
sewardj05f1aa12002-04-30 00:29:36 +00001663 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1664 %ESP, since the memory check machinery always requires the
1665 in-memory value of %ESP to be up to date. Although this isn't
1666 actually required by other analyses (cache simulation), it's
1667 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001668 for (j = 0; j < 8; j++)
1669 annul_put[j] = False;
1670
1671 for (i = cb->used-1; i >= 0; i--) {
1672 u = &cb->instrs[i];
1673 if (u->opcode == NOP) continue;
1674
1675 if (u->opcode == PUT && u->size == 4) {
1676 vg_assert(u->tag2 == ArchReg);
1677 actual_areg = containingArchRegOf ( 4, u->val2 );
1678 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001679 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001680 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001681 if (dis)
1682 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001683 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001684 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001685 annul_put[actual_areg] = True;
1686 }
1687 }
1688 else if (u->opcode == PUT && u->size != 4) {
1689 actual_areg = containingArchRegOf ( u->size, u->val2 );
1690 annul_put[actual_areg] = False;
1691 }
1692 else if (u->opcode == JMP || u->opcode == JIFZ
1693 || u->opcode == CALLM) {
1694 for (j = 0; j < 8; j++)
1695 annul_put[j] = False;
1696 }
1697 else {
1698 /* If an instruction reads an ArchReg, the immediately
1699 preceding PUT cannot be annulled. */
1700 actual_areg = maybe_uinstrReadsArchReg ( u );
1701 if (actual_areg != -1)
1702 annul_put[actual_areg] = False;
1703 }
1704 }
1705
1706 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1707 dead after this point, annul the MOV insn and rename t2 to t1.
1708 Further modifies the last_live_before map. */
1709
1710# if 0
njn4ba5a792002-09-30 10:23:54 +00001711 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001712 for (i = 0; i < cb->nextTemp; i++)
1713 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1714 VG_(printf)("\n");
1715# endif
1716
1717 for (i = 0; i < cb->used-1; i++) {
1718 u = &cb->instrs[i];
1719 if (u->opcode != MOV) continue;
1720 if (u->tag1 == Literal) continue;
1721 vg_assert(u->tag1 == TempReg);
1722 vg_assert(u->tag2 == TempReg);
1723 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001724 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001725 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001726 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001727 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1728 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1729 if (cb->instrs[j].tag1 == TempReg
1730 && cb->instrs[j].val1 == u->val2)
1731 cb->instrs[j].val1 = u->val1;
1732 if (cb->instrs[j].tag2 == TempReg
1733 && cb->instrs[j].val2 == u->val2)
1734 cb->instrs[j].val2 = u->val1;
sewardjfebaa3b2003-05-25 01:07:34 +00001735 if (cb->instrs[j].tag3 == TempReg
1736 && cb->instrs[j].val3 == u->val2)
1737 cb->instrs[j].val3 = u->val1;
sewardjde4a1d02002-03-22 01:27:54 +00001738 }
1739 last_live_before[u->val1] = last_live_before[u->val2];
1740 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001741 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001742 }
1743 }
1744
1745 /* PASS 3: redundant condition-code restore/save elimination.
1746 Scan backwards from the end. future_dead_flags records the set
1747 of flags which are dead at this point, that is, will be written
1748 before they are next read. Earlier uinsns which write flags
1749 already in future_dead_flags can have their writes annulled.
1750 */
1751 future_dead_flags = FlagsEmpty;
1752
1753 for (i = cb->used-1; i >= 0; i--) {
1754 u = &cb->instrs[i];
1755
1756 /* We might never make it to insns beyond this one, so be
1757 conservative. */
1758 if (u->opcode == JIFZ || u->opcode == JMP) {
1759 future_dead_flags = FlagsEmpty;
1760 continue;
1761 }
1762
sewardjfbb6cda2002-07-24 09:33:52 +00001763 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1764 For example people try to mess with bit 21 to see if CPUID
1765 works. The setting may or may not actually take hold. So we
1766 play safe here. */
1767 if (u->opcode == PUTF) {
1768 future_dead_flags = FlagsEmpty;
1769 continue;
1770 }
1771
sewardjde4a1d02002-03-22 01:27:54 +00001772 /* We can annul the flags written by this insn if it writes a
1773 subset (or eq) of the set of flags known to be dead after
1774 this insn. If not, just record the flags also written by
1775 this insn.*/
1776 if (u->flags_w != FlagsEmpty
1777 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001778 if (dis) {
1779 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001780 vg_ppFlagSet("", u->flags_w);
1781 VG_(printf)(" due to later ");
1782 vg_ppFlagSet("", future_dead_flags);
1783 VG_(printf)("\n");
1784 }
1785 u->flags_w = FlagsEmpty;
1786 } else {
1787 future_dead_flags
1788 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1789 }
1790
1791 /* If this insn also reads flags, empty out future_dead_flags so
1792 as to force preceding writes not to be annulled. */
1793 if (u->flags_r != FlagsEmpty)
1794 future_dead_flags = FlagsEmpty;
1795 }
1796
1797 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001798 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1799
1800 if (dis) {
1801 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001802 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001803 }
sewardjde4a1d02002-03-22 01:27:54 +00001804}
1805
njn9b007f62003-04-07 14:40:25 +00001806/*------------------------------------------------------------*/
1807/*--- %ESP-update pass ---*/
1808/*------------------------------------------------------------*/
1809
1810/* For skins that want to know about %ESP changes, this pass adds
1811 in the appropriate hooks. We have to do it after the skin's
1812 instrumentation, so the skin doesn't have to worry about the CCALLs
1813 it adds in, and we must do it before register allocation because
1814 spilled temps make it much harder to work out the %esp deltas.
njned619712003-10-01 16:45:04 +00001815 Thus we have it as an extra phase between the two.
1816
1817 We look for "GETL %ESP, t_ESP", then track ADDs and SUBs of
1818 literal values to t_ESP, and the total delta of the ADDs/SUBs. Then if
1819 "PUTL t_ESP, %ESP" happens, we call the helper with the known delta. We
1820 also cope with "MOVL t_ESP, tX", making tX the new t_ESP. If any other
1821 instruction clobbers t_ESP, we don't track it anymore, and fall back to
1822 the delta-is-unknown case. That case is also used when the delta is not
1823 a nice small amount, or an unknown amount.
1824*/
njn9b007f62003-04-07 14:40:25 +00001825static
1826UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1827{
1828 UCodeBlock* cb;
1829 UInstr* u;
1830 Int delta = 0;
1831 UInt t_ESP = INVALID_TEMPREG;
sewardj05bcdcb2003-05-18 10:05:38 +00001832 Int i;
njn9b007f62003-04-07 14:40:25 +00001833
1834 cb = VG_(setup_UCodeBlock)(cb_in);
1835
1836 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1837 u = VG_(get_instr)(cb_in, i);
1838
1839 if (GET == u->opcode && R_ESP == u->val1) {
1840 t_ESP = u->val2;
1841 delta = 0;
1842
1843 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1844
fitzhardinge98abfc72003-12-16 02:05:15 +00001845# define DO_GENERIC \
1846 if (VG_(defined_new_mem_stack)() || \
1847 VG_(defined_die_mem_stack)()) { \
1848 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1849 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1850 1, 1, False); \
njn9b007f62003-04-07 14:40:25 +00001851 }
1852
fitzhardinge98abfc72003-12-16 02:05:15 +00001853# define DO(kind, size) \
1854 if (VG_(defined_##kind##_mem_stack_##size)()) { \
1855 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1856 uCCall(cb, (Addr) VG_(tool_interface).track_##kind##_mem_stack_##size, \
1857 1, 1, False); \
1858 \
1859 } else \
1860 DO_GENERIC \
njn9b007f62003-04-07 14:40:25 +00001861 break
1862
1863 if (u->val1 == t_ESP) {
1864 /* Known delta, common cases handled specially. */
1865 switch (delta) {
njned619712003-10-01 16:45:04 +00001866 case 0: break;
njn9b007f62003-04-07 14:40:25 +00001867 case 4: DO(die, 4);
1868 case -4: DO(new, 4);
1869 case 8: DO(die, 8);
1870 case -8: DO(new, 8);
1871 case 12: DO(die, 12);
1872 case -12: DO(new, 12);
1873 case 16: DO(die, 16);
1874 case -16: DO(new, 16);
1875 case 32: DO(die, 32);
1876 case -32: DO(new, 32);
1877 default: DO_GENERIC; break;
1878 }
1879 } else {
1880 /* Unknown delta */
1881 DO_GENERIC;
njned619712003-10-01 16:45:04 +00001882
daywalker972a7592003-10-01 10:19:08 +00001883 /* now we know the temp that points to %ESP */
njned619712003-10-01 16:45:04 +00001884 t_ESP = u->val1;
njn9b007f62003-04-07 14:40:25 +00001885 }
1886 delta = 0;
1887
1888# undef DO
1889# undef DO_GENERIC
1890
njned619712003-10-01 16:45:04 +00001891 } else if (ADD == u->opcode && Literal == u->tag1 && t_ESP == u->val2) {
1892 delta += u->lit32;
1893
1894 } else if (SUB == u->opcode && Literal == u->tag1 && t_ESP == u->val2) {
1895 delta -= u->lit32;
njn9b007f62003-04-07 14:40:25 +00001896
1897 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1898 TempReg == u->tag2) {
njned619712003-10-01 16:45:04 +00001899 // t_ESP is transferred
njn9b007f62003-04-07 14:40:25 +00001900 t_ESP = u->val2;
njned619712003-10-01 16:45:04 +00001901
1902 } else {
1903 // Stop tracking t_ESP if it's clobbered by this instruction.
1904 Int tempUse [VG_MAX_REGS_USED];
1905 Bool isWrites[VG_MAX_REGS_USED];
1906 Int j, n = VG_(get_reg_usage)(u, TempReg, tempUse, isWrites);
1907
1908 for (j = 0; j < n; j++) {
1909 if (tempUse[j] == t_ESP && isWrites[j])
1910 t_ESP = INVALID_TEMPREG;
1911 }
njn9b007f62003-04-07 14:40:25 +00001912 }
1913 VG_(copy_UInstr) ( cb, u );
1914 }
1915
1916 VG_(free_UCodeBlock)(cb_in);
1917 return cb;
1918}
sewardjde4a1d02002-03-22 01:27:54 +00001919
1920/*------------------------------------------------------------*/
1921/*--- The new register allocator. ---*/
1922/*------------------------------------------------------------*/
1923
1924typedef
1925 struct {
1926 /* Becomes live for the first time after this insn ... */
1927 Int live_after;
1928 /* Becomes dead for the last time after this insn ... */
1929 Int dead_before;
1930 /* The "home" spill slot, if needed. Never changes. */
1931 Int spill_no;
1932 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1933 Int real_no;
1934 }
1935 TempInfo;
1936
1937
1938/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1939 them in spill locations, and add spill code, if there are not
1940 enough real regs. The usual register allocation deal, in short.
1941
1942 Important redundancy of representation:
1943
1944 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1945 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1946
1947 The .real_no field of a TempInfo gives the current RRR for
1948 this TempReg, or VG_NOVALUE if the TempReg is currently
1949 in memory, in which case it is in the SpillNo denoted by
1950 spillno.
1951
1952 These pieces of information (a fwds-bwds mapping, really) must
1953 be kept consistent!
1954
1955 This allocator uses the so-called Second Chance Bin Packing
1956 algorithm, as described in "Quality and Speed in Linear-scan
1957 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1958 pp142-151). It is simple and fast and remarkably good at
1959 minimising the amount of spill code introduced.
1960*/
1961
1962static
1963UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1964{
1965 TempInfo* temp_info;
njned619712003-10-01 16:45:04 +00001966 Int real_to_temp [VG_MAX_REALREGS];
sewardjde4a1d02002-03-22 01:27:54 +00001967 Bool is_spill_cand[VG_MAX_REALREGS];
1968 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1969 Int i, j, k, m, r, tno, max_ss_no;
1970 Bool wr, defer, isRead, spill_reqd;
njned619712003-10-01 16:45:04 +00001971 UInt realUse [VG_MAX_REGS_USED];
1972 Int tempUse [VG_MAX_REGS_USED];
njnf4ce3d32003-02-10 10:17:26 +00001973 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001974 UCodeBlock* c2;
1975
1976 /* Used to denote ... well, "no value" in this fn. */
1977# define VG_NOTHING (-2)
1978
1979 /* Initialise the TempReg info. */
1980 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001981 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1982 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001983 else
1984 temp_info = NULL;
1985
1986 for (i = 0; i < c1->nextTemp; i++) {
1987 temp_info[i].live_after = VG_NOTHING;
1988 temp_info[i].dead_before = VG_NOTHING;
1989 temp_info[i].spill_no = VG_NOTHING;
1990 /* temp_info[i].real_no is not yet relevant. */
1991 }
1992
1993 spill_reqd = False;
1994
1995 /* Scan fwds to establish live ranges. */
1996
1997 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00001998 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1999 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002000 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002001
2002 /* For each temp usage ... fwds in program order */
2003 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00002004 tno = tempUse[j];
2005 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00002006 if (wr) {
2007 /* Writes hold a reg live until after this insn. */
2008 if (temp_info[tno].live_after == VG_NOTHING)
2009 temp_info[tno].live_after = i;
2010 if (temp_info[tno].dead_before < i + 1)
2011 temp_info[tno].dead_before = i + 1;
2012 } else {
2013 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00002014 if (temp_info[tno].live_after == VG_NOTHING) {
2015 VG_(printf)("At instr %d...\n", i);
2016 VG_(core_panic)("First use of tmp not a write,"
2017 " probably a skin instrumentation error");
2018 }
sewardjde4a1d02002-03-22 01:27:54 +00002019 /* Reads only hold it live until before this insn. */
2020 if (temp_info[tno].dead_before < i)
2021 temp_info[tno].dead_before = i;
2022 }
2023 }
2024 }
2025
2026# if 0
2027 /* Sanity check on live ranges. Expensive but correct. */
2028 for (i = 0; i < c1->nextTemp; i++) {
2029 vg_assert( (temp_info[i].live_after == VG_NOTHING
2030 && temp_info[i].dead_before == VG_NOTHING)
2031 || (temp_info[i].live_after != VG_NOTHING
2032 && temp_info[i].dead_before != VG_NOTHING) );
2033 }
2034# endif
2035
2036 /* Do a rank-based allocation of TempRegs to spill slot numbers.
2037 We put as few as possible values in spill slots, but
2038 nevertheless need to have an assignment to them just in case. */
2039
2040 max_ss_no = -1;
2041
2042 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
2043 ss_busy_until_before[i] = 0;
2044
2045 for (i = 0; i < c1->nextTemp; i++) {
2046
2047 /* True iff this temp is unused. */
2048 if (temp_info[i].live_after == VG_NOTHING)
2049 continue;
2050
2051 /* Find the lowest-numbered spill slot which is available at the
2052 start point of this interval, and assign the interval to
2053 it. */
2054 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
2055 if (ss_busy_until_before[j] <= temp_info[i].live_after)
2056 break;
2057 if (j == VG_MAX_SPILLSLOTS) {
2058 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00002059 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00002060 }
2061 ss_busy_until_before[j] = temp_info[i].dead_before;
2062 temp_info[i].spill_no = j;
2063 if (j > max_ss_no)
2064 max_ss_no = j;
2065 }
2066
2067 VG_(total_reg_rank) += (max_ss_no+1);
2068
2069 /* Show live ranges and assigned spill slot nos. */
2070
njn25e49d8e72002-09-23 09:36:25 +00002071 if (dis) {
2072 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00002073
2074 for (i = 0; i < c1->nextTemp; i++) {
2075 if (temp_info[i].live_after == VG_NOTHING)
2076 continue;
2077 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002078 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00002079 i,
2080 temp_info[i].live_after,
2081 temp_info[i].dead_before,
2082 temp_info[i].spill_no
2083 );
2084 }
njn25e49d8e72002-09-23 09:36:25 +00002085 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00002086 }
2087
2088 /* Now that we've established a spill slot number for each used
2089 temporary, we can go ahead and do the core of the "Second-chance
2090 binpacking" allocation algorithm. */
2091
njn25e49d8e72002-09-23 09:36:25 +00002092 if (dis) VG_(printf)("Register allocated UCode:\n");
2093
2094
sewardjde4a1d02002-03-22 01:27:54 +00002095 /* Resulting code goes here. We generate it all in a forwards
2096 pass. */
njn4ba5a792002-09-30 10:23:54 +00002097 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002098 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00002099
2100 /* At the start, no TempRegs are assigned to any real register.
2101 Correspondingly, all temps claim to be currently resident in
2102 their spill slots, as computed by the previous two passes. */
2103 for (i = 0; i < VG_MAX_REALREGS; i++)
2104 real_to_temp[i] = VG_NOTHING;
2105 for (i = 0; i < c1->nextTemp; i++)
2106 temp_info[i].real_no = VG_NOTHING;
2107
sewardjde4a1d02002-03-22 01:27:54 +00002108 /* Process each insn in turn. */
2109 for (i = 0; i < c1->used; i++) {
2110
2111 if (c1->instrs[i].opcode == NOP) continue;
2112 VG_(uinstrs_prealloc)++;
2113
2114# if 0
2115 /* Check map consistency. Expensive but correct. */
2116 for (r = 0; r < VG_MAX_REALREGS; r++) {
2117 if (real_to_temp[r] != VG_NOTHING) {
2118 tno = real_to_temp[r];
2119 vg_assert(tno >= 0 && tno < c1->nextTemp);
2120 vg_assert(temp_info[tno].real_no == r);
2121 }
2122 }
2123 for (tno = 0; tno < c1->nextTemp; tno++) {
2124 if (temp_info[tno].real_no != VG_NOTHING) {
2125 r = temp_info[tno].real_no;
2126 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
2127 vg_assert(real_to_temp[r] == tno);
2128 }
2129 }
2130# endif
2131
njn25e49d8e72002-09-23 09:36:25 +00002132 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002133 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00002134
2135 /* First, free up enough real regs for this insn. This may
2136 generate spill stores since we may have to evict some TempRegs
2137 currently in real regs. Also generates spill loads. */
2138
njn810086f2002-11-14 12:42:47 +00002139 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2140 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002141 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002142
2143 /* For each ***different*** temp mentioned in the insn .... */
2144 for (j = 0; j < k; j++) {
2145
2146 /* First check if the temp is mentioned again later; if so,
2147 ignore this mention. We only want to process each temp
2148 used by the insn once, even if it is mentioned more than
2149 once. */
2150 defer = False;
njn810086f2002-11-14 12:42:47 +00002151 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00002152 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002153 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00002154 defer = True;
2155 if (defer)
2156 continue;
2157
njn810086f2002-11-14 12:42:47 +00002158 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00002159 First of all, if it already has a register assigned, we
2160 don't need to do anything more. */
2161 if (temp_info[tno].real_no != VG_NOTHING)
2162 continue;
2163
2164 /* No luck. The next thing to do is see if there is a
2165 currently unassigned register available. If so, bag it. */
2166 for (r = 0; r < VG_MAX_REALREGS; r++) {
2167 if (real_to_temp[r] == VG_NOTHING)
2168 break;
2169 }
2170 if (r < VG_MAX_REALREGS) {
2171 real_to_temp[r] = tno;
2172 temp_info[tno].real_no = r;
2173 continue;
2174 }
2175
2176 /* Unfortunately, that didn't pan out either. So we'll have
2177 to eject some other unfortunate TempReg into a spill slot
2178 in order to free up a register. Of course, we need to be
2179 careful not to eject some other TempReg needed by this
2180 insn.
2181
2182 Select r in 0 .. VG_MAX_REALREGS-1 such that
2183 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00002184 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002185 wrong to eject some other TempReg which we need to use in
2186 this insn.
2187
2188 It is here that it is important to make a good choice of
2189 register to spill. */
2190
2191 /* First, mark those regs which are not spill candidates. */
2192 for (r = 0; r < VG_MAX_REALREGS; r++) {
2193 is_spill_cand[r] = True;
2194 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002195 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002196 is_spill_cand[r] = False;
2197 break;
2198 }
2199 }
2200 }
2201
2202 /* We can choose any r satisfying is_spill_cand[r]. However,
2203 try to make a good choice. First, try and find r such
2204 that the associated TempReg is already dead. */
2205 for (r = 0; r < VG_MAX_REALREGS; r++) {
2206 if (is_spill_cand[r] &&
2207 temp_info[real_to_temp[r]].dead_before <= i)
2208 goto have_spill_cand;
2209 }
2210
2211 /* No spill cand is mapped to a dead TempReg. Now we really
2212 _do_ have to generate spill code. Choose r so that the
2213 next use of its associated TempReg is as far ahead as
2214 possible, in the hope that this will minimise the number of
2215 consequent reloads required. This is a bit expensive, but
2216 we don't have to do it very often. */
2217 {
2218 Int furthest_r = VG_MAX_REALREGS;
2219 Int furthest = 0;
2220 for (r = 0; r < VG_MAX_REALREGS; r++) {
2221 if (!is_spill_cand[r]) continue;
2222 for (m = i+1; m < c1->used; m++)
2223 if (uInstrMentionsTempReg(&c1->instrs[m],
2224 real_to_temp[r]))
2225 break;
2226 if (m > furthest) {
2227 furthest = m;
2228 furthest_r = r;
2229 }
2230 }
2231 r = furthest_r;
2232 goto have_spill_cand;
2233 }
2234
2235 have_spill_cand:
2236 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002237 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002238
2239 /* Eject r. Important refinement: don't bother if the
2240 associated TempReg is now dead. */
2241 vg_assert(real_to_temp[r] != VG_NOTHING);
2242 vg_assert(real_to_temp[r] != tno);
2243 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2244 if (temp_info[real_to_temp[r]].dead_before > i) {
2245 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002246 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002247 SpillNo, temp_info[real_to_temp[r]].spill_no);
2248 VG_(uinstrs_spill)++;
2249 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002250 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002251 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002252 }
2253
2254 /* Decide if tno is read. */
2255 isRead = False;
2256 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002257 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002258 isRead = True;
2259
2260 /* If so, generate a spill load. */
2261 if (isRead) {
2262 uInstr2(c2, GET, 4,
2263 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002264 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002265 VG_(uinstrs_spill)++;
2266 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002267 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002268 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002269 }
2270
2271 /* Update the forwards and backwards maps. */
2272 real_to_temp[r] = tno;
2273 temp_info[tno].real_no = r;
2274 }
2275
2276 /* By this point, all TempRegs mentioned by the insn have been
2277 bought into real regs. We now copy the insn to the output
2278 and use patchUInstr to convert its rTempRegs into
2279 realregs. */
2280 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002281 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002282 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002283 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002284
njn25e49d8e72002-09-23 09:36:25 +00002285 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002286 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002287 VG_(printf)("\n");
2288 }
2289 }
2290
2291 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002292 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002293
njn4ba5a792002-09-30 10:23:54 +00002294 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002295
2296 if (spill_reqd)
2297 VG_(translations_needing_spill)++;
2298
2299 return c2;
2300
2301# undef VG_NOTHING
2302
2303}
sewardj7c4b6042003-06-14 15:47:15 +00002304
njn25e49d8e72002-09-23 09:36:25 +00002305/* Analysis records liveness of all general-use RealRegs in the UCode. */
2306static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2307{
2308 Int i, j, k;
2309 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002310 Int regUse[VG_MAX_REGS_USED];
2311 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002312 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002313
njn25e49d8e72002-09-23 09:36:25 +00002314 /* All regs are dead at the end of the block */
2315 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002316
sewardjde4a1d02002-03-22 01:27:54 +00002317 for (i = cb->used-1; i >= 0; i--) {
2318 u = &cb->instrs[i];
2319
njn25e49d8e72002-09-23 09:36:25 +00002320 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002321
njn810086f2002-11-14 12:42:47 +00002322 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002323
njn25e49d8e72002-09-23 09:36:25 +00002324 /* For each reg usage ... bwds in program order. Variable is live
2325 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002326 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002327 convert it to our rank number. */
2328 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002329 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002330 rregs_live,
njn810086f2002-11-14 12:42:47 +00002331 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002332 }
2333 }
sewardjde4a1d02002-03-22 01:27:54 +00002334}
2335
sewardjde4a1d02002-03-22 01:27:54 +00002336/*------------------------------------------------------------*/
2337/*--- Main entry point for the JITter. ---*/
2338/*------------------------------------------------------------*/
2339
2340/* Translate the basic block beginning at orig_addr, placing the
2341 translation in a vg_malloc'd block, the address and size of which
2342 are returned in trans_addr and trans_size. Length of the original
2343 block is also returned in orig_size. If the latter three are NULL,
2344 this call is being done for debugging purposes, in which case (a)
2345 throw away the translation once it is made, and (b) produce a load
2346 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002347
2348 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002349*/
njn72718642003-07-24 08:45:32 +00002350void VG_(translate) ( /*IN*/ ThreadId tid,
njn25e49d8e72002-09-23 09:36:25 +00002351 /*IN*/ Addr orig_addr,
2352 /*OUT*/ UInt* orig_size,
2353 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002354 /*OUT*/ UInt* trans_size,
2355 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002356{
fitzhardinge98abfc72003-12-16 02:05:15 +00002357 Int n_disassembled_bytes, final_code_size;
sewardjde4a1d02002-03-22 01:27:54 +00002358 Bool debugging_translation;
2359 UChar* final_code;
2360 UCodeBlock* cb;
sewardja60be0e2003-05-26 08:47:27 +00002361 Bool notrace_until_done;
sewardj1e86b8b2003-06-16 23:34:12 +00002362 UInt notrace_until_limit = 0;
fitzhardinge98abfc72003-12-16 02:05:15 +00002363 Segment *seg;
2364 Addr redir;
sewardjde4a1d02002-03-22 01:27:54 +00002365
2366 VGP_PUSHCC(VgpTranslate);
2367 debugging_translation
2368 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2369
sewardj25c7c3a2003-07-10 00:17:58 +00002370 /* Look in the code redirect table to see if we should
2371 translate an alternative address for orig_addr. */
fitzhardinge98abfc72003-12-16 02:05:15 +00002372 redir = VG_(code_redirect)(orig_addr);
2373
2374 if (redir != orig_addr && VG_(clo_verbosity) >= 2)
2375 VG_(message)(Vg_UserMsg,
2376 "TRANSLATE: %p redirected to %p",
2377 orig_addr,
2378 redir );
2379 orig_addr = redir;
sewardj25c7c3a2003-07-10 00:17:58 +00002380
sewardja60be0e2003-05-26 08:47:27 +00002381 /* If codegen tracing, don't start tracing until
2382 notrace_until_limit blocks have gone by. This avoids printing
2383 huge amounts of useless junk when all we want to see is the last
2384 few blocks translated prior to a failure. Set
2385 notrace_until_limit to be the number of translations to be made
2386 before --trace-codegen= style printing takes effect. */
2387 notrace_until_done
2388 = VG_(overall_in_count) > notrace_until_limit;
2389
fitzhardinge98abfc72003-12-16 02:05:15 +00002390 seg = VG_(find_segment)(orig_addr);
2391
njn25e49d8e72002-09-23 09:36:25 +00002392 if (!debugging_translation)
njn72718642003-07-24 08:45:32 +00002393 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tid, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002394
fitzhardinge98abfc72003-12-16 02:05:15 +00002395 if (seg == NULL ||
2396 !VG_(seg_contains)(seg, orig_addr, 1) ||
2397 (seg->prot & (VKI_PROT_READ|VKI_PROT_EXEC)) == 0) {
jsgf855d93d2003-10-13 22:26:55 +00002398 vki_ksiginfo_t info;
2399
fitzhardinge98abfc72003-12-16 02:05:15 +00002400 /* Code address is bad - deliver a signal instead */
2401 vg_assert(!VG_(is_addressable)(orig_addr, 1));
2402
jsgf855d93d2003-10-13 22:26:55 +00002403 info.si_signo = VKI_SIGSEGV;
fitzhardinge98abfc72003-12-16 02:05:15 +00002404
2405 if (seg != NULL && VG_(seg_contains)(seg, orig_addr, 1)) {
2406 vg_assert((seg->prot & VKI_PROT_EXEC) == 0);
2407 info.si_code = 2; /* invalid permissions for mapped object */
2408 } else
2409 info.si_code = 1; /* address not mapped to object */
njnc9d4ba72003-10-15 10:34:03 +00002410 info._sifields._sigfault._addr = (void*)orig_addr;
jsgf855d93d2003-10-13 22:26:55 +00002411
2412 VG_(deliver_signal)(tid, &info, False);
2413 return;
fitzhardinge98abfc72003-12-16 02:05:15 +00002414 } else
2415 seg->flags |= SF_CODE; /* contains cached code */
jsgf855d93d2003-10-13 22:26:55 +00002416
njn4ba5a792002-09-30 10:23:54 +00002417 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002418 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002419
njn25e49d8e72002-09-23 09:36:25 +00002420 /* If doing any code printing, print a basic block start marker */
sewardja60be0e2003-05-26 08:47:27 +00002421 if (VG_(clo_trace_codegen) && notrace_until_done) {
njn25e49d8e72002-09-23 09:36:25 +00002422 Char fnname[64] = "";
2423 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2424 VG_(printf)(
njne0205ff2003-04-08 00:56:14 +00002425 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %llu ====\n\n",
njn25e49d8e72002-09-23 09:36:25 +00002426 VG_(overall_in_count), fnname, orig_addr,
2427 VG_(overall_in_osize), VG_(overall_in_tsize),
2428 VG_(bbs_done));
2429 }
2430
2431 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
sewardja60be0e2003-05-26 08:47:27 +00002432# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2433 ( debugging_translation \
2434 || (notrace_until_done \
2435 && (VG_(clo_trace_codegen) & (1 << (n-1))) ))
njn25e49d8e72002-09-23 09:36:25 +00002436
sewardjde4a1d02002-03-22 01:27:54 +00002437 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002438 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2439 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002440 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002441 VGP_POPCC(VgpToUCode);
2442
sewardjde4a1d02002-03-22 01:27:54 +00002443 /* Try and improve the code a bit. */
2444 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002445 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2446 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002447 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002448 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002449 }
2450
njn25e49d8e72002-09-23 09:36:25 +00002451 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2452 SK_(instrument) looks at it. */
2453 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2454 VGP_PUSHCC(VgpInstrument);
2455 cb = SK_(instrument) ( cb, orig_addr );
2456 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002457 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002458 VG_(saneUCodeBlock)( cb );
2459 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002460
njn9b007f62003-04-07 14:40:25 +00002461 /* Add %ESP-update hooks if the skin requires them */
2462 /* Nb: We don't print out this phase, because it doesn't do much */
2463 if (VG_(need_to_handle_esp_assignment)()) {
2464 VGP_PUSHCC(VgpESPUpdate);
2465 cb = vg_ESP_update_pass ( cb );
2466 VGP_POPCC(VgpESPUpdate);
2467 }
2468
sewardjde4a1d02002-03-22 01:27:54 +00002469 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002470 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2471 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002472 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002473 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002474
njn25e49d8e72002-09-23 09:36:25 +00002475 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2476 * anything; results can be seen when emitting final code). */
2477 VGP_PUSHCC(VgpLiveness);
2478 vg_realreg_liveness_analysis ( cb );
2479 VGP_POPCC(VgpLiveness);
2480
2481 /* Emit final code */
2482 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2483
2484 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002485 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002486 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002487 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002488
njn25e49d8e72002-09-23 09:36:25 +00002489#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2490
sewardjde4a1d02002-03-22 01:27:54 +00002491 if (debugging_translation) {
2492 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002493 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002494 } else {
2495 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002496 *orig_size = n_disassembled_bytes;
2497 *trans_addr = (Addr)final_code;
2498 *trans_size = final_code_size;
2499 }
njn25e49d8e72002-09-23 09:36:25 +00002500 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002501}
2502
2503/*--------------------------------------------------------------------*/
2504/*--- end vg_translate.c ---*/
2505/*--------------------------------------------------------------------*/
njned619712003-10-01 16:45:04 +00002506