blob: f07bb0ee9ada2fa83bd3c2af03894a8e5781a774 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
njn0e1b5142003-04-15 14:58:06 +000011 Copyright (C) 2000-2003 Julian Seward
sewardjde4a1d02002-03-22 01:27:54 +000012 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
njn25e49d8e72002-09-23 09:36:25 +000038#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000058 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000059 cb->used = cb->size = 0;
60 cb->nextTemp = cb_in->nextTemp;
61 cb->instrs = NULL;
62 return cb;
63}
sewardjde4a1d02002-03-22 01:27:54 +000064
njn4ba5a792002-09-30 10:23:54 +000065void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000066{
njn25e49d8e72002-09-23 09:36:25 +000067 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
68 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000069}
70
71
72/* Ensure there's enough space in a block to add one uinstr. */
73static __inline__
74void ensureUInstr ( UCodeBlock* cb )
75{
76 if (cb->used == cb->size) {
77 if (cb->instrs == NULL) {
78 vg_assert(cb->size == 0);
79 vg_assert(cb->used == 0);
80 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000081 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000082 } else {
83 Int i;
njn25e49d8e72002-09-23 09:36:25 +000084 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000085 2 * sizeof(UInstr) * cb->size);
86 for (i = 0; i < cb->used; i++)
87 instrs2[i] = cb->instrs[i];
88 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000089 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000090 cb->instrs = instrs2;
91 }
92 }
93
94 vg_assert(cb->used < cb->size);
95}
96
97
98__inline__
njn4ba5a792002-09-30 10:23:54 +000099void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000100{
101 u->val1 = u->val2 = u->val3 = 0;
102 u->tag1 = u->tag2 = u->tag3 = NoValue;
103 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000104 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000105 u->signed_widen = u->has_ret_val = False;
106 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000107 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000108 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000109 u->size = 0;
110 u->cond = 0;
111 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000112 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000113}
114
115
116/* Add an instruction to a ucode block, and return the index of the
117 instruction. */
118__inline__
njn4ba5a792002-09-30 10:23:54 +0000119void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000120 Tag tag1, UInt val1,
121 Tag tag2, UInt val2,
122 Tag tag3, UInt val3 )
123{
124 UInstr* ui;
125 ensureUInstr(cb);
126 ui = & cb->instrs[cb->used];
127 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000128 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000129 ui->val1 = val1;
130 ui->val2 = val2;
131 ui->val3 = val3;
132 ui->opcode = opcode;
133 ui->tag1 = tag1;
134 ui->tag2 = tag2;
135 ui->tag3 = tag3;
136 ui->size = sz;
137 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
138 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
139 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
140}
141
142
143__inline__
njn4ba5a792002-09-30 10:23:54 +0000144void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000145 Tag tag1, UInt val1,
146 Tag tag2, UInt val2 )
147{
148 UInstr* ui;
149 ensureUInstr(cb);
150 ui = & cb->instrs[cb->used];
151 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000152 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000153 ui->val1 = val1;
154 ui->val2 = val2;
155 ui->opcode = opcode;
156 ui->tag1 = tag1;
157 ui->tag2 = tag2;
158 ui->size = sz;
159 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
160 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
161}
162
163
164__inline__
njn4ba5a792002-09-30 10:23:54 +0000165void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000166 Tag tag1, UInt val1 )
167{
168 UInstr* ui;
169 ensureUInstr(cb);
170 ui = & cb->instrs[cb->used];
171 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000172 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000173 ui->val1 = val1;
174 ui->opcode = opcode;
175 ui->tag1 = tag1;
176 ui->size = sz;
177 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
178}
179
180
181__inline__
njn4ba5a792002-09-30 10:23:54 +0000182void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000183{
184 UInstr* ui;
185 ensureUInstr(cb);
186 ui = & cb->instrs[cb->used];
187 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000188 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000189 ui->opcode = opcode;
190 ui->size = sz;
191}
192
sewardjde4a1d02002-03-22 01:27:54 +0000193/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000194__inline__
njn4ba5a792002-09-30 10:23:54 +0000195void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000196{
197 ensureUInstr(cb);
198 cb->instrs[cb->used] = *instr;
199 cb->used++;
200}
201
sewardjde4a1d02002-03-22 01:27:54 +0000202/* Copy auxiliary info from one uinstr to another. */
203static __inline__
204void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
205{
njn25e49d8e72002-09-23 09:36:25 +0000206 dst->cond = src->cond;
207 dst->extra4b = src->extra4b;
208 dst->signed_widen = src->signed_widen;
209 dst->jmpkind = src->jmpkind;
210 dst->flags_r = src->flags_r;
211 dst->flags_w = src->flags_w;
212 dst->argc = src->argc;
213 dst->regparms_n = src->regparms_n;
214 dst->has_ret_val = src->has_ret_val;
215 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000216}
217
218
sewardjde4a1d02002-03-22 01:27:54 +0000219/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000220void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000221{
222 LAST_UINSTR(cb).lit32 = lit32;
223}
224
225
njn25e49d8e72002-09-23 09:36:25 +0000226/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000227void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
228 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000229{
230 vg_assert(argc < 4);
231 vg_assert(regparms_n <= argc);
232 LAST_UINSTR(cb).lit32 = fn;
233 LAST_UINSTR(cb).argc = argc;
234 LAST_UINSTR(cb).regparms_n = regparms_n;
235 LAST_UINSTR(cb).has_ret_val = has_ret_val;
236}
237
njn810086f2002-11-14 12:42:47 +0000238/* For the last uinsn inserted into cb, set the read, written and
239 undefined flags. Undefined flags are counted as written, but it
240 seems worthwhile to distinguish them.
241*/
242__inline__
243void VG_(set_flag_fields) ( UCodeBlock* cb,
244 FlagSet rr, FlagSet ww, FlagSet uu )
245{
246 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
247
248 vg_assert(rr == (rr & FlagsALL));
249 vg_assert(uw == (uw & FlagsALL));
250 LAST_UINSTR(cb).flags_r = rr;
251 LAST_UINSTR(cb).flags_w = uw;
252}
253
254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
njn25e49d8e72002-09-23 09:36:25 +0000261#if 1
262# define BEST_ALLOC_ORDER
263#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
330/* This seems as good a place as any to record some important stuff
331 about ucode semantics.
332
333 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
334 TempReg are defined to zero-extend the loaded value to 32 bits.
335 This is needed to make the translation of movzbl et al work
336 properly.
337
338 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
339
340 * Arithmetic on TempRegs is at the specified size. For example,
341 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
342 being emitted -- not a 32 bit one.
343
344 * On some insns we allow the cc bit to be set. If so, the
345 intention is that the simulated machine's %eflags register
346 is copied into that of the real machine before the insn,
347 and copied back again afterwards. This means that the
348 code generated for that insn must be very careful only to
349 update %eflags in the intended way. This is particularly
350 important for the routines referenced by CALL insns.
351*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
   Before liveness analysis, the regs_live_after field of every core
   uinstr other than CCALL should be ALL_RREGS_LIVE.  Afterwards, it
   may hold any value.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
njn25e49d8e72002-09-23 09:36:25 +0000402Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000403{
njn25e49d8e72002-09-23 09:36:25 +0000404# define LIT0 (u->lit32 == 0)
sewardjb31b06d2003-06-13 00:26:02 +0000405# define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
njn25e49d8e72002-09-23 09:36:25 +0000406# define LIT1 (!(LIT0))
407# define LITm (u->tag1 == Literal ? True : LIT0 )
sewardj3d7c9c82003-03-26 21:08:13 +0000408# define SZ8 (u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000409# define SZ4 (u->size == 4)
410# define SZ2 (u->size == 2)
411# define SZ1 (u->size == 1)
412# define SZ0 (u->size == 0)
413# define SZ42 (u->size == 4 || u->size == 2)
sewardjd7971012003-04-04 00:21:58 +0000414# define SZ48 (u->size == 4 || u->size == 8)
sewardjfebaa3b2003-05-25 01:07:34 +0000415# define SZ416 (u->size == 4 || u->size == 16)
sewardjde8aecf2003-05-27 00:46:28 +0000416# define SZsse (u->size == 4 || u->size == 8 || u->size == 16)
njn25e49d8e72002-09-23 09:36:25 +0000417# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
418# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
419 || u->size == 10 || u->size == 28 || u->size == 108)
420# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
421 ? (u->size == 4) : True)
422
423/* For these ones, two cases:
424 *
425 * 1. They are transliterations of the corresponding x86 instruction, in
426 * which case they should have its flags (except that redundant write
427 * flags can be annulled by the optimisation pass).
428 *
429 * 2. They are being used generally for other purposes, eg. helping with a
430 * 'rep'-prefixed instruction, in which case should have empty flags .
431 */
432# define emptyR (u->flags_r == FlagsEmpty)
433# define emptyW (u->flags_w == FlagsEmpty)
434# define CC0 (emptyR && emptyW)
435# define CCr (u->flags_r == FlagsALL && emptyW)
436# define CCw (emptyR && u->flags_w == FlagsALL)
437# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
438# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
439# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
440# define CCb ((u->flags_r==FlagC || emptyR) && \
441 (u->flags_w==FlagsOSZACP || emptyW))
442# define CCd ((u->flags_r==FlagC || emptyR) && \
443 (u->flags_w==FlagsOC || emptyW))
sewardjc232b212002-12-10 22:24:03 +0000444# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
445 || (u->flags_r==FlagsZCP && emptyW))
njn25e49d8e72002-09-23 09:36:25 +0000446# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
447# define CCj (u->cond==CondAlways ? CC0 : CCg)
448
sewardjde4a1d02002-03-22 01:27:54 +0000449# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
450# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
451# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
452# define A1 (u->tag1 == ArchReg)
453# define A2 (u->tag2 == ArchReg)
454# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
455# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
456# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
457# define L1 (u->tag1 == Literal && u->val1 == 0)
458# define L2 (u->tag2 == Literal && u->val2 == 0)
459# define Ls1 (u->tag1 == Lit16)
sewardjfebaa3b2003-05-25 01:07:34 +0000460# define Ls2 (u->tag2 == Lit16)
sewardjde4a1d02002-03-22 01:27:54 +0000461# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000462# define TRL1 (TR1 || L1)
463# define TRAL1 (TR1 || A1 || L1)
sewardjde4a1d02002-03-22 01:27:54 +0000464# define N1 (u->tag1 == NoValue)
465# define N2 (u->tag2 == NoValue)
466# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000467# define Se1 (u->tag1 == ArchRegS)
468# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000469
njn25e49d8e72002-09-23 09:36:25 +0000470# define COND0 (u->cond == 0)
471# define EXTRA4b0 (u->extra4b == 0)
472# define SG_WD0 (u->signed_widen == 0)
473# define JMPKIND0 (u->jmpkind == 0)
474# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
475 ( beforeLiveness \
476 ? u->regs_live_after == ALL_RREGS_LIVE \
477 : True ))
478
479# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
480# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
481# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
482# define XJMP ( SG_WD0 && CCALL0)
483# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
484# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
485
486 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000487 Int n_lits = 0;
488 if (u->tag1 == Literal) n_lits++;
489 if (u->tag2 == Literal) n_lits++;
490 if (u->tag3 == Literal) n_lits++;
491 if (n_lits > 1)
492 return False;
493
njn25e49d8e72002-09-23 09:36:25 +0000494 /* Fields not checked: val1, val2, val3 */
495
sewardjde4a1d02002-03-22 01:27:54 +0000496 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000497
498 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000499 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
500 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
501 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000502 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000503 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000504 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
505 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
506 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
507 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
508 case LOAD:
509 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
510 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
511 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
512 case WIDEN: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XWIDEN;
513 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
514 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
515 case CALLM_S:
516 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
517 case PUSH:
518 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
519 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
520 case AND:
521 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
522 case ADD:
523 case XOR:
524 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
525 case SBB:
526 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
527 case SHL:
528 case SHR:
529 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
530 case ROL:
531 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
532 case RCL:
533 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
534 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
535 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
536 case INC:
537 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
538 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
539 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
540 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
541 case FPU_R:
542 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
543 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
544 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
545 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
546 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
547 case CCALL: return LIT1 && SZ0 && CC0 &&
548 (u->argc > 0 ? TR1 : N1) &&
549 (u->argc > 1 ? TR2 : N2) &&
550 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
551 u->regparms_n <= u->argc && XCCALL;
sewardj3d7c9c82003-03-26 21:08:13 +0000552 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
553 case MMX1:
554 case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000555 case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
sewardjd7971012003-04-04 00:21:58 +0000556 case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjd1c9e432003-04-04 20:40:34 +0000557 case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjca860012003-03-27 23:52:58 +0000558 case MMX2_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjd1c9e432003-04-04 20:40:34 +0000559 case MMX2_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000560
561 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
562 case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
563 case SSE2a_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjde8aecf2003-05-27 00:46:28 +0000564 case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj02af6bc2003-06-12 00:56:06 +0000565 case SSE3a_MemRd: return LIT0 && SZsse && CCf && Ls1 && Ls2 && TR3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000566 case SSE3g_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj02af6bc2003-06-12 00:56:06 +0000567 case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjb31b06d2003-06-13 00:26:02 +0000568 case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
569 case SSE3g1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardja60be0e2003-05-26 08:47:27 +0000570 case SSE3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
sewardj2ca39a12003-06-14 12:03:35 +0000571 case SSE4: return LIT0 && SZ0 && CCf && Ls1 && Ls2 && N3 && XOTHER;
sewardja453fb02003-06-14 13:22:36 +0000572 case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000573 default:
574 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000575 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000576 else {
577 VG_(printf)("unhandled opcode: %u. Perhaps "
578 "VG_(needs).extended_UCode should be set?",
579 u->opcode);
njne427a662002-10-02 11:08:25 +0000580 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000581 }
sewardjde4a1d02002-03-22 01:27:54 +0000582 }
njn25e49d8e72002-09-23 09:36:25 +0000583# undef LIT0
584# undef LIT1
sewardjb31b06d2003-06-13 00:26:02 +0000585# undef LIT8
njn25e49d8e72002-09-23 09:36:25 +0000586# undef LITm
sewardj3d7c9c82003-03-26 21:08:13 +0000587# undef SZ8
sewardjde4a1d02002-03-22 01:27:54 +0000588# undef SZ4
589# undef SZ2
590# undef SZ1
591# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000592# undef SZ42
sewardjd7971012003-04-04 00:21:58 +0000593# undef SZ48
sewardjfebaa3b2003-05-25 01:07:34 +0000594# undef SZ416
sewardjde8aecf2003-05-27 00:46:28 +0000595# undef SZsse
njn25e49d8e72002-09-23 09:36:25 +0000596# undef SZi
597# undef SZf
598# undef SZ4m
599# undef emptyR
600# undef emptyW
601# undef CC0
602# undef CCr
603# undef CCw
604# undef CCa
605# undef CCb
606# undef CCc
607# undef CCd
608# undef CCe
609# undef CCf
610# undef CCg
611# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000612# undef TR1
613# undef TR2
614# undef TR3
615# undef A1
616# undef A2
617# undef AS1
618# undef AS2
619# undef AS3
620# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000621# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000622# undef Ls1
sewardjfebaa3b2003-05-25 01:07:34 +0000623# undef Ls2
sewardjde4a1d02002-03-22 01:27:54 +0000624# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000625# undef TRL1
626# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000627# undef N1
628# undef N2
629# undef N3
sewardje1042472002-09-30 12:33:11 +0000630# undef Se2
631# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000632# undef COND0
633# undef EXTRA4b0
634# undef SG_WD0
635# undef JMPKIND0
636# undef CCALL0
637# undef Xextra4b
638# undef XWIDEN
639# undef XJMP
640# undef XCCALL
641# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000642}
643
njn25e49d8e72002-09-23 09:36:25 +0000644void VG_(saneUCodeBlock) ( UCodeBlock* cb )
645{
646 Int i;
647
648 for (i = 0; i < cb->used; i++) {
649 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
650 if (!sane) {
651 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000652 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000653 }
654 vg_assert(sane);
655 }
656}
sewardjde4a1d02002-03-22 01:27:54 +0000657
658/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000659Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000660{
661 Int callm = 0;
662 Int callm_s = 0;
663 Int callm_e = 0;
664 Int callm_ptr, calls_ptr;
665 Int i, j, t;
666 Bool incall = False;
667
668 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
669
670 for (i = 0; i < cb->used; i++) {
671 switch (cb->instrs[i].opcode) {
672 case CALLM:
673 if (!incall) return False;
674 callm++;
675 break;
676 case CALLM_S:
677 if (incall) return False;
678 incall = True;
679 callm_s++;
680 break;
681 case CALLM_E:
682 if (!incall) return False;
683 incall = False;
684 callm_e++;
685 break;
686 case PUSH: case POP: case CLEAR:
687 if (!incall) return False;
688 break;
689 default:
690 break;
691 }
692 }
693 if (incall) return False;
694 if (callm != callm_s || callm != callm_e) return False;
695
696 /* Check the sections between CALLM_S and CALLM's. Ensure that no
697 PUSH uinsn pushes any TempReg that any other PUSH in the same
698 section pushes. Ie, check that the TempReg args to PUSHes in
699 the section are unique. If not, the instrumenter generates
700 incorrect code for CALLM insns. */
701
702 callm_ptr = 0;
703
704 find_next_CALLM:
705 /* Search for the next interval, making calls_ptr .. callm_ptr
706 bracket it. */
707 while (callm_ptr < cb->used
708 && cb->instrs[callm_ptr].opcode != CALLM)
709 callm_ptr++;
710 if (callm_ptr == cb->used)
711 return True;
712 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
713
714 calls_ptr = callm_ptr - 1;
715 while (cb->instrs[calls_ptr].opcode != CALLM_S)
716 calls_ptr--;
717 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
718 vg_assert(calls_ptr >= 0);
719
720 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
721
722 /* For each PUSH insn in the interval ... */
723 for (i = calls_ptr + 1; i < callm_ptr; i++) {
724 if (cb->instrs[i].opcode != PUSH) continue;
725 t = cb->instrs[i].val1;
726 /* Ensure no later PUSH insns up to callm_ptr push the same
727 TempReg. Return False if any such are found. */
728 for (j = i+1; j < callm_ptr; j++) {
729 if (cb->instrs[j].opcode == PUSH &&
730 cb->instrs[j].val1 == t)
731 return False;
732 }
733 }
734
735 /* This interval is clean. Keep going ... */
736 callm_ptr++;
737 goto find_next_CALLM;
738}
739
740
741/*------------------------------------------------------------*/
742/*--- Printing uinstrs. ---*/
743/*------------------------------------------------------------*/
744
njn25e49d8e72002-09-23 09:36:25 +0000745/* Global that dictates whether to print generated code at all stages */
746Bool VG_(print_codegen);
747
njn563f96f2003-02-03 11:17:46 +0000748Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000749{
750 switch (cond) {
751 case CondO: return "o";
752 case CondNO: return "no";
753 case CondB: return "b";
754 case CondNB: return "nb";
755 case CondZ: return "z";
756 case CondNZ: return "nz";
757 case CondBE: return "be";
758 case CondNBE: return "nbe";
759 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000760 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000761 case CondP: return "p";
762 case CondNP: return "np";
763 case CondL: return "l";
764 case CondNL: return "nl";
765 case CondLE: return "le";
766 case CondNLE: return "nle";
767 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000768 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000769 }
770}
771
772
773static void vg_ppFlagSet ( Char* prefix, FlagSet set )
774{
775 VG_(printf)("%s", prefix);
776 if (set & FlagD) VG_(printf)("D");
777 if (set & FlagO) VG_(printf)("O");
778 if (set & FlagS) VG_(printf)("S");
779 if (set & FlagZ) VG_(printf)("Z");
780 if (set & FlagA) VG_(printf)("A");
781 if (set & FlagC) VG_(printf)("C");
782 if (set & FlagP) VG_(printf)("P");
783}
784
785
786static void ppTempReg ( Int tt )
787{
788 if ((tt & 1) == 0)
789 VG_(printf)("t%d", tt);
790 else
791 VG_(printf)("q%d", tt-1);
792}
793
794
njn4ba5a792002-09-30 10:23:54 +0000795void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000796{
797 UInt tag, val;
798 switch (operandNo) {
799 case 1: tag = u->tag1; val = u->val1; break;
800 case 2: tag = u->tag2; val = u->val2; break;
801 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000802 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000803 }
804 if (tag == Literal) val = u->lit32;
805
806 if (parens) VG_(printf)("(");
807 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000808 case TempReg: ppTempReg(val); break;
809 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
810 case Literal: VG_(printf)("$0x%x", val); break;
811 case Lit16: VG_(printf)("$0x%x", val); break;
812 case NoValue: VG_(printf)("NoValue"); break;
813 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
814 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
815 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000816 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000817 }
818 if (parens) VG_(printf)(")");
819}
820
821
njn4ba5a792002-09-30 10:23:54 +0000822Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000823{
824 switch (opc) {
825 case ADD: return (upper ? "ADD" : "add");
826 case ADC: return (upper ? "ADC" : "adc");
827 case AND: return (upper ? "AND" : "and");
828 case OR: return (upper ? "OR" : "or");
829 case XOR: return (upper ? "XOR" : "xor");
830 case SUB: return (upper ? "SUB" : "sub");
831 case SBB: return (upper ? "SBB" : "sbb");
832 case SHL: return (upper ? "SHL" : "shl");
833 case SHR: return (upper ? "SHR" : "shr");
834 case SAR: return (upper ? "SAR" : "sar");
835 case ROL: return (upper ? "ROL" : "rol");
836 case ROR: return (upper ? "ROR" : "ror");
837 case RCL: return (upper ? "RCL" : "rcl");
838 case RCR: return (upper ? "RCR" : "rcr");
839 case NOT: return (upper ? "NOT" : "not");
840 case NEG: return (upper ? "NEG" : "neg");
841 case INC: return (upper ? "INC" : "inc");
842 case DEC: return (upper ? "DEC" : "dec");
843 case BSWAP: return (upper ? "BSWAP" : "bswap");
844 default: break;
845 }
njne427a662002-10-02 11:08:25 +0000846 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000847 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000848 case CALLM_S: return "CALLM_S";
849 case CALLM_E: return "CALLM_E";
850 case INCEIP: return "INCEIP";
851 case LEA1: return "LEA1";
852 case LEA2: return "LEA2";
853 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000854 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000855 case GET: return "GET";
856 case PUT: return "PUT";
857 case GETF: return "GETF";
858 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000859 case GETSEG: return "GETSEG";
860 case PUTSEG: return "PUTSEG";
861 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000862 case LOAD: return "LD" ;
863 case STORE: return "ST" ;
864 case MOV: return "MOV";
865 case CMOV: return "CMOV";
866 case WIDEN: return "WIDEN";
867 case JMP: return "J" ;
868 case JIFZ: return "JIFZ" ;
869 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000870 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000871 case PUSH: return "PUSH" ;
872 case POP: return "POP" ;
873 case CLEAR: return "CLEAR";
874 case CC2VAL: return "CC2VAL";
875 case FPU_R: return "FPU_R";
876 case FPU_W: return "FPU_W";
877 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000878 case MMX1: return "MMX1" ;
879 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000880 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000881 case MMX2_MemRd: return "MMX2_MRd" ;
882 case MMX2_MemWr: return "MMX2_MWr" ;
sewardjca860012003-03-27 23:52:58 +0000883 case MMX2_RegRd: return "MMX2_RRd" ;
sewardjd1c9e432003-04-04 20:40:34 +0000884 case MMX2_RegWr: return "MMX2_RWr" ;
sewardjfebaa3b2003-05-25 01:07:34 +0000885 case SSE2a_MemWr: return "SSE2a_MWr";
886 case SSE2a_MemRd: return "SSE2a_MRd";
887 case SSE3g_RegRd: return "SSE3g_RRd";
sewardj02af6bc2003-06-12 00:56:06 +0000888 case SSE3g_RegWr: return "SSE3g_RWr";
sewardjb31b06d2003-06-13 00:26:02 +0000889 case SSE3g1_RegWr: return "SSE3g1_RWr";
890 case SSE3g1_RegRd: return "SSE3g1_RRd";
sewardja60be0e2003-05-26 08:47:27 +0000891 case SSE3: return "SSE3";
sewardjfebaa3b2003-05-25 01:07:34 +0000892 case SSE4: return "SSE4";
sewardja453fb02003-06-14 13:22:36 +0000893 case SSE5: return "SSE5";
sewardjfebaa3b2003-05-25 01:07:34 +0000894 case SSE3a_MemWr: return "SSE3a_MWr";
895 case SSE3a_MemRd: return "SSE3a_MRd";
njn25e49d8e72002-09-23 09:36:25 +0000896 default:
897 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000898 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000899 else {
900 VG_(printf)("unhandled opcode: %u. Perhaps "
901 "VG_(needs).extended_UCode should be set?",
902 opc);
njne427a662002-10-02 11:08:25 +0000903 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000904 }
sewardjde4a1d02002-03-22 01:27:54 +0000905 }
906}
907
sewardja38e0922002-10-01 00:50:47 +0000908static
njn4ba5a792002-09-30 10:23:54 +0000909void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000910{
911# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000912 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000913 u->regs_live_after) \
914 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000915
njn25e49d8e72002-09-23 09:36:25 +0000916 VG_(printf)("[");
917 PRINT_RREG_LIVENESS(R_EAX, "a");
918 PRINT_RREG_LIVENESS(R_EBX, "b");
919 PRINT_RREG_LIVENESS(R_ECX, "c");
920 PRINT_RREG_LIVENESS(R_EDX, "d");
921 PRINT_RREG_LIVENESS(R_ESI, "S");
922 PRINT_RREG_LIVENESS(R_EDI, "D");
923 VG_(printf)("]");
924
925# undef PRINT_RREG_LIVENESS
926}
927
928/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000929void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000930{
njn4ba5a792002-09-30 10:23:54 +0000931 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000932
933 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000934 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000935 VG_(printf)("size: %d\n", u->size);
936 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
937 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000938 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
939 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
940 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
941 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000942 VG_(printf)("signed_widen: %d\n", u->signed_widen);
943 VG_(printf)("jmpkind: %d\n", u->jmpkind);
944 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
945 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
946 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000947 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000948 VG_(printf)("\n");
949}
950
sewardja38e0922002-10-01 00:50:47 +0000951static
njn4ba5a792002-09-30 10:23:54 +0000952void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000953{
954 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000955 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000956 if (u->opcode == JMP || u->opcode == CC2VAL)
njn563f96f2003-02-03 11:17:46 +0000957 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000958
959 switch (u->size) {
960 case 0: VG_(printf)("o"); break;
961 case 1: VG_(printf)("B"); break;
962 case 2: VG_(printf)("W"); break;
963 case 4: VG_(printf)("L"); break;
964 case 8: VG_(printf)("Q"); break;
sewardjfebaa3b2003-05-25 01:07:34 +0000965 case 16: VG_(printf)("QQ"); break;
sewardjde4a1d02002-03-22 01:27:54 +0000966 default: VG_(printf)("%d", (Int)u->size); break;
967 }
968
sewardjfebaa3b2003-05-25 01:07:34 +0000969 VG_(printf)(" \t");
970
sewardjde4a1d02002-03-22 01:27:54 +0000971 switch (u->opcode) {
972
sewardjde4a1d02002-03-22 01:27:54 +0000973 case CALLM_S: case CALLM_E:
974 break;
975
976 case INCEIP:
sewardjfebaa3b2003-05-25 01:07:34 +0000977 VG_(printf)("$%d", u->val1);
sewardjde4a1d02002-03-22 01:27:54 +0000978 break;
979
980 case LEA2:
sewardjfebaa3b2003-05-25 01:07:34 +0000981 VG_(printf)("%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000982 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000983 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +0000984 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000985 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +0000986 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000987 break;
988
989 case LEA1:
sewardjfebaa3b2003-05-25 01:07:34 +0000990 VG_(printf)("%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000991 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000992 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +0000993 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000994 break;
995
sewardj7a5ebcf2002-11-13 22:42:13 +0000996 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +0000997 break;
998
999 case FPU_W:
sewardjfebaa3b2003-05-25 01:07:34 +00001000 VG_(printf)("0x%x:0x%x, ",
sewardjde4a1d02002-03-22 01:27:54 +00001001 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +00001002 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001003 break;
1004
1005 case FPU_R:
sewardjfebaa3b2003-05-25 01:07:34 +00001006 VG_(printf)("");
njn4ba5a792002-09-30 10:23:54 +00001007 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001008 VG_(printf)(", 0x%x:0x%x",
1009 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1010 break;
1011
1012 case FPU:
sewardjfebaa3b2003-05-25 01:07:34 +00001013 VG_(printf)("0x%x:0x%x",
sewardjde4a1d02002-03-22 01:27:54 +00001014 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1015 break;
1016
sewardj3d7c9c82003-03-26 21:08:13 +00001017 case MMX1:
sewardjfebaa3b2003-05-25 01:07:34 +00001018 VG_(printf)("0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001019 u->val1 & 0xFF );
1020 break;
1021
1022 case MMX2:
sewardjfebaa3b2003-05-25 01:07:34 +00001023 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001024 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1025 break;
1026
sewardjca860012003-03-27 23:52:58 +00001027 case MMX3:
sewardjfebaa3b2003-05-25 01:07:34 +00001028 VG_(printf)("0x%x:0x%x:0x%x",
sewardjca860012003-03-27 23:52:58 +00001029 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1030 break;
1031
sewardjd1c9e432003-04-04 20:40:34 +00001032 case MMX2_RegWr:
sewardjca860012003-03-27 23:52:58 +00001033 case MMX2_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001034 VG_(printf)("0x%x:0x%x, ",
sewardjca860012003-03-27 23:52:58 +00001035 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1036 VG_(pp_UOperand)(u, 2, 4, False);
1037 break;
1038
sewardj3d7c9c82003-03-26 21:08:13 +00001039 case MMX2_MemWr:
1040 case MMX2_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001041 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001042 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1043 VG_(pp_UOperand)(u, 2, 4, True);
1044 break;
1045
sewardjfebaa3b2003-05-25 01:07:34 +00001046 case SSE2a_MemWr:
1047 case SSE2a_MemRd:
1048 VG_(printf)("0x%x:0x%x:0x%x",
1049 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1050 VG_(pp_UOperand)(u, 3, 4, True);
1051 break;
1052
1053 case SSE3a_MemWr:
1054 case SSE3a_MemRd:
1055 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1056 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
sewardjde8aecf2003-05-27 00:46:28 +00001057 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
sewardjfebaa3b2003-05-25 01:07:34 +00001058 VG_(pp_UOperand)(u, 3, 4, True);
1059 break;
1060
1061 case SSE3g_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001062 case SSE3g_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001063 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1064 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1065 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1066 VG_(pp_UOperand)(u, 3, 4, True);
1067 break;
1068
sewardjb31b06d2003-06-13 00:26:02 +00001069 case SSE3g1_RegWr:
1070 case SSE3g1_RegRd:
1071 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1072 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1073 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1074 u->lit32 );
1075 VG_(pp_UOperand)(u, 3, 4, True);
1076 break;
1077
sewardja60be0e2003-05-26 08:47:27 +00001078 case SSE3:
1079 VG_(printf)("0x%x:0x%x:0x%x",
1080 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1081 u->val2 & 0xFF );
1082 break;
1083
sewardjfebaa3b2003-05-25 01:07:34 +00001084 case SSE4:
1085 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1086 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1087 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1088 break;
1089
sewardja453fb02003-06-14 13:22:36 +00001090 case SSE5:
1091 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1092 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1093 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1094 u->val3 & 0xFF );
1095 break;
1096
sewardjde4a1d02002-03-22 01:27:54 +00001097 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001098 case GETSEG: case PUTSEG:
njn4ba5a792002-09-30 10:23:54 +00001099 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001100 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001101 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001102 break;
1103
1104 case JMP:
1105 switch (u->jmpkind) {
1106 case JmpCall: VG_(printf)("-c"); break;
1107 case JmpRet: VG_(printf)("-r"); break;
1108 case JmpSyscall: VG_(printf)("-sys"); break;
1109 case JmpClientReq: VG_(printf)("-cli"); break;
1110 default: break;
1111 }
njn4ba5a792002-09-30 10:23:54 +00001112 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001113 if (CondAlways == u->cond) {
1114 /* Print x86 instruction size if filled in */
1115 if (0 != u->extra4b)
1116 VG_(printf)(" ($%u)", u->extra4b);
1117 }
sewardjde4a1d02002-03-22 01:27:54 +00001118 break;
1119
1120 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001121 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1122 case NOT: case NEG: case INC: case DEC: case BSWAP:
njn4ba5a792002-09-30 10:23:54 +00001123 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001124 break;
1125
njn25e49d8e72002-09-23 09:36:25 +00001126 /* Print a "(s)" after args passed on stack */
1127 case CCALL:
njn25e49d8e72002-09-23 09:36:25 +00001128 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001129 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001130 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001131 }
njn25e49d8e72002-09-23 09:36:25 +00001132 VG_(printf)("%p(", u->lit32);
1133 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001134 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001135 if (u->regparms_n < 1)
1136 VG_(printf)("(s)");
1137 }
1138 if (u->argc > 1) {
1139 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001140 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001141 if (u->regparms_n < 2)
1142 VG_(printf)("(s)");
1143 }
1144 if (u->argc > 2) {
1145 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001146 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001147 if (u->regparms_n < 3)
1148 VG_(printf)("(s)");
1149 }
1150 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001151 break;
1152
sewardje1042472002-09-30 12:33:11 +00001153 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001154 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001155 case ADD: case ADC: case AND: case OR:
1156 case XOR: case SUB: case SBB:
1157 case SHL: case SHR: case SAR:
1158 case ROL: case ROR: case RCL: case RCR:
njn4ba5a792002-09-30 10:23:54 +00001159 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001160 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001161 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001162 break;
1163
1164 case WIDEN:
1165 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1166 u->signed_widen?'s':'z');
njn4ba5a792002-09-30 10:23:54 +00001167 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001168 break;
1169
njn25e49d8e72002-09-23 09:36:25 +00001170 default:
1171 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001172 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001173 else {
1174 VG_(printf)("unhandled opcode: %u. Perhaps "
1175 "VG_(needs).extended_UCode should be set?",
1176 u->opcode);
njne427a662002-10-02 11:08:25 +00001177 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001178 }
sewardjde4a1d02002-03-22 01:27:54 +00001179 }
sewardjde4a1d02002-03-22 01:27:54 +00001180 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1181 VG_(printf)(" (");
1182 if (u->flags_r != FlagsEmpty)
1183 vg_ppFlagSet("-r", u->flags_r);
1184 if (u->flags_w != FlagsEmpty)
1185 vg_ppFlagSet("-w", u->flags_w);
1186 VG_(printf)(")");
1187 }
njn25e49d8e72002-09-23 09:36:25 +00001188
1189 if (ppRegsLiveness) {
1190 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001191 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001192 }
1193
sewardjde4a1d02002-03-22 01:27:54 +00001194 VG_(printf)("\n");
1195}
1196
njn4ba5a792002-09-30 10:23:54 +00001197void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001198{
njn4ba5a792002-09-30 10:23:54 +00001199 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001200}
1201
njn4ba5a792002-09-30 10:23:54 +00001202void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001203{
njn4ba5a792002-09-30 10:23:54 +00001204 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001205}
sewardjde4a1d02002-03-22 01:27:54 +00001206
njn4ba5a792002-09-30 10:23:54 +00001207void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001208{
1209 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001210 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001211 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001212 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001213 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001214 VG_(printf)("\n");
1215}
1216
1217
1218/*------------------------------------------------------------*/
1219/*--- uinstr helpers for register allocation ---*/
1220/*--- and code improvement. ---*/
1221/*------------------------------------------------------------*/
1222
njn25e49d8e72002-09-23 09:36:25 +00001223/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001224 the caller (regs), which is assumed to be big enough. Return the number
1225 of entries. Written regs are indicated in parallel array isWrites.
1226 Insns which read _and_ write a register wind up mentioning it twice.
1227 Entries are placed in the array in program order, so that if a reg is
1228 read-modified-written, it appears first as a read and then as a write.
1229 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001230*/
njn25e49d8e72002-09-23 09:36:25 +00001231__inline__
njn810086f2002-11-14 12:42:47 +00001232Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001233{
njn810086f2002-11-14 12:42:47 +00001234# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1235# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001236
1237 Int n = 0;
1238 switch (u->opcode) {
1239 case LEA1: RD(1); WR(2); break;
1240 case LEA2: RD(1); RD(2); WR(3); break;
1241
sewardjfebaa3b2003-05-25 01:07:34 +00001242 case SSE3g_RegRd:
1243 case SSE3a_MemWr:
1244 case SSE3a_MemRd:
1245 case SSE2a_MemWr:
sewardjb31b06d2003-06-13 00:26:02 +00001246 case SSE3g1_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001247 case SSE2a_MemRd: RD(3); break;
1248
sewardjb31b06d2003-06-13 00:26:02 +00001249 case SSE3g1_RegWr:
sewardj02af6bc2003-06-12 00:56:06 +00001250 case SSE3g_RegWr: WR(3); break;
sewardjfebaa3b2003-05-25 01:07:34 +00001251
sewardjca860012003-03-27 23:52:58 +00001252 case MMX2_RegRd: RD(2); break;
sewardjd1c9e432003-04-04 20:40:34 +00001253 case MMX2_RegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001254
sewardja453fb02003-06-14 13:22:36 +00001255 case SSE4: case SSE3: case SSE5:
sewardjca860012003-03-27 23:52:58 +00001256 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001257 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001258 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001259
1260 case CCALL:
1261 if (u->argc > 0) RD(1);
1262 if (u->argc > 1) RD(2);
1263 if (u->argc > 2) RD(3);
1264 if (u->has_ret_val) WR(3);
1265 break;
1266
sewardj3d7c9c82003-03-26 21:08:13 +00001267 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001268 case FPU_R: case FPU_W: RD(2); break;
1269
sewardje1042472002-09-30 12:33:11 +00001270 case GETSEG: WR(2); break;
1271 case PUTSEG: RD(1); break;
1272
sewardjde4a1d02002-03-22 01:27:54 +00001273 case GETF: WR(1); break;
1274 case PUTF: RD(1); break;
1275
1276 case GET: WR(2); break;
1277 case PUT: RD(1); break;
1278 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001279 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001280 case MOV: RD(1); WR(2); break;
1281
1282 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001283
njn25e49d8e72002-09-23 09:36:25 +00001284 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001285 case POP: WR(1); break;
1286
sewardje1042472002-09-30 12:33:11 +00001287 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001288 case CMOV:
1289 case ADD: case ADC: case AND: case OR:
1290 case XOR: case SUB: case SBB:
1291 RD(1); RD(2); WR(2); break;
1292
1293 case SHL: case SHR: case SAR:
1294 case ROL: case ROR: case RCL: case RCR:
1295 RD(1); RD(2); WR(2); break;
1296
njn25e49d8e72002-09-23 09:36:25 +00001297 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001298 RD(1); WR(1); break;
1299
1300 case WIDEN: RD(1); WR(1); break;
1301
1302 case CC2VAL: WR(1); break;
1303 case JIFZ: RD(1); break;
1304
njn25e49d8e72002-09-23 09:36:25 +00001305 default:
1306 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001307 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001308 else {
1309 VG_(printf)("unhandled opcode: %u. Perhaps "
1310 "VG_(needs).extended_UCode should be set?",
1311 u->opcode);
njne427a662002-10-02 11:08:25 +00001312 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001313 }
sewardjde4a1d02002-03-22 01:27:54 +00001314 }
1315 return n;
1316
1317# undef RD
1318# undef WR
1319}
1320
1321
njn25e49d8e72002-09-23 09:36:25 +00001322/* Change temp regs in u into real regs, as directed by the
1323 * temps[i]-->reals[i] mapping. */
1324static __inline__
njn810086f2002-11-14 12:42:47 +00001325void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001326{
1327 Int i;
1328 if (u->tag1 == TempReg) {
1329 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001330 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001331 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001332 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001333 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001334 }
1335 if (u->tag2 == TempReg) {
1336 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001337 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001338 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001339 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001340 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001341 }
1342 if (u->tag3 == TempReg) {
1343 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001344 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001345 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001346 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001347 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001348 }
1349}
1350
1351
1352/* Tedious x86-specific hack which compensates for the fact that the
1353 register numbers for %ah .. %dh do not correspond to those for %eax
1354 .. %edx. It maps a (reg size, reg no) pair to the number of the
1355 containing 32-bit reg. */
1356static __inline__
1357Int containingArchRegOf ( Int sz, Int aregno )
1358{
1359 switch (sz) {
1360 case 4: return aregno;
1361 case 2: return aregno;
1362 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001363 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001364 }
1365}
1366
1367
1368/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001369 reg. Otherwise return -1. Used in redundant-PUT elimination.
1370 Note that this is not required for skins extending UCode because
1371 this happens before instrumentation. */
sewardjde4a1d02002-03-22 01:27:54 +00001372static __inline__
1373Int maybe_uinstrReadsArchReg ( UInstr* u )
1374{
1375 switch (u->opcode) {
1376 case GET:
1377 case ADD: case ADC: case AND: case OR:
1378 case XOR: case SUB: case SBB:
1379 case SHL: case SHR: case SAR: case ROL:
1380 case ROR: case RCL: case RCR:
1381 if (u->tag1 == ArchReg)
1382 return containingArchRegOf ( u->size, u->val1 );
1383 else
1384 return -1;
1385
1386 case GETF: case PUTF:
1387 case CALLM_S: case CALLM_E:
1388 case INCEIP:
1389 case LEA1:
1390 case LEA2:
1391 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001392 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001393 case PUT:
1394 case LOAD:
1395 case STORE:
1396 case MOV:
1397 case CMOV:
1398 case JMP:
1399 case CALLM: case CLEAR: case PUSH: case POP:
1400 case NOT: case NEG: case INC: case DEC: case BSWAP:
1401 case CC2VAL:
1402 case JIFZ:
1403 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001404 case MMX1: case MMX2: case MMX3:
sewardj3d7c9c82003-03-26 21:08:13 +00001405 case MMX2_MemRd: case MMX2_MemWr:
sewardjd1c9e432003-04-04 20:40:34 +00001406 case MMX2_RegRd: case MMX2_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001407 case SSE2a_MemWr: case SSE2a_MemRd:
1408 case SSE3a_MemWr: case SSE3a_MemRd:
sewardjb31b06d2003-06-13 00:26:02 +00001409 case SSE3g_RegRd: case SSE3g_RegWr:
1410 case SSE3g1_RegWr: case SSE3g1_RegRd:
sewardja453fb02003-06-14 13:22:36 +00001411 case SSE4: case SSE3: case SSE5:
sewardjde4a1d02002-03-22 01:27:54 +00001412 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001413 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1414 case GETSEG: case PUTSEG:
1415 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001416 return -1;
1417
1418 default:
njn4ba5a792002-09-30 10:23:54 +00001419 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001420 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001421 }
1422}
1423
1424static __inline__
1425Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1426{
1427 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001428 Int tempUse[VG_MAX_REGS_USED];
1429 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001430
1431 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001432 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001433 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001434 return True;
1435 return False;
1436}
1437
1438
1439/*------------------------------------------------------------*/
1440/*--- ucode improvement. ---*/
1441/*------------------------------------------------------------*/
1442
1443/* Improve the code in cb by doing
1444 -- Redundant ArchReg-fetch elimination
1445 -- Redundant PUT elimination
1446 -- Redundant cond-code restore/save elimination
1447 The overall effect of these is to allow target registers to be
1448 cached in host registers over multiple target insns.
1449*/
1450static void vg_improve ( UCodeBlock* cb )
1451{
1452 Int i, j, k, m, n, ar, tr, told, actual_areg;
1453 Int areg_map[8];
1454 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001455 Int tempUse[VG_MAX_REGS_USED];
1456 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001457 UInstr* u;
1458 Bool wr;
1459 Int* last_live_before;
1460 FlagSet future_dead_flags;
1461
sewardj2ca39a12003-06-14 12:03:35 +00001462# if 0
1463 /* DEBUGGING HOOK */
1464 {
1465 static int n_done=0;
1466 if (VG_(clo_stop_after) > 1000000000) {
1467 if (n_done > (VG_(clo_stop_after) - 1000000000)) {
1468 dis=False;
1469 VG_(clo_trace_codegen) = 0;
1470 return;
1471 }
1472 if (n_done == (VG_(clo_stop_after) - 1000000000)) {
1473 VG_(printf)("\n");
1474 VG_(pp_UCodeBlock) ( cb, "Incoming:" );
1475 dis = True;
1476 VG_(clo_trace_codegen) = 31;
1477 }
1478 n_done++;
1479 }
1480 }
1481 /* end DEBUGGING HOOK */
1482# endif /* 0 */
1483
njn25e49d8e72002-09-23 09:36:25 +00001484 if (dis)
1485 VG_(printf) ("Improvements:\n");
1486
sewardjde4a1d02002-03-22 01:27:54 +00001487 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001488 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1489 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001490 else
1491 last_live_before = NULL;
1492
1493
1494 /* PASS 1: redundant GET elimination. (Actually, more general than
1495 that -- eliminates redundant fetches of ArchRegs). */
1496
1497 /* Find the live-range-ends for all temporaries. Duplicates code
1498 in the register allocator :-( */
1499
1500 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1501
1502 for (i = cb->used-1; i >= 0; i--) {
1503 u = &cb->instrs[i];
1504
njn810086f2002-11-14 12:42:47 +00001505 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001506
1507 /* For each temp usage ... bwds in program order. */
1508 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001509 tr = tempUse[j];
1510 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001511 if (last_live_before[tr] == -1) {
1512 vg_assert(tr >= 0 && tr < cb->nextTemp);
1513 last_live_before[tr] = wr ? (i+1) : i;
1514 }
1515 }
1516
1517 }
1518
1519# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1520 { Int q; \
1521 /* Invalidate any old binding(s) to tempreg. */ \
1522 for (q = 0; q < 8; q++) \
1523 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1524 /* Add the new binding. */ \
1525 areg_map[archreg] = (tempreg); \
1526 }
1527
1528 /* Set up the A-reg map. */
1529 for (i = 0; i < 8; i++) areg_map[i] = -1;
1530
1531 /* Scan insns. */
1532 for (i = 0; i < cb->used; i++) {
1533 u = &cb->instrs[i];
1534 if (u->opcode == GET && u->size == 4) {
1535 /* GET; see if it can be annulled. */
1536 vg_assert(u->tag1 == ArchReg);
1537 vg_assert(u->tag2 == TempReg);
1538 ar = u->val1;
1539 tr = u->val2;
1540 told = areg_map[ar];
1541 if (told != -1 && last_live_before[told] <= i) {
1542 /* ar already has an old mapping to told, but that runs
1543 out here. Annul this GET, rename tr to told for the
1544 rest of the block, and extend told's live range to that
1545 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001546 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001547 n = last_live_before[tr] + 1;
1548 if (n > cb->used) n = cb->used;
1549 last_live_before[told] = last_live_before[tr];
1550 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001551 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001552 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001553 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001554 i, tr, told,i+1, n-1);
1555 for (m = i+1; m < n; m++) {
1556 if (cb->instrs[m].tag1 == TempReg
1557 && cb->instrs[m].val1 == tr)
1558 cb->instrs[m].val1 = told;
1559 if (cb->instrs[m].tag2 == TempReg
1560 && cb->instrs[m].val2 == tr)
1561 cb->instrs[m].val2 = told;
sewardjfebaa3b2003-05-25 01:07:34 +00001562 if (cb->instrs[m].tag3 == TempReg
1563 && cb->instrs[m].val3 == tr)
1564 cb->instrs[m].val3 = told;
sewardjde4a1d02002-03-22 01:27:54 +00001565 }
1566 BIND_ARCH_TO_TEMP(ar,told);
1567 }
1568 else
1569 BIND_ARCH_TO_TEMP(ar,tr);
1570 }
1571 else if (u->opcode == GET && u->size != 4) {
1572 /* Invalidate any mapping for this archreg. */
1573 actual_areg = containingArchRegOf ( u->size, u->val1 );
1574 areg_map[actual_areg] = -1;
1575 }
1576 else if (u->opcode == PUT && u->size == 4) {
1577 /* PUT; re-establish t -> a binding */
1578 vg_assert(u->tag1 == TempReg);
1579 vg_assert(u->tag2 == ArchReg);
1580 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1581 }
1582 else if (u->opcode == PUT && u->size != 4) {
1583 /* Invalidate any mapping for this archreg. */
1584 actual_areg = containingArchRegOf ( u->size, u->val2 );
1585 areg_map[actual_areg] = -1;
1586 } else {
1587
1588 /* see if insn has an archreg as a read operand; if so try to
1589 map it. */
1590 if (u->tag1 == ArchReg && u->size == 4
1591 && areg_map[u->val1] != -1) {
1592 switch (u->opcode) {
1593 case ADD: case SUB: case AND: case OR: case XOR:
1594 case ADC: case SBB:
1595 case SHL: case SHR: case SAR: case ROL: case ROR:
1596 case RCL: case RCR:
njn25e49d8e72002-09-23 09:36:25 +00001597 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001598 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001599 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001600 i, nameIReg(4,u->val1), areg_map[u->val1]);
1601 u->tag1 = TempReg;
1602 u->val1 = areg_map[u->val1];
1603 /* Remember to extend the live range of the TempReg,
1604 if necessary. */
1605 if (last_live_before[u->val1] < i)
1606 last_live_before[u->val1] = i;
1607 break;
1608 default:
1609 break;
1610 }
1611 }
1612
1613 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001614 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001615
1616 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001617 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001618 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001619 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001620 for (m = 0; m < 8; m++)
1621 if (areg_map[m] == tr) areg_map[m] = -1;
1622 }
1623 }
1624
1625 }
1626
1627# undef BIND_ARCH_TO_TEMP
1628
sewardj05f1aa12002-04-30 00:29:36 +00001629 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1630 %ESP, since the memory check machinery always requires the
1631 in-memory value of %ESP to be up to date. Although this isn't
1632 actually required by other analyses (cache simulation), it's
1633 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001634 for (j = 0; j < 8; j++)
1635 annul_put[j] = False;
1636
1637 for (i = cb->used-1; i >= 0; i--) {
1638 u = &cb->instrs[i];
1639 if (u->opcode == NOP) continue;
1640
1641 if (u->opcode == PUT && u->size == 4) {
1642 vg_assert(u->tag2 == ArchReg);
1643 actual_areg = containingArchRegOf ( 4, u->val2 );
1644 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001645 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001646 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001647 if (dis)
1648 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001649 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001650 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001651 annul_put[actual_areg] = True;
1652 }
1653 }
1654 else if (u->opcode == PUT && u->size != 4) {
1655 actual_areg = containingArchRegOf ( u->size, u->val2 );
1656 annul_put[actual_areg] = False;
1657 }
1658 else if (u->opcode == JMP || u->opcode == JIFZ
1659 || u->opcode == CALLM) {
1660 for (j = 0; j < 8; j++)
1661 annul_put[j] = False;
1662 }
1663 else {
1664 /* If an instruction reads an ArchReg, the immediately
1665 preceding PUT cannot be annulled. */
1666 actual_areg = maybe_uinstrReadsArchReg ( u );
1667 if (actual_areg != -1)
1668 annul_put[actual_areg] = False;
1669 }
1670 }
1671
1672 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1673 dead after this point, annul the MOV insn and rename t2 to t1.
1674 Further modifies the last_live_before map. */
1675
1676# if 0
njn4ba5a792002-09-30 10:23:54 +00001677 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001678 for (i = 0; i < cb->nextTemp; i++)
1679 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1680 VG_(printf)("\n");
1681# endif
1682
1683 for (i = 0; i < cb->used-1; i++) {
1684 u = &cb->instrs[i];
1685 if (u->opcode != MOV) continue;
1686 if (u->tag1 == Literal) continue;
1687 vg_assert(u->tag1 == TempReg);
1688 vg_assert(u->tag2 == TempReg);
1689 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001690 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001691 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001692 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001693 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1694 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1695 if (cb->instrs[j].tag1 == TempReg
1696 && cb->instrs[j].val1 == u->val2)
1697 cb->instrs[j].val1 = u->val1;
1698 if (cb->instrs[j].tag2 == TempReg
1699 && cb->instrs[j].val2 == u->val2)
1700 cb->instrs[j].val2 = u->val1;
sewardjfebaa3b2003-05-25 01:07:34 +00001701 if (cb->instrs[j].tag3 == TempReg
1702 && cb->instrs[j].val3 == u->val2)
1703 cb->instrs[j].val3 = u->val1;
sewardjde4a1d02002-03-22 01:27:54 +00001704 }
1705 last_live_before[u->val1] = last_live_before[u->val2];
1706 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001707 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001708 }
1709 }
1710
1711 /* PASS 3: redundant condition-code restore/save elimination.
1712 Scan backwards from the end. future_dead_flags records the set
1713 of flags which are dead at this point, that is, will be written
1714 before they are next read. Earlier uinsns which write flags
1715 already in future_dead_flags can have their writes annulled.
1716 */
1717 future_dead_flags = FlagsEmpty;
1718
1719 for (i = cb->used-1; i >= 0; i--) {
1720 u = &cb->instrs[i];
1721
1722 /* We might never make it to insns beyond this one, so be
1723 conservative. */
1724 if (u->opcode == JIFZ || u->opcode == JMP) {
1725 future_dead_flags = FlagsEmpty;
1726 continue;
1727 }
1728
sewardjfbb6cda2002-07-24 09:33:52 +00001729 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1730 For example people try to mess with bit 21 to see if CPUID
1731 works. The setting may or may not actually take hold. So we
1732 play safe here. */
1733 if (u->opcode == PUTF) {
1734 future_dead_flags = FlagsEmpty;
1735 continue;
1736 }
1737
sewardjde4a1d02002-03-22 01:27:54 +00001738 /* We can annul the flags written by this insn if it writes a
1739 subset (or eq) of the set of flags known to be dead after
1740 this insn. If not, just record the flags also written by
1741 this insn.*/
1742 if (u->flags_w != FlagsEmpty
1743 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001744 if (dis) {
1745 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001746 vg_ppFlagSet("", u->flags_w);
1747 VG_(printf)(" due to later ");
1748 vg_ppFlagSet("", future_dead_flags);
1749 VG_(printf)("\n");
1750 }
1751 u->flags_w = FlagsEmpty;
1752 } else {
1753 future_dead_flags
1754 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1755 }
1756
1757 /* If this insn also reads flags, empty out future_dead_flags so
1758 as to force preceding writes not to be annulled. */
1759 if (u->flags_r != FlagsEmpty)
1760 future_dead_flags = FlagsEmpty;
1761 }
1762
1763 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001764 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1765
1766 if (dis) {
1767 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001768 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001769 }
sewardjde4a1d02002-03-22 01:27:54 +00001770}
1771
njn9b007f62003-04-07 14:40:25 +00001772/*------------------------------------------------------------*/
1773/*--- %ESP-update pass ---*/
1774/*------------------------------------------------------------*/
1775
1776/* For skins that want to know about %ESP changes, this pass adds
1777 in the appropriate hooks. We have to do it after the skin's
1778 instrumentation, so the skin doesn't have to worry about the CCALLs
1779 it adds in, and we must do it before register allocation because
1780 spilled temps make it much harder to work out the %esp deltas.
1781 Thus we have it as an extra phase between the two. */
1782static
1783UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1784{
1785 UCodeBlock* cb;
1786 UInstr* u;
1787 Int delta = 0;
1788 UInt t_ESP = INVALID_TEMPREG;
sewardj05bcdcb2003-05-18 10:05:38 +00001789 Int i;
njn9b007f62003-04-07 14:40:25 +00001790
1791 cb = VG_(setup_UCodeBlock)(cb_in);
1792
1793 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1794 u = VG_(get_instr)(cb_in, i);
1795
1796 if (GET == u->opcode && R_ESP == u->val1) {
1797 t_ESP = u->val2;
1798 delta = 0;
1799
1800 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1801
1802# define DO_GENERIC \
1803 if (VG_(track_events).new_mem_stack || \
1804 VG_(track_events).die_mem_stack) { \
1805 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1806 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1807 1, 1, False); \
1808 }
1809
1810# define DO(kind, size) \
1811 if (VG_(track_events).kind##_mem_stack_##size) { \
1812 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1813 uCCall(cb, (Addr) VG_(track_events).kind##_mem_stack_##size,\
1814 1, 1, False); \
1815 \
1816 } else \
1817 DO_GENERIC \
1818 break
1819
1820 if (u->val1 == t_ESP) {
1821 /* Known delta, common cases handled specially. */
1822 switch (delta) {
1823 case 4: DO(die, 4);
1824 case -4: DO(new, 4);
1825 case 8: DO(die, 8);
1826 case -8: DO(new, 8);
1827 case 12: DO(die, 12);
1828 case -12: DO(new, 12);
1829 case 16: DO(die, 16);
1830 case -16: DO(new, 16);
1831 case 32: DO(die, 32);
1832 case -32: DO(new, 32);
1833 default: DO_GENERIC; break;
1834 }
1835 } else {
1836 /* Unknown delta */
1837 DO_GENERIC;
1838 }
1839 delta = 0;
1840
1841# undef DO
1842# undef DO_GENERIC
1843
1844 } else if (Literal == u->tag1 && t_ESP == u->val2) {
1845 if (ADD == u->opcode) delta += u->lit32;
1846 if (SUB == u->opcode) delta -= u->lit32;
1847
1848 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1849 TempReg == u->tag2) {
1850 t_ESP = u->val2;
1851 }
1852 VG_(copy_UInstr) ( cb, u );
1853 }
1854
1855 VG_(free_UCodeBlock)(cb_in);
1856 return cb;
1857}
sewardjde4a1d02002-03-22 01:27:54 +00001858
1859/*------------------------------------------------------------*/
1860/*--- The new register allocator. ---*/
1861/*------------------------------------------------------------*/
1862
1863typedef
1864 struct {
1865 /* Becomes live for the first time after this insn ... */
1866 Int live_after;
1867 /* Becomes dead for the last time after this insn ... */
1868 Int dead_before;
1869 /* The "home" spill slot, if needed. Never changes. */
1870 Int spill_no;
1871 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1872 Int real_no;
1873 }
1874 TempInfo;
1875
1876
1877/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1878 them in spill locations, and add spill code, if there are not
1879 enough real regs. The usual register allocation deal, in short.
1880
1881 Important redundancy of representation:
1882
1883 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1884 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1885
1886 The .real_no field of a TempInfo gives the current RRR for
1887 this TempReg, or VG_NOVALUE if the TempReg is currently
1888 in memory, in which case it is in the SpillNo denoted by
1889 spillno.
1890
1891 These pieces of information (a fwds-bwds mapping, really) must
1892 be kept consistent!
1893
1894 This allocator uses the so-called Second Chance Bin Packing
1895 algorithm, as described in "Quality and Speed in Linear-scan
1896 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1897 pp142-151). It is simple and fast and remarkably good at
1898 minimising the amount of spill code introduced.
1899*/
1900
1901static
1902UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1903{
1904 TempInfo* temp_info;
1905 Int real_to_temp[VG_MAX_REALREGS];
1906 Bool is_spill_cand[VG_MAX_REALREGS];
1907 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1908 Int i, j, k, m, r, tno, max_ss_no;
1909 Bool wr, defer, isRead, spill_reqd;
njnf4ce3d32003-02-10 10:17:26 +00001910 UInt realUse[VG_MAX_REGS_USED];
1911 Int tempUse[VG_MAX_REGS_USED];
1912 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001913 UCodeBlock* c2;
1914
1915 /* Used to denote ... well, "no value" in this fn. */
1916# define VG_NOTHING (-2)
1917
1918 /* Initialise the TempReg info. */
1919 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001920 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1921 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001922 else
1923 temp_info = NULL;
1924
1925 for (i = 0; i < c1->nextTemp; i++) {
1926 temp_info[i].live_after = VG_NOTHING;
1927 temp_info[i].dead_before = VG_NOTHING;
1928 temp_info[i].spill_no = VG_NOTHING;
1929 /* temp_info[i].real_no is not yet relevant. */
1930 }
1931
1932 spill_reqd = False;
1933
1934 /* Scan fwds to establish live ranges. */
1935
1936 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00001937 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1938 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00001939 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00001940
1941 /* For each temp usage ... fwds in program order */
1942 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001943 tno = tempUse[j];
1944 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001945 if (wr) {
1946 /* Writes hold a reg live until after this insn. */
1947 if (temp_info[tno].live_after == VG_NOTHING)
1948 temp_info[tno].live_after = i;
1949 if (temp_info[tno].dead_before < i + 1)
1950 temp_info[tno].dead_before = i + 1;
1951 } else {
1952 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00001953 if (temp_info[tno].live_after == VG_NOTHING) {
1954 VG_(printf)("At instr %d...\n", i);
1955 VG_(core_panic)("First use of tmp not a write,"
1956 " probably a skin instrumentation error");
1957 }
sewardjde4a1d02002-03-22 01:27:54 +00001958 /* Reads only hold it live until before this insn. */
1959 if (temp_info[tno].dead_before < i)
1960 temp_info[tno].dead_before = i;
1961 }
1962 }
1963 }
1964
1965# if 0
1966 /* Sanity check on live ranges. Expensive but correct. */
1967 for (i = 0; i < c1->nextTemp; i++) {
1968 vg_assert( (temp_info[i].live_after == VG_NOTHING
1969 && temp_info[i].dead_before == VG_NOTHING)
1970 || (temp_info[i].live_after != VG_NOTHING
1971 && temp_info[i].dead_before != VG_NOTHING) );
1972 }
1973# endif
1974
1975 /* Do a rank-based allocation of TempRegs to spill slot numbers.
1976 We put as few as possible values in spill slots, but
1977 nevertheless need to have an assignment to them just in case. */
1978
1979 max_ss_no = -1;
1980
1981 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
1982 ss_busy_until_before[i] = 0;
1983
1984 for (i = 0; i < c1->nextTemp; i++) {
1985
1986 /* True iff this temp is unused. */
1987 if (temp_info[i].live_after == VG_NOTHING)
1988 continue;
1989
1990 /* Find the lowest-numbered spill slot which is available at the
1991 start point of this interval, and assign the interval to
1992 it. */
1993 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
1994 if (ss_busy_until_before[j] <= temp_info[i].live_after)
1995 break;
1996 if (j == VG_MAX_SPILLSLOTS) {
1997 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00001998 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00001999 }
2000 ss_busy_until_before[j] = temp_info[i].dead_before;
2001 temp_info[i].spill_no = j;
2002 if (j > max_ss_no)
2003 max_ss_no = j;
2004 }
2005
2006 VG_(total_reg_rank) += (max_ss_no+1);
2007
2008 /* Show live ranges and assigned spill slot nos. */
2009
njn25e49d8e72002-09-23 09:36:25 +00002010 if (dis) {
2011 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00002012
2013 for (i = 0; i < c1->nextTemp; i++) {
2014 if (temp_info[i].live_after == VG_NOTHING)
2015 continue;
2016 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002017 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00002018 i,
2019 temp_info[i].live_after,
2020 temp_info[i].dead_before,
2021 temp_info[i].spill_no
2022 );
2023 }
njn25e49d8e72002-09-23 09:36:25 +00002024 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00002025 }
2026
2027 /* Now that we've established a spill slot number for each used
2028 temporary, we can go ahead and do the core of the "Second-chance
2029 binpacking" allocation algorithm. */
2030
njn25e49d8e72002-09-23 09:36:25 +00002031 if (dis) VG_(printf)("Register allocated UCode:\n");
2032
2033
sewardjde4a1d02002-03-22 01:27:54 +00002034 /* Resulting code goes here. We generate it all in a forwards
2035 pass. */
njn4ba5a792002-09-30 10:23:54 +00002036 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002037 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00002038
2039 /* At the start, no TempRegs are assigned to any real register.
2040 Correspondingly, all temps claim to be currently resident in
2041 their spill slots, as computed by the previous two passes. */
2042 for (i = 0; i < VG_MAX_REALREGS; i++)
2043 real_to_temp[i] = VG_NOTHING;
2044 for (i = 0; i < c1->nextTemp; i++)
2045 temp_info[i].real_no = VG_NOTHING;
2046
sewardjde4a1d02002-03-22 01:27:54 +00002047 /* Process each insn in turn. */
2048 for (i = 0; i < c1->used; i++) {
2049
2050 if (c1->instrs[i].opcode == NOP) continue;
2051 VG_(uinstrs_prealloc)++;
2052
2053# if 0
2054 /* Check map consistency. Expensive but correct. */
2055 for (r = 0; r < VG_MAX_REALREGS; r++) {
2056 if (real_to_temp[r] != VG_NOTHING) {
2057 tno = real_to_temp[r];
2058 vg_assert(tno >= 0 && tno < c1->nextTemp);
2059 vg_assert(temp_info[tno].real_no == r);
2060 }
2061 }
2062 for (tno = 0; tno < c1->nextTemp; tno++) {
2063 if (temp_info[tno].real_no != VG_NOTHING) {
2064 r = temp_info[tno].real_no;
2065 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
2066 vg_assert(real_to_temp[r] == tno);
2067 }
2068 }
2069# endif
2070
njn25e49d8e72002-09-23 09:36:25 +00002071 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002072 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00002073
2074 /* First, free up enough real regs for this insn. This may
2075 generate spill stores since we may have to evict some TempRegs
2076 currently in real regs. Also generates spill loads. */
2077
njn810086f2002-11-14 12:42:47 +00002078 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2079 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002080 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002081
2082 /* For each ***different*** temp mentioned in the insn .... */
2083 for (j = 0; j < k; j++) {
2084
2085 /* First check if the temp is mentioned again later; if so,
2086 ignore this mention. We only want to process each temp
2087 used by the insn once, even if it is mentioned more than
2088 once. */
2089 defer = False;
njn810086f2002-11-14 12:42:47 +00002090 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00002091 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002092 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00002093 defer = True;
2094 if (defer)
2095 continue;
2096
njn810086f2002-11-14 12:42:47 +00002097 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00002098 First of all, if it already has a register assigned, we
2099 don't need to do anything more. */
2100 if (temp_info[tno].real_no != VG_NOTHING)
2101 continue;
2102
2103 /* No luck. The next thing to do is see if there is a
2104 currently unassigned register available. If so, bag it. */
2105 for (r = 0; r < VG_MAX_REALREGS; r++) {
2106 if (real_to_temp[r] == VG_NOTHING)
2107 break;
2108 }
2109 if (r < VG_MAX_REALREGS) {
2110 real_to_temp[r] = tno;
2111 temp_info[tno].real_no = r;
2112 continue;
2113 }
2114
2115 /* Unfortunately, that didn't pan out either. So we'll have
2116 to eject some other unfortunate TempReg into a spill slot
2117 in order to free up a register. Of course, we need to be
2118 careful not to eject some other TempReg needed by this
2119 insn.
2120
2121 Select r in 0 .. VG_MAX_REALREGS-1 such that
2122 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00002123 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002124 wrong to eject some other TempReg which we need to use in
2125 this insn.
2126
2127 It is here that it is important to make a good choice of
2128 register to spill. */
2129
2130 /* First, mark those regs which are not spill candidates. */
2131 for (r = 0; r < VG_MAX_REALREGS; r++) {
2132 is_spill_cand[r] = True;
2133 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002134 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002135 is_spill_cand[r] = False;
2136 break;
2137 }
2138 }
2139 }
2140
2141 /* We can choose any r satisfying is_spill_cand[r]. However,
2142 try to make a good choice. First, try and find r such
2143 that the associated TempReg is already dead. */
2144 for (r = 0; r < VG_MAX_REALREGS; r++) {
2145 if (is_spill_cand[r] &&
2146 temp_info[real_to_temp[r]].dead_before <= i)
2147 goto have_spill_cand;
2148 }
2149
2150 /* No spill cand is mapped to a dead TempReg. Now we really
2151 _do_ have to generate spill code. Choose r so that the
2152 next use of its associated TempReg is as far ahead as
2153 possible, in the hope that this will minimise the number of
2154 consequent reloads required. This is a bit expensive, but
2155 we don't have to do it very often. */
2156 {
2157 Int furthest_r = VG_MAX_REALREGS;
2158 Int furthest = 0;
2159 for (r = 0; r < VG_MAX_REALREGS; r++) {
2160 if (!is_spill_cand[r]) continue;
2161 for (m = i+1; m < c1->used; m++)
2162 if (uInstrMentionsTempReg(&c1->instrs[m],
2163 real_to_temp[r]))
2164 break;
2165 if (m > furthest) {
2166 furthest = m;
2167 furthest_r = r;
2168 }
2169 }
2170 r = furthest_r;
2171 goto have_spill_cand;
2172 }
2173
2174 have_spill_cand:
2175 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002176 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002177
2178 /* Eject r. Important refinement: don't bother if the
2179 associated TempReg is now dead. */
2180 vg_assert(real_to_temp[r] != VG_NOTHING);
2181 vg_assert(real_to_temp[r] != tno);
2182 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2183 if (temp_info[real_to_temp[r]].dead_before > i) {
2184 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002185 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002186 SpillNo, temp_info[real_to_temp[r]].spill_no);
2187 VG_(uinstrs_spill)++;
2188 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002189 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002190 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002191 }
2192
2193 /* Decide if tno is read. */
2194 isRead = False;
2195 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002196 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002197 isRead = True;
2198
2199 /* If so, generate a spill load. */
2200 if (isRead) {
2201 uInstr2(c2, GET, 4,
2202 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002203 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002204 VG_(uinstrs_spill)++;
2205 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002206 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002207 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002208 }
2209
2210 /* Update the forwards and backwards maps. */
2211 real_to_temp[r] = tno;
2212 temp_info[tno].real_no = r;
2213 }
2214
2215 /* By this point, all TempRegs mentioned by the insn have been
2216 bought into real regs. We now copy the insn to the output
2217 and use patchUInstr to convert its rTempRegs into
2218 realregs. */
2219 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002220 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002221 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002222 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002223
njn25e49d8e72002-09-23 09:36:25 +00002224 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002225 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002226 VG_(printf)("\n");
2227 }
2228 }
2229
2230 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002231 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002232
njn4ba5a792002-09-30 10:23:54 +00002233 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002234
2235 if (spill_reqd)
2236 VG_(translations_needing_spill)++;
2237
2238 return c2;
2239
2240# undef VG_NOTHING
2241
2242}
njn25e49d8e72002-09-23 09:36:25 +00002243extern void fooble(int);
2244/* Analysis records liveness of all general-use RealRegs in the UCode. */
2245static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2246{
2247 Int i, j, k;
2248 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002249 Int regUse[VG_MAX_REGS_USED];
2250 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002251 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002252
njn25e49d8e72002-09-23 09:36:25 +00002253 /* All regs are dead at the end of the block */
2254 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002255
sewardjde4a1d02002-03-22 01:27:54 +00002256 for (i = cb->used-1; i >= 0; i--) {
2257 u = &cb->instrs[i];
2258
njn25e49d8e72002-09-23 09:36:25 +00002259 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002260
njn810086f2002-11-14 12:42:47 +00002261 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002262
njn25e49d8e72002-09-23 09:36:25 +00002263 /* For each reg usage ... bwds in program order. Variable is live
2264 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002265 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002266 convert it to our rank number. */
2267 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002268 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002269 rregs_live,
njn810086f2002-11-14 12:42:47 +00002270 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002271 }
2272 }
sewardjde4a1d02002-03-22 01:27:54 +00002273}
2274
sewardjde4a1d02002-03-22 01:27:54 +00002275/*------------------------------------------------------------*/
2276/*--- Main entry point for the JITter. ---*/
2277/*------------------------------------------------------------*/
2278
2279/* Translate the basic block beginning at orig_addr, placing the
2280 translation in a vg_malloc'd block, the address and size of which
2281 are returned in trans_addr and trans_size. Length of the original
2282 block is also returned in orig_size. If the latter three are NULL,
2283 this call is being done for debugging purposes, in which case (a)
2284 throw away the translation once it is made, and (b) produce a load
2285 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002286
2287 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002288*/
njn25e49d8e72002-09-23 09:36:25 +00002289void VG_(translate) ( /*IN*/ ThreadState* tst,
2290 /*IN*/ Addr orig_addr,
2291 /*OUT*/ UInt* orig_size,
2292 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002293 /*OUT*/ UInt* trans_size,
2294 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002295{
2296 Int n_disassembled_bytes, final_code_size;
2297 Bool debugging_translation;
2298 UChar* final_code;
2299 UCodeBlock* cb;
sewardja60be0e2003-05-26 08:47:27 +00002300 Bool notrace_until_done;
sewardja453fb02003-06-14 13:22:36 +00002301 UInt notrace_until_limit = 18000;
sewardjde4a1d02002-03-22 01:27:54 +00002302
2303 VGP_PUSHCC(VgpTranslate);
2304 debugging_translation
2305 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2306
sewardja60be0e2003-05-26 08:47:27 +00002307 /* If codegen tracing, don't start tracing until
2308 notrace_until_limit blocks have gone by. This avoids printing
2309 huge amounts of useless junk when all we want to see is the last
2310 few blocks translated prior to a failure. Set
2311 notrace_until_limit to be the number of translations to be made
2312 before --trace-codegen= style printing takes effect. */
2313 notrace_until_done
2314 = VG_(overall_in_count) > notrace_until_limit;
2315
njn25e49d8e72002-09-23 09:36:25 +00002316 if (!debugging_translation)
2317 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002318
njn4ba5a792002-09-30 10:23:54 +00002319 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002320 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002321
njn25e49d8e72002-09-23 09:36:25 +00002322 /* If doing any code printing, print a basic block start marker */
sewardja60be0e2003-05-26 08:47:27 +00002323 if (VG_(clo_trace_codegen) && notrace_until_done) {
njn25e49d8e72002-09-23 09:36:25 +00002324 Char fnname[64] = "";
2325 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2326 VG_(printf)(
njne0205ff2003-04-08 00:56:14 +00002327 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %llu ====\n\n",
njn25e49d8e72002-09-23 09:36:25 +00002328 VG_(overall_in_count), fnname, orig_addr,
2329 VG_(overall_in_osize), VG_(overall_in_tsize),
2330 VG_(bbs_done));
2331 }
2332
2333 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
sewardja60be0e2003-05-26 08:47:27 +00002334# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2335 ( debugging_translation \
2336 || (notrace_until_done \
2337 && (VG_(clo_trace_codegen) & (1 << (n-1))) ))
njn25e49d8e72002-09-23 09:36:25 +00002338
sewardjde4a1d02002-03-22 01:27:54 +00002339 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002340 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2341 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002342 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002343 VGP_POPCC(VgpToUCode);
2344
sewardjde4a1d02002-03-22 01:27:54 +00002345 /* Try and improve the code a bit. */
2346 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002347 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2348 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002349 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002350 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002351 }
2352
njn25e49d8e72002-09-23 09:36:25 +00002353 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2354 SK_(instrument) looks at it. */
2355 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2356 VGP_PUSHCC(VgpInstrument);
2357 cb = SK_(instrument) ( cb, orig_addr );
2358 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002359 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002360 VG_(saneUCodeBlock)( cb );
2361 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002362
njn9b007f62003-04-07 14:40:25 +00002363 /* Add %ESP-update hooks if the skin requires them */
2364 /* Nb: We don't print out this phase, because it doesn't do much */
2365 if (VG_(need_to_handle_esp_assignment)()) {
2366 VGP_PUSHCC(VgpESPUpdate);
2367 cb = vg_ESP_update_pass ( cb );
2368 VGP_POPCC(VgpESPUpdate);
2369 }
2370
sewardjde4a1d02002-03-22 01:27:54 +00002371 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002372 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2373 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002374 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002375 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002376
njn25e49d8e72002-09-23 09:36:25 +00002377 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2378 * anything; results can be seen when emitting final code). */
2379 VGP_PUSHCC(VgpLiveness);
2380 vg_realreg_liveness_analysis ( cb );
2381 VGP_POPCC(VgpLiveness);
2382
2383 /* Emit final code */
2384 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2385
2386 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002387 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002388 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002389 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002390
njn25e49d8e72002-09-23 09:36:25 +00002391#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2392
sewardjde4a1d02002-03-22 01:27:54 +00002393 if (debugging_translation) {
2394 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002395 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002396 } else {
2397 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002398 *orig_size = n_disassembled_bytes;
2399 *trans_addr = (Addr)final_code;
2400 *trans_size = final_code_size;
2401 }
njn25e49d8e72002-09-23 09:36:25 +00002402 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002403}
2404
2405/*--------------------------------------------------------------------*/
2406/*--- end vg_translate.c ---*/
2407/*--------------------------------------------------------------------*/