
/*--------------------------------------------------------------------*/
/*--- The JITter proper: register allocation & code improvement    ---*/
/*---                                               vg_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, an extensible x86 protected-mode
   emulator for monitoring program execution on x86-Unixes.

   Copyright (C) 2000-2003 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
njn25e49d8e72002-09-23 09:36:25 +000038#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000058 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000059 cb->used = cb->size = 0;
60 cb->nextTemp = cb_in->nextTemp;
61 cb->instrs = NULL;
62 return cb;
63}
sewardjde4a1d02002-03-22 01:27:54 +000064
njn4ba5a792002-09-30 10:23:54 +000065void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000066{
njn25e49d8e72002-09-23 09:36:25 +000067 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
68 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000069}
70
71
72/* Ensure there's enough space in a block to add one uinstr. */
73static __inline__
74void ensureUInstr ( UCodeBlock* cb )
75{
76 if (cb->used == cb->size) {
77 if (cb->instrs == NULL) {
78 vg_assert(cb->size == 0);
79 vg_assert(cb->used == 0);
80 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000081 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000082 } else {
83 Int i;
njn25e49d8e72002-09-23 09:36:25 +000084 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000085 2 * sizeof(UInstr) * cb->size);
86 for (i = 0; i < cb->used; i++)
87 instrs2[i] = cb->instrs[i];
88 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000089 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000090 cb->instrs = instrs2;
91 }
92 }
93
94 vg_assert(cb->used < cb->size);
95}
96
97
98__inline__
njn4ba5a792002-09-30 10:23:54 +000099void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000100{
101 u->val1 = u->val2 = u->val3 = 0;
102 u->tag1 = u->tag2 = u->tag3 = NoValue;
103 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000104 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000105 u->signed_widen = u->has_ret_val = False;
106 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000107 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000108 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000109 u->size = 0;
110 u->cond = 0;
111 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000112 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000113}
114
115
116/* Add an instruction to a ucode block, and return the index of the
117 instruction. */
118__inline__
njn4ba5a792002-09-30 10:23:54 +0000119void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000120 Tag tag1, UInt val1,
121 Tag tag2, UInt val2,
122 Tag tag3, UInt val3 )
123{
124 UInstr* ui;
125 ensureUInstr(cb);
126 ui = & cb->instrs[cb->used];
127 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000128 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000129 ui->val1 = val1;
130 ui->val2 = val2;
131 ui->val3 = val3;
132 ui->opcode = opcode;
133 ui->tag1 = tag1;
134 ui->tag2 = tag2;
135 ui->tag3 = tag3;
136 ui->size = sz;
137 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
138 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
139 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
140}
141
142
143__inline__
njn4ba5a792002-09-30 10:23:54 +0000144void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000145 Tag tag1, UInt val1,
146 Tag tag2, UInt val2 )
147{
148 UInstr* ui;
149 ensureUInstr(cb);
150 ui = & cb->instrs[cb->used];
151 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000152 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000153 ui->val1 = val1;
154 ui->val2 = val2;
155 ui->opcode = opcode;
156 ui->tag1 = tag1;
157 ui->tag2 = tag2;
158 ui->size = sz;
159 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
160 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
161}
162
163
164__inline__
njn4ba5a792002-09-30 10:23:54 +0000165void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000166 Tag tag1, UInt val1 )
167{
168 UInstr* ui;
169 ensureUInstr(cb);
170 ui = & cb->instrs[cb->used];
171 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000172 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000173 ui->val1 = val1;
174 ui->opcode = opcode;
175 ui->tag1 = tag1;
176 ui->size = sz;
177 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
178}
179
180
181__inline__
njn4ba5a792002-09-30 10:23:54 +0000182void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000183{
184 UInstr* ui;
185 ensureUInstr(cb);
186 ui = & cb->instrs[cb->used];
187 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000188 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000189 ui->opcode = opcode;
190 ui->size = sz;
191}
192
sewardjde4a1d02002-03-22 01:27:54 +0000193/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000194__inline__
njn4ba5a792002-09-30 10:23:54 +0000195void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000196{
197 ensureUInstr(cb);
198 cb->instrs[cb->used] = *instr;
199 cb->used++;
200}
201
sewardjde4a1d02002-03-22 01:27:54 +0000202/* Copy auxiliary info from one uinstr to another. */
203static __inline__
204void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
205{
njn25e49d8e72002-09-23 09:36:25 +0000206 dst->cond = src->cond;
207 dst->extra4b = src->extra4b;
208 dst->signed_widen = src->signed_widen;
209 dst->jmpkind = src->jmpkind;
210 dst->flags_r = src->flags_r;
211 dst->flags_w = src->flags_w;
212 dst->argc = src->argc;
213 dst->regparms_n = src->regparms_n;
214 dst->has_ret_val = src->has_ret_val;
215 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000216}
217
218
sewardjde4a1d02002-03-22 01:27:54 +0000219/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000220void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000221{
222 LAST_UINSTR(cb).lit32 = lit32;
223}
224
225
njn25e49d8e72002-09-23 09:36:25 +0000226/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000227void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
228 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000229{
230 vg_assert(argc < 4);
231 vg_assert(regparms_n <= argc);
232 LAST_UINSTR(cb).lit32 = fn;
233 LAST_UINSTR(cb).argc = argc;
234 LAST_UINSTR(cb).regparms_n = regparms_n;
235 LAST_UINSTR(cb).has_ret_val = has_ret_val;
236}
237
njn810086f2002-11-14 12:42:47 +0000238/* For the last uinsn inserted into cb, set the read, written and
239 undefined flags. Undefined flags are counted as written, but it
240 seems worthwhile to distinguish them.
241*/
242__inline__
243void VG_(set_flag_fields) ( UCodeBlock* cb,
244 FlagSet rr, FlagSet ww, FlagSet uu )
245{
246 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
247
248 vg_assert(rr == (rr & FlagsALL));
249 vg_assert(uw == (uw & FlagsALL));
250 LAST_UINSTR(cb).flags_r = rr;
251 LAST_UINSTR(cb).flags_w = uw;
252}
253
254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
njn25e49d8e72002-09-23 09:36:25 +0000261#if 1
262# define BEST_ALLOC_ORDER
263#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
/* This seems as good a place as any to record some important stuff
   about ucode semantics.

   * TempRegs are 32 bits wide.  LOADs of 8/16 bit values into a
     TempReg are defined to zero-extend the loaded value to 32 bits.
     This is needed to make the translation of movzbl et al work
     properly.

   * Similarly, GETs of 8/16 bit ArchRegs are zero-extended.

   * Arithmetic on TempRegs is at the specified size.  For example,
     SUBW t1, t2 has to result in a real 16 bit x86 subtraction
     being emitted -- not a 32 bit one.

   * On some insns we allow the cc bit to be set.  If so, the
     intention is that the simulated machine's %eflags register
     is copied into that of the real machine before the insn,
     and copied back again afterwards.  This means that the
     code generated for that insn must be very careful only to
     update %eflags in the intended way.  This is particularly
     important for the routines referenced by CALL insns.
*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
njn25e49d8e72002-09-23 09:36:25 +0000396 Before liveness analysis, save_e[acd]x fields should all be True.
397 Afterwards, they may be False.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
njn25e49d8e72002-09-23 09:36:25 +0000402Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000403{
njn25e49d8e72002-09-23 09:36:25 +0000404# define LIT0 (u->lit32 == 0)
sewardjb31b06d2003-06-13 00:26:02 +0000405# define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
njn25e49d8e72002-09-23 09:36:25 +0000406# define LIT1 (!(LIT0))
407# define LITm (u->tag1 == Literal ? True : LIT0 )
sewardj3d7c9c82003-03-26 21:08:13 +0000408# define SZ8 (u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000409# define SZ4 (u->size == 4)
410# define SZ2 (u->size == 2)
411# define SZ1 (u->size == 1)
412# define SZ0 (u->size == 0)
413# define SZ42 (u->size == 4 || u->size == 2)
sewardjd7971012003-04-04 00:21:58 +0000414# define SZ48 (u->size == 4 || u->size == 8)
sewardjfebaa3b2003-05-25 01:07:34 +0000415# define SZ416 (u->size == 4 || u->size == 16)
sewardjde8aecf2003-05-27 00:46:28 +0000416# define SZsse (u->size == 4 || u->size == 8 || u->size == 16)
njn25e49d8e72002-09-23 09:36:25 +0000417# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
418# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
419 || u->size == 10 || u->size == 28 || u->size == 108)
420# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
421 ? (u->size == 4) : True)
422
423/* For these ones, two cases:
424 *
425 * 1. They are transliterations of the corresponding x86 instruction, in
426 * which case they should have its flags (except that redundant write
427 * flags can be annulled by the optimisation pass).
428 *
429 * 2. They are being used generally for other purposes, eg. helping with a
430 * 'rep'-prefixed instruction, in which case should have empty flags .
431 */
432# define emptyR (u->flags_r == FlagsEmpty)
433# define emptyW (u->flags_w == FlagsEmpty)
434# define CC0 (emptyR && emptyW)
435# define CCr (u->flags_r == FlagsALL && emptyW)
436# define CCw (emptyR && u->flags_w == FlagsALL)
437# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
438# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
439# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
440# define CCb ((u->flags_r==FlagC || emptyR) && \
441 (u->flags_w==FlagsOSZACP || emptyW))
442# define CCd ((u->flags_r==FlagC || emptyR) && \
443 (u->flags_w==FlagsOC || emptyW))
sewardjc232b212002-12-10 22:24:03 +0000444# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
445 || (u->flags_r==FlagsZCP && emptyW))
njn25e49d8e72002-09-23 09:36:25 +0000446# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
447# define CCj (u->cond==CondAlways ? CC0 : CCg)
448
sewardjde4a1d02002-03-22 01:27:54 +0000449# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
450# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
451# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
452# define A1 (u->tag1 == ArchReg)
453# define A2 (u->tag2 == ArchReg)
454# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
455# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
456# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
457# define L1 (u->tag1 == Literal && u->val1 == 0)
458# define L2 (u->tag2 == Literal && u->val2 == 0)
459# define Ls1 (u->tag1 == Lit16)
sewardjfebaa3b2003-05-25 01:07:34 +0000460# define Ls2 (u->tag2 == Lit16)
sewardjde4a1d02002-03-22 01:27:54 +0000461# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000462# define TRL1 (TR1 || L1)
463# define TRAL1 (TR1 || A1 || L1)
sewardjde4a1d02002-03-22 01:27:54 +0000464# define N1 (u->tag1 == NoValue)
465# define N2 (u->tag2 == NoValue)
466# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000467# define Se1 (u->tag1 == ArchRegS)
468# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000469
njn25e49d8e72002-09-23 09:36:25 +0000470# define COND0 (u->cond == 0)
471# define EXTRA4b0 (u->extra4b == 0)
472# define SG_WD0 (u->signed_widen == 0)
473# define JMPKIND0 (u->jmpkind == 0)
474# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
475 ( beforeLiveness \
476 ? u->regs_live_after == ALL_RREGS_LIVE \
477 : True ))
478
479# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
480# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
481# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
482# define XJMP ( SG_WD0 && CCALL0)
483# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
484# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
485
486 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000487 Int n_lits = 0;
488 if (u->tag1 == Literal) n_lits++;
489 if (u->tag2 == Literal) n_lits++;
490 if (u->tag3 == Literal) n_lits++;
491 if (n_lits > 1)
492 return False;
493
njn25e49d8e72002-09-23 09:36:25 +0000494 /* Fields not checked: val1, val2, val3 */
495
sewardjde4a1d02002-03-22 01:27:54 +0000496 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000497
498 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000499 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
500 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
501 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000502 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000503 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000504 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
505 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
506 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
507 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
508 case LOAD:
509 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
510 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
511 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
512 case WIDEN: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XWIDEN;
513 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
514 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
515 case CALLM_S:
516 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
517 case PUSH:
518 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
519 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
520 case AND:
521 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
522 case ADD:
523 case XOR:
524 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
525 case SBB:
526 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
527 case SHL:
528 case SHR:
529 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
530 case ROL:
531 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
532 case RCL:
533 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
534 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
535 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
536 case INC:
537 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
538 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
539 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
540 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
541 case FPU_R:
542 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
543 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
544 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
545 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
546 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
547 case CCALL: return LIT1 && SZ0 && CC0 &&
548 (u->argc > 0 ? TR1 : N1) &&
549 (u->argc > 1 ? TR2 : N2) &&
550 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
551 u->regparms_n <= u->argc && XCCALL;
sewardj3d7c9c82003-03-26 21:08:13 +0000552 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
553 case MMX1:
sewardj4fbe6e92003-06-15 21:54:34 +0000554 case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
555 case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
556 case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
557 case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
558 case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
559 case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjfebaa3b2003-05-25 01:07:34 +0000560
561 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
562 case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
563 case SSE2a_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjde8aecf2003-05-27 00:46:28 +0000564 case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj02af6bc2003-06-12 00:56:06 +0000565 case SSE3a_MemRd: return LIT0 && SZsse && CCf && Ls1 && Ls2 && TR3 && XOTHER;
sewardj4fbe6e92003-06-15 21:54:34 +0000566 case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj02af6bc2003-06-12 00:56:06 +0000567 case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardjb31b06d2003-06-13 00:26:02 +0000568 case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardj4fbe6e92003-06-15 21:54:34 +0000569 case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
sewardja60be0e2003-05-26 08:47:27 +0000570 case SSE3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
sewardj2ca39a12003-06-14 12:03:35 +0000571 case SSE4: return LIT0 && SZ0 && CCf && Ls1 && Ls2 && N3 && XOTHER;
sewardja453fb02003-06-14 13:22:36 +0000572 case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
sewardje3891fa2003-06-15 03:13:48 +0000573 case SSE3ag_MemRd_RegWr:
574 return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000575 default:
576 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000577 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000578 else {
579 VG_(printf)("unhandled opcode: %u. Perhaps "
580 "VG_(needs).extended_UCode should be set?",
581 u->opcode);
njne427a662002-10-02 11:08:25 +0000582 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000583 }
sewardjde4a1d02002-03-22 01:27:54 +0000584 }
njn25e49d8e72002-09-23 09:36:25 +0000585# undef LIT0
586# undef LIT1
sewardjb31b06d2003-06-13 00:26:02 +0000587# undef LIT8
njn25e49d8e72002-09-23 09:36:25 +0000588# undef LITm
sewardj3d7c9c82003-03-26 21:08:13 +0000589# undef SZ8
sewardjde4a1d02002-03-22 01:27:54 +0000590# undef SZ4
591# undef SZ2
592# undef SZ1
593# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000594# undef SZ42
sewardjd7971012003-04-04 00:21:58 +0000595# undef SZ48
sewardjfebaa3b2003-05-25 01:07:34 +0000596# undef SZ416
sewardjde8aecf2003-05-27 00:46:28 +0000597# undef SZsse
njn25e49d8e72002-09-23 09:36:25 +0000598# undef SZi
599# undef SZf
600# undef SZ4m
601# undef emptyR
602# undef emptyW
603# undef CC0
604# undef CCr
605# undef CCw
606# undef CCa
607# undef CCb
608# undef CCc
609# undef CCd
610# undef CCe
611# undef CCf
612# undef CCg
613# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000614# undef TR1
615# undef TR2
616# undef TR3
617# undef A1
618# undef A2
619# undef AS1
620# undef AS2
621# undef AS3
622# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000623# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000624# undef Ls1
sewardjfebaa3b2003-05-25 01:07:34 +0000625# undef Ls2
sewardjde4a1d02002-03-22 01:27:54 +0000626# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000627# undef TRL1
628# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000629# undef N1
630# undef N2
631# undef N3
sewardje1042472002-09-30 12:33:11 +0000632# undef Se2
633# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000634# undef COND0
635# undef EXTRA4b0
636# undef SG_WD0
637# undef JMPKIND0
638# undef CCALL0
639# undef Xextra4b
640# undef XWIDEN
641# undef XJMP
642# undef XCCALL
643# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000644}
645
njn25e49d8e72002-09-23 09:36:25 +0000646void VG_(saneUCodeBlock) ( UCodeBlock* cb )
647{
648 Int i;
649
650 for (i = 0; i < cb->used; i++) {
651 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
652 if (!sane) {
653 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000654 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000655 }
656 vg_assert(sane);
657 }
658}
sewardjde4a1d02002-03-22 01:27:54 +0000659
660/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000661Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000662{
663 Int callm = 0;
664 Int callm_s = 0;
665 Int callm_e = 0;
666 Int callm_ptr, calls_ptr;
667 Int i, j, t;
668 Bool incall = False;
669
670 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
671
672 for (i = 0; i < cb->used; i++) {
673 switch (cb->instrs[i].opcode) {
674 case CALLM:
675 if (!incall) return False;
676 callm++;
677 break;
678 case CALLM_S:
679 if (incall) return False;
680 incall = True;
681 callm_s++;
682 break;
683 case CALLM_E:
684 if (!incall) return False;
685 incall = False;
686 callm_e++;
687 break;
688 case PUSH: case POP: case CLEAR:
689 if (!incall) return False;
690 break;
691 default:
692 break;
693 }
694 }
695 if (incall) return False;
696 if (callm != callm_s || callm != callm_e) return False;
697
698 /* Check the sections between CALLM_S and CALLM's. Ensure that no
699 PUSH uinsn pushes any TempReg that any other PUSH in the same
700 section pushes. Ie, check that the TempReg args to PUSHes in
701 the section are unique. If not, the instrumenter generates
702 incorrect code for CALLM insns. */
703
704 callm_ptr = 0;
705
706 find_next_CALLM:
707 /* Search for the next interval, making calls_ptr .. callm_ptr
708 bracket it. */
709 while (callm_ptr < cb->used
710 && cb->instrs[callm_ptr].opcode != CALLM)
711 callm_ptr++;
712 if (callm_ptr == cb->used)
713 return True;
714 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
715
716 calls_ptr = callm_ptr - 1;
717 while (cb->instrs[calls_ptr].opcode != CALLM_S)
718 calls_ptr--;
719 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
720 vg_assert(calls_ptr >= 0);
721
722 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
723
724 /* For each PUSH insn in the interval ... */
725 for (i = calls_ptr + 1; i < callm_ptr; i++) {
726 if (cb->instrs[i].opcode != PUSH) continue;
727 t = cb->instrs[i].val1;
728 /* Ensure no later PUSH insns up to callm_ptr push the same
729 TempReg. Return False if any such are found. */
730 for (j = i+1; j < callm_ptr; j++) {
731 if (cb->instrs[j].opcode == PUSH &&
732 cb->instrs[j].val1 == t)
733 return False;
734 }
735 }
736
737 /* This interval is clean. Keep going ... */
738 callm_ptr++;
739 goto find_next_CALLM;
740}
741
742
743/*------------------------------------------------------------*/
744/*--- Printing uinstrs. ---*/
745/*------------------------------------------------------------*/
746
njn25e49d8e72002-09-23 09:36:25 +0000747/* Global that dictates whether to print generated code at all stages */
748Bool VG_(print_codegen);
749
njn563f96f2003-02-03 11:17:46 +0000750Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000751{
752 switch (cond) {
753 case CondO: return "o";
754 case CondNO: return "no";
755 case CondB: return "b";
756 case CondNB: return "nb";
757 case CondZ: return "z";
758 case CondNZ: return "nz";
759 case CondBE: return "be";
760 case CondNBE: return "nbe";
761 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000762 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000763 case CondP: return "p";
764 case CondNP: return "np";
765 case CondL: return "l";
766 case CondNL: return "nl";
767 case CondLE: return "le";
768 case CondNLE: return "nle";
769 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000770 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000771 }
772}
773
774
775static void vg_ppFlagSet ( Char* prefix, FlagSet set )
776{
777 VG_(printf)("%s", prefix);
778 if (set & FlagD) VG_(printf)("D");
779 if (set & FlagO) VG_(printf)("O");
780 if (set & FlagS) VG_(printf)("S");
781 if (set & FlagZ) VG_(printf)("Z");
782 if (set & FlagA) VG_(printf)("A");
783 if (set & FlagC) VG_(printf)("C");
784 if (set & FlagP) VG_(printf)("P");
785}
786
787
788static void ppTempReg ( Int tt )
789{
790 if ((tt & 1) == 0)
791 VG_(printf)("t%d", tt);
792 else
793 VG_(printf)("q%d", tt-1);
794}
795
796
njn4ba5a792002-09-30 10:23:54 +0000797void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000798{
799 UInt tag, val;
800 switch (operandNo) {
801 case 1: tag = u->tag1; val = u->val1; break;
802 case 2: tag = u->tag2; val = u->val2; break;
803 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000804 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000805 }
806 if (tag == Literal) val = u->lit32;
807
808 if (parens) VG_(printf)("(");
809 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000810 case TempReg: ppTempReg(val); break;
811 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
812 case Literal: VG_(printf)("$0x%x", val); break;
813 case Lit16: VG_(printf)("$0x%x", val); break;
814 case NoValue: VG_(printf)("NoValue"); break;
815 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
816 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
817 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000818 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000819 }
820 if (parens) VG_(printf)(")");
821}
822
823
njn4ba5a792002-09-30 10:23:54 +0000824Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000825{
826 switch (opc) {
827 case ADD: return (upper ? "ADD" : "add");
828 case ADC: return (upper ? "ADC" : "adc");
829 case AND: return (upper ? "AND" : "and");
830 case OR: return (upper ? "OR" : "or");
831 case XOR: return (upper ? "XOR" : "xor");
832 case SUB: return (upper ? "SUB" : "sub");
833 case SBB: return (upper ? "SBB" : "sbb");
834 case SHL: return (upper ? "SHL" : "shl");
835 case SHR: return (upper ? "SHR" : "shr");
836 case SAR: return (upper ? "SAR" : "sar");
837 case ROL: return (upper ? "ROL" : "rol");
838 case ROR: return (upper ? "ROR" : "ror");
839 case RCL: return (upper ? "RCL" : "rcl");
840 case RCR: return (upper ? "RCR" : "rcr");
841 case NOT: return (upper ? "NOT" : "not");
842 case NEG: return (upper ? "NEG" : "neg");
843 case INC: return (upper ? "INC" : "inc");
844 case DEC: return (upper ? "DEC" : "dec");
845 case BSWAP: return (upper ? "BSWAP" : "bswap");
846 default: break;
847 }
njne427a662002-10-02 11:08:25 +0000848 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000849 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000850 case CALLM_S: return "CALLM_S";
851 case CALLM_E: return "CALLM_E";
852 case INCEIP: return "INCEIP";
853 case LEA1: return "LEA1";
854 case LEA2: return "LEA2";
855 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000856 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000857 case GET: return "GET";
858 case PUT: return "PUT";
859 case GETF: return "GETF";
860 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000861 case GETSEG: return "GETSEG";
862 case PUTSEG: return "PUTSEG";
863 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000864 case LOAD: return "LD" ;
865 case STORE: return "ST" ;
866 case MOV: return "MOV";
867 case CMOV: return "CMOV";
868 case WIDEN: return "WIDEN";
869 case JMP: return "J" ;
870 case JIFZ: return "JIFZ" ;
871 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000872 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000873 case PUSH: return "PUSH" ;
874 case POP: return "POP" ;
875 case CLEAR: return "CLEAR";
876 case CC2VAL: return "CC2VAL";
877 case FPU_R: return "FPU_R";
878 case FPU_W: return "FPU_W";
879 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000880 case MMX1: return "MMX1" ;
881 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000882 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000883 case MMX2_MemRd: return "MMX2_MRd" ;
884 case MMX2_MemWr: return "MMX2_MWr" ;
sewardj4fbe6e92003-06-15 21:54:34 +0000885 case MMX2_ERegRd: return "MMX2_eRRd" ;
886 case MMX2_ERegWr: return "MMX2_eRWr" ;
sewardjfebaa3b2003-05-25 01:07:34 +0000887 case SSE2a_MemWr: return "SSE2a_MWr";
888 case SSE2a_MemRd: return "SSE2a_MRd";
sewardj4fbe6e92003-06-15 21:54:34 +0000889 case SSE3e_RegRd: return "SSE3e_RRd";
sewardj02af6bc2003-06-12 00:56:06 +0000890 case SSE3g_RegWr: return "SSE3g_RWr";
sewardjb31b06d2003-06-13 00:26:02 +0000891 case SSE3g1_RegWr: return "SSE3g1_RWr";
sewardj4fbe6e92003-06-15 21:54:34 +0000892 case SSE3e1_RegRd: return "SSE3e1_RRd";
sewardja60be0e2003-05-26 08:47:27 +0000893 case SSE3: return "SSE3";
sewardjfebaa3b2003-05-25 01:07:34 +0000894 case SSE4: return "SSE4";
sewardja453fb02003-06-14 13:22:36 +0000895 case SSE5: return "SSE5";
sewardjfebaa3b2003-05-25 01:07:34 +0000896 case SSE3a_MemWr: return "SSE3a_MWr";
897 case SSE3a_MemRd: return "SSE3a_MRd";
sewardje3891fa2003-06-15 03:13:48 +0000898 case SSE3ag_MemRd_RegWr: return "SSE3ag_MemRd_RegWr";
njn25e49d8e72002-09-23 09:36:25 +0000899 default:
900 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000901 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000902 else {
903 VG_(printf)("unhandled opcode: %u. Perhaps "
904 "VG_(needs).extended_UCode should be set?",
905 opc);
njne427a662002-10-02 11:08:25 +0000906 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000907 }
sewardjde4a1d02002-03-22 01:27:54 +0000908 }
909}
910
sewardja38e0922002-10-01 00:50:47 +0000911static
njn4ba5a792002-09-30 10:23:54 +0000912void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000913{
914# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000915 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000916 u->regs_live_after) \
917 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000918
njn25e49d8e72002-09-23 09:36:25 +0000919 VG_(printf)("[");
920 PRINT_RREG_LIVENESS(R_EAX, "a");
921 PRINT_RREG_LIVENESS(R_EBX, "b");
922 PRINT_RREG_LIVENESS(R_ECX, "c");
923 PRINT_RREG_LIVENESS(R_EDX, "d");
924 PRINT_RREG_LIVENESS(R_ESI, "S");
925 PRINT_RREG_LIVENESS(R_EDI, "D");
926 VG_(printf)("]");
927
928# undef PRINT_RREG_LIVENESS
929}
930
931/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000932void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000933{
njn4ba5a792002-09-30 10:23:54 +0000934 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000935
936 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000937 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000938 VG_(printf)("size: %d\n", u->size);
939 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
940 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000941 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
942 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
943 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
944 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000945 VG_(printf)("signed_widen: %d\n", u->signed_widen);
946 VG_(printf)("jmpkind: %d\n", u->jmpkind);
947 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
948 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
949 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000950 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000951 VG_(printf)("\n");
952}
953
sewardja38e0922002-10-01 00:50:47 +0000954static
njn4ba5a792002-09-30 10:23:54 +0000955void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000956{
957 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000958 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000959 if (u->opcode == JMP || u->opcode == CC2VAL)
njn563f96f2003-02-03 11:17:46 +0000960 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000961
962 switch (u->size) {
963 case 0: VG_(printf)("o"); break;
964 case 1: VG_(printf)("B"); break;
965 case 2: VG_(printf)("W"); break;
966 case 4: VG_(printf)("L"); break;
967 case 8: VG_(printf)("Q"); break;
sewardjfebaa3b2003-05-25 01:07:34 +0000968 case 16: VG_(printf)("QQ"); break;
sewardjde4a1d02002-03-22 01:27:54 +0000969 default: VG_(printf)("%d", (Int)u->size); break;
970 }
971
sewardjfebaa3b2003-05-25 01:07:34 +0000972 VG_(printf)(" \t");
973
sewardjde4a1d02002-03-22 01:27:54 +0000974 switch (u->opcode) {
975
sewardjde4a1d02002-03-22 01:27:54 +0000976 case CALLM_S: case CALLM_E:
977 break;
978
979 case INCEIP:
sewardjfebaa3b2003-05-25 01:07:34 +0000980 VG_(printf)("$%d", u->val1);
sewardjde4a1d02002-03-22 01:27:54 +0000981 break;
982
983 case LEA2:
sewardjfebaa3b2003-05-25 01:07:34 +0000984 VG_(printf)("%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000985 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000986 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +0000987 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000988 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +0000989 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000990 break;
991
992 case LEA1:
sewardjfebaa3b2003-05-25 01:07:34 +0000993 VG_(printf)("%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000994 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000995 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +0000996 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000997 break;
998
sewardj7a5ebcf2002-11-13 22:42:13 +0000999 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001000 break;
1001
1002 case FPU_W:
sewardjfebaa3b2003-05-25 01:07:34 +00001003 VG_(printf)("0x%x:0x%x, ",
sewardjde4a1d02002-03-22 01:27:54 +00001004 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +00001005 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001006 break;
1007
1008 case FPU_R:
sewardjfebaa3b2003-05-25 01:07:34 +00001009 VG_(printf)("");
njn4ba5a792002-09-30 10:23:54 +00001010 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001011 VG_(printf)(", 0x%x:0x%x",
1012 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1013 break;
1014
1015 case FPU:
sewardjfebaa3b2003-05-25 01:07:34 +00001016 VG_(printf)("0x%x:0x%x",
sewardjde4a1d02002-03-22 01:27:54 +00001017 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1018 break;
1019
sewardj3d7c9c82003-03-26 21:08:13 +00001020 case MMX1:
sewardjfebaa3b2003-05-25 01:07:34 +00001021 VG_(printf)("0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001022 u->val1 & 0xFF );
1023 break;
1024
1025 case MMX2:
sewardjfebaa3b2003-05-25 01:07:34 +00001026 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001027 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1028 break;
1029
sewardjca860012003-03-27 23:52:58 +00001030 case MMX3:
sewardjfebaa3b2003-05-25 01:07:34 +00001031 VG_(printf)("0x%x:0x%x:0x%x",
sewardjca860012003-03-27 23:52:58 +00001032 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1033 break;
1034
sewardj4fbe6e92003-06-15 21:54:34 +00001035 case MMX2_ERegWr:
1036 case MMX2_ERegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001037 VG_(printf)("0x%x:0x%x, ",
sewardjca860012003-03-27 23:52:58 +00001038 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1039 VG_(pp_UOperand)(u, 2, 4, False);
1040 break;
1041
sewardj3d7c9c82003-03-26 21:08:13 +00001042 case MMX2_MemWr:
1043 case MMX2_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001044 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001045 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1046 VG_(pp_UOperand)(u, 2, 4, True);
1047 break;
1048
sewardjfebaa3b2003-05-25 01:07:34 +00001049 case SSE2a_MemWr:
1050 case SSE2a_MemRd:
1051 VG_(printf)("0x%x:0x%x:0x%x",
1052 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1053 VG_(pp_UOperand)(u, 3, 4, True);
1054 break;
1055
1056 case SSE3a_MemWr:
1057 case SSE3a_MemRd:
1058 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1059 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
sewardjde8aecf2003-05-27 00:46:28 +00001060 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
sewardjfebaa3b2003-05-25 01:07:34 +00001061 VG_(pp_UOperand)(u, 3, 4, True);
1062 break;
1063
sewardj4fbe6e92003-06-15 21:54:34 +00001064 case SSE3e_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001065 case SSE3g_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001066 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1067 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1068 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1069 VG_(pp_UOperand)(u, 3, 4, True);
1070 break;
1071
sewardjb31b06d2003-06-13 00:26:02 +00001072 case SSE3g1_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001073 case SSE3e1_RegRd:
sewardjb31b06d2003-06-13 00:26:02 +00001074 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1075 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1076 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1077 u->lit32 );
1078 VG_(pp_UOperand)(u, 3, 4, True);
1079 break;
1080
sewardja60be0e2003-05-26 08:47:27 +00001081 case SSE3:
1082 VG_(printf)("0x%x:0x%x:0x%x",
1083 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1084 u->val2 & 0xFF );
1085 break;
1086
sewardjfebaa3b2003-05-25 01:07:34 +00001087 case SSE4:
1088 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1089 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1090 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1091 break;
1092
sewardja453fb02003-06-14 13:22:36 +00001093 case SSE5:
1094 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1095 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1096 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1097 u->val3 & 0xFF );
1098 break;
1099
sewardje3891fa2003-06-15 03:13:48 +00001100 case SSE3ag_MemRd_RegWr:
1101 VG_(printf)("0x%x(addr=", u->lit32 );
1102 VG_(pp_UOperand)(u, 1, 4, False);
1103 VG_(printf)(", dst=");
1104 VG_(pp_UOperand)(u, 2, 4, False);
1105 VG_(printf)(")");
1106 break;
1107
sewardjde4a1d02002-03-22 01:27:54 +00001108 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001109 case GETSEG: case PUTSEG:
njn4ba5a792002-09-30 10:23:54 +00001110 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001111 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001112 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001113 break;
1114
1115 case JMP:
1116 switch (u->jmpkind) {
1117 case JmpCall: VG_(printf)("-c"); break;
1118 case JmpRet: VG_(printf)("-r"); break;
1119 case JmpSyscall: VG_(printf)("-sys"); break;
1120 case JmpClientReq: VG_(printf)("-cli"); break;
1121 default: break;
1122 }
njn4ba5a792002-09-30 10:23:54 +00001123 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001124 if (CondAlways == u->cond) {
1125 /* Print x86 instruction size if filled in */
1126 if (0 != u->extra4b)
1127 VG_(printf)(" ($%u)", u->extra4b);
1128 }
sewardjde4a1d02002-03-22 01:27:54 +00001129 break;
1130
1131 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001132 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1133 case NOT: case NEG: case INC: case DEC: case BSWAP:
njn4ba5a792002-09-30 10:23:54 +00001134 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001135 break;
1136
njn25e49d8e72002-09-23 09:36:25 +00001137 /* Print a "(s)" after args passed on stack */
1138 case CCALL:
njn25e49d8e72002-09-23 09:36:25 +00001139 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001140 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001141 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001142 }
njn25e49d8e72002-09-23 09:36:25 +00001143 VG_(printf)("%p(", u->lit32);
1144 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001145 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001146 if (u->regparms_n < 1)
1147 VG_(printf)("(s)");
1148 }
1149 if (u->argc > 1) {
1150 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001151 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001152 if (u->regparms_n < 2)
1153 VG_(printf)("(s)");
1154 }
1155 if (u->argc > 2) {
1156 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001157 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001158 if (u->regparms_n < 3)
1159 VG_(printf)("(s)");
1160 }
1161 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001162 break;
1163
sewardje1042472002-09-30 12:33:11 +00001164 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001165 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001166 case ADD: case ADC: case AND: case OR:
1167 case XOR: case SUB: case SBB:
1168 case SHL: case SHR: case SAR:
1169 case ROL: case ROR: case RCL: case RCR:
njn4ba5a792002-09-30 10:23:54 +00001170 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001171 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001172 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001173 break;
1174
1175 case WIDEN:
1176 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1177 u->signed_widen?'s':'z');
njn4ba5a792002-09-30 10:23:54 +00001178 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001179 break;
1180
njn25e49d8e72002-09-23 09:36:25 +00001181 default:
1182 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001183 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001184 else {
1185 VG_(printf)("unhandled opcode: %u. Perhaps "
1186 "VG_(needs).extended_UCode should be set?",
1187 u->opcode);
njne427a662002-10-02 11:08:25 +00001188 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001189 }
sewardjde4a1d02002-03-22 01:27:54 +00001190 }
sewardjde4a1d02002-03-22 01:27:54 +00001191 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1192 VG_(printf)(" (");
1193 if (u->flags_r != FlagsEmpty)
1194 vg_ppFlagSet("-r", u->flags_r);
1195 if (u->flags_w != FlagsEmpty)
1196 vg_ppFlagSet("-w", u->flags_w);
1197 VG_(printf)(")");
1198 }
njn25e49d8e72002-09-23 09:36:25 +00001199
1200 if (ppRegsLiveness) {
1201 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001202 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001203 }
1204
sewardjde4a1d02002-03-22 01:27:54 +00001205 VG_(printf)("\n");
1206}
1207
njn4ba5a792002-09-30 10:23:54 +00001208void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001209{
njn4ba5a792002-09-30 10:23:54 +00001210 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001211}
1212
njn4ba5a792002-09-30 10:23:54 +00001213void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001214{
njn4ba5a792002-09-30 10:23:54 +00001215 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001216}
sewardjde4a1d02002-03-22 01:27:54 +00001217
njn4ba5a792002-09-30 10:23:54 +00001218void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001219{
1220 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001221 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001222 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001223 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001224 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001225 VG_(printf)("\n");
1226}
1227
1228
1229/*------------------------------------------------------------*/
1230/*--- uinstr helpers for register allocation ---*/
1231/*--- and code improvement. ---*/
1232/*------------------------------------------------------------*/
1233
njn25e49d8e72002-09-23 09:36:25 +00001234/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001235 the caller (regs), which is assumed to be big enough. Return the number
1236 of entries. Written regs are indicated in parallel array isWrites.
1237 Insns which read _and_ write a register wind up mentioning it twice.
1238 Entries are placed in the array in program order, so that if a reg is
1239 read-modified-written, it appears first as a read and then as a write.
1240 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001241*/
njn25e49d8e72002-09-23 09:36:25 +00001242__inline__
njn810086f2002-11-14 12:42:47 +00001243Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001244{
njn810086f2002-11-14 12:42:47 +00001245# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1246# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001247
1248 Int n = 0;
1249 switch (u->opcode) {
1250 case LEA1: RD(1); WR(2); break;
1251 case LEA2: RD(1); RD(2); WR(3); break;
1252
sewardj4fbe6e92003-06-15 21:54:34 +00001253 case SSE3e_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001254 case SSE3a_MemWr:
1255 case SSE3a_MemRd:
1256 case SSE2a_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001257 case SSE3e1_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001258 case SSE2a_MemRd: RD(3); break;
1259
sewardjb31b06d2003-06-13 00:26:02 +00001260 case SSE3g1_RegWr:
sewardj02af6bc2003-06-12 00:56:06 +00001261 case SSE3g_RegWr: WR(3); break;
sewardjfebaa3b2003-05-25 01:07:34 +00001262
sewardje3891fa2003-06-15 03:13:48 +00001263 case SSE3ag_MemRd_RegWr: RD(1); WR(2); break;
1264
sewardj4fbe6e92003-06-15 21:54:34 +00001265 case MMX2_ERegRd: RD(2); break;
1266 case MMX2_ERegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001267
sewardja453fb02003-06-14 13:22:36 +00001268 case SSE4: case SSE3: case SSE5:
sewardjca860012003-03-27 23:52:58 +00001269 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001270 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001271 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001272
1273 case CCALL:
1274 if (u->argc > 0) RD(1);
1275 if (u->argc > 1) RD(2);
1276 if (u->argc > 2) RD(3);
1277 if (u->has_ret_val) WR(3);
1278 break;
1279
sewardj3d7c9c82003-03-26 21:08:13 +00001280 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001281 case FPU_R: case FPU_W: RD(2); break;
1282
sewardje1042472002-09-30 12:33:11 +00001283 case GETSEG: WR(2); break;
1284 case PUTSEG: RD(1); break;
1285
sewardjde4a1d02002-03-22 01:27:54 +00001286 case GETF: WR(1); break;
1287 case PUTF: RD(1); break;
1288
1289 case GET: WR(2); break;
1290 case PUT: RD(1); break;
1291 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001292 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001293 case MOV: RD(1); WR(2); break;
1294
1295 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001296
njn25e49d8e72002-09-23 09:36:25 +00001297 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001298 case POP: WR(1); break;
1299
sewardje1042472002-09-30 12:33:11 +00001300 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001301 case CMOV:
1302 case ADD: case ADC: case AND: case OR:
1303 case XOR: case SUB: case SBB:
1304 RD(1); RD(2); WR(2); break;
1305
1306 case SHL: case SHR: case SAR:
1307 case ROL: case ROR: case RCL: case RCR:
1308 RD(1); RD(2); WR(2); break;
1309
njn25e49d8e72002-09-23 09:36:25 +00001310 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001311 RD(1); WR(1); break;
1312
1313 case WIDEN: RD(1); WR(1); break;
1314
1315 case CC2VAL: WR(1); break;
1316 case JIFZ: RD(1); break;
1317
njn25e49d8e72002-09-23 09:36:25 +00001318 default:
1319 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001320 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001321 else {
1322 VG_(printf)("unhandled opcode: %u. Perhaps "
1323 "VG_(needs).extended_UCode should be set?",
1324 u->opcode);
njne427a662002-10-02 11:08:25 +00001325 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001326 }
sewardjde4a1d02002-03-22 01:27:54 +00001327 }
1328 return n;
1329
1330# undef RD
1331# undef WR
1332}
1333
1334
njn25e49d8e72002-09-23 09:36:25 +00001335/* Change temp regs in u into real regs, as directed by the
1336 * temps[i]-->reals[i] mapping. */
1337static __inline__
njn810086f2002-11-14 12:42:47 +00001338void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001339{
1340 Int i;
1341 if (u->tag1 == TempReg) {
1342 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001343 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001344 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001345 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001346 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001347 }
1348 if (u->tag2 == TempReg) {
1349 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001350 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001351 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001352 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001353 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001354 }
1355 if (u->tag3 == TempReg) {
1356 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001357 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001358 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001359 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001360 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001361 }
1362}
1363
1364
1365/* Tedious x86-specific hack which compensates for the fact that the
1366 register numbers for %ah .. %dh do not correspond to those for %eax
1367 .. %edx. It maps a (reg size, reg no) pair to the number of the
1368 containing 32-bit reg. */
1369static __inline__
1370Int containingArchRegOf ( Int sz, Int aregno )
1371{
1372 switch (sz) {
1373 case 4: return aregno;
1374 case 2: return aregno;
1375 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001376 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001377 }
1378}
1379
1380
1381/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001382 reg. Otherwise return -1. Used in redundant-PUT elimination.
1383 Note that this is not required for skins extending UCode because
1384 this happens before instrumentation. */
sewardjde4a1d02002-03-22 01:27:54 +00001385static __inline__
1386Int maybe_uinstrReadsArchReg ( UInstr* u )
1387{
1388 switch (u->opcode) {
1389 case GET:
1390 case ADD: case ADC: case AND: case OR:
1391 case XOR: case SUB: case SBB:
1392 case SHL: case SHR: case SAR: case ROL:
1393 case ROR: case RCL: case RCR:
1394 if (u->tag1 == ArchReg)
1395 return containingArchRegOf ( u->size, u->val1 );
1396 else
1397 return -1;
1398
1399 case GETF: case PUTF:
1400 case CALLM_S: case CALLM_E:
1401 case INCEIP:
1402 case LEA1:
1403 case LEA2:
1404 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001405 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001406 case PUT:
1407 case LOAD:
1408 case STORE:
1409 case MOV:
1410 case CMOV:
1411 case JMP:
1412 case CALLM: case CLEAR: case PUSH: case POP:
1413 case NOT: case NEG: case INC: case DEC: case BSWAP:
1414 case CC2VAL:
1415 case JIFZ:
1416 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001417 case MMX1: case MMX2: case MMX3:
sewardj3d7c9c82003-03-26 21:08:13 +00001418 case MMX2_MemRd: case MMX2_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001419 case MMX2_ERegRd: case MMX2_ERegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001420 case SSE2a_MemWr: case SSE2a_MemRd:
1421 case SSE3a_MemWr: case SSE3a_MemRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001422 case SSE3e_RegRd: case SSE3g_RegWr:
1423 case SSE3g1_RegWr: case SSE3e1_RegRd:
sewardje3891fa2003-06-15 03:13:48 +00001424 case SSE4: case SSE3: case SSE5: case SSE3ag_MemRd_RegWr:
sewardjde4a1d02002-03-22 01:27:54 +00001425 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001426 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1427 case GETSEG: case PUTSEG:
1428 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001429 return -1;
1430
1431 default:
njn4ba5a792002-09-30 10:23:54 +00001432 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001433 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001434 }
1435}
1436
1437static __inline__
1438Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1439{
1440 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001441 Int tempUse[VG_MAX_REGS_USED];
1442 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001443
1444 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001445 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001446 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001447 return True;
1448 return False;
1449}
1450
1451
1452/*------------------------------------------------------------*/
1453/*--- ucode improvement. ---*/
1454/*------------------------------------------------------------*/
1455
1456/* Improve the code in cb by doing
1457 -- Redundant ArchReg-fetch elimination
1458 -- Redundant PUT elimination
1459 -- Redundant cond-code restore/save elimination
1460 The overall effect of these is to allow target registers to be
1461 cached in host registers over multiple target insns.
1462*/
1463static void vg_improve ( UCodeBlock* cb )
1464{
1465 Int i, j, k, m, n, ar, tr, told, actual_areg;
1466 Int areg_map[8];
1467 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001468 Int tempUse[VG_MAX_REGS_USED];
1469 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001470 UInstr* u;
1471 Bool wr;
1472 Int* last_live_before;
1473 FlagSet future_dead_flags;
1474
sewardj2ca39a12003-06-14 12:03:35 +00001475# if 0
1476 /* DEBUGGING HOOK */
1477 {
1478 static int n_done=0;
1479 if (VG_(clo_stop_after) > 1000000000) {
1480 if (n_done > (VG_(clo_stop_after) - 1000000000)) {
1481 dis=False;
1482 VG_(clo_trace_codegen) = 0;
1483 return;
1484 }
1485 if (n_done == (VG_(clo_stop_after) - 1000000000)) {
1486 VG_(printf)("\n");
1487 VG_(pp_UCodeBlock) ( cb, "Incoming:" );
1488 dis = True;
1489 VG_(clo_trace_codegen) = 31;
1490 }
1491 n_done++;
1492 }
1493 }
1494 /* end DEBUGGING HOOK */
1495# endif /* 0 */
1496
njn25e49d8e72002-09-23 09:36:25 +00001497 if (dis)
1498 VG_(printf) ("Improvements:\n");
1499
sewardjde4a1d02002-03-22 01:27:54 +00001500 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001501 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1502 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001503 else
1504 last_live_before = NULL;
1505
1506
1507 /* PASS 1: redundant GET elimination. (Actually, more general than
1508 that -- eliminates redundant fetches of ArchRegs). */
1509
1510 /* Find the live-range-ends for all temporaries. Duplicates code
1511 in the register allocator :-( */
1512
1513 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1514
1515 for (i = cb->used-1; i >= 0; i--) {
1516 u = &cb->instrs[i];
1517
njn810086f2002-11-14 12:42:47 +00001518 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001519
1520 /* For each temp usage ... bwds in program order. */
1521 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001522 tr = tempUse[j];
1523 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001524 if (last_live_before[tr] == -1) {
1525 vg_assert(tr >= 0 && tr < cb->nextTemp);
1526 last_live_before[tr] = wr ? (i+1) : i;
1527 }
1528 }
1529
1530 }
1531
1532# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1533 { Int q; \
1534 /* Invalidate any old binding(s) to tempreg. */ \
1535 for (q = 0; q < 8; q++) \
1536 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1537 /* Add the new binding. */ \
1538 areg_map[archreg] = (tempreg); \
1539 }
1540
1541 /* Set up the A-reg map. */
1542 for (i = 0; i < 8; i++) areg_map[i] = -1;
1543
1544 /* Scan insns. */
1545 for (i = 0; i < cb->used; i++) {
1546 u = &cb->instrs[i];
1547 if (u->opcode == GET && u->size == 4) {
1548 /* GET; see if it can be annulled. */
1549 vg_assert(u->tag1 == ArchReg);
1550 vg_assert(u->tag2 == TempReg);
1551 ar = u->val1;
1552 tr = u->val2;
1553 told = areg_map[ar];
1554 if (told != -1 && last_live_before[told] <= i) {
1555 /* ar already has an old mapping to told, but that runs
1556 out here. Annul this GET, rename tr to told for the
1557 rest of the block, and extend told's live range to that
1558 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001559 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001560 n = last_live_before[tr] + 1;
1561 if (n > cb->used) n = cb->used;
1562 last_live_before[told] = last_live_before[tr];
1563 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001564 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001565 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001566 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001567 i, tr, told,i+1, n-1);
1568 for (m = i+1; m < n; m++) {
1569 if (cb->instrs[m].tag1 == TempReg
1570 && cb->instrs[m].val1 == tr)
1571 cb->instrs[m].val1 = told;
1572 if (cb->instrs[m].tag2 == TempReg
1573 && cb->instrs[m].val2 == tr)
1574 cb->instrs[m].val2 = told;
sewardjfebaa3b2003-05-25 01:07:34 +00001575 if (cb->instrs[m].tag3 == TempReg
1576 && cb->instrs[m].val3 == tr)
1577 cb->instrs[m].val3 = told;
sewardjde4a1d02002-03-22 01:27:54 +00001578 }
1579 BIND_ARCH_TO_TEMP(ar,told);
1580 }
1581 else
1582 BIND_ARCH_TO_TEMP(ar,tr);
1583 }
1584 else if (u->opcode == GET && u->size != 4) {
1585 /* Invalidate any mapping for this archreg. */
1586 actual_areg = containingArchRegOf ( u->size, u->val1 );
1587 areg_map[actual_areg] = -1;
1588 }
1589 else if (u->opcode == PUT && u->size == 4) {
1590 /* PUT; re-establish t -> a binding */
1591 vg_assert(u->tag1 == TempReg);
1592 vg_assert(u->tag2 == ArchReg);
1593 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1594 }
1595 else if (u->opcode == PUT && u->size != 4) {
1596 /* Invalidate any mapping for this archreg. */
1597 actual_areg = containingArchRegOf ( u->size, u->val2 );
1598 areg_map[actual_areg] = -1;
1599 } else {
1600
1601 /* see if insn has an archreg as a read operand; if so try to
1602 map it. */
1603 if (u->tag1 == ArchReg && u->size == 4
1604 && areg_map[u->val1] != -1) {
1605 switch (u->opcode) {
1606 case ADD: case SUB: case AND: case OR: case XOR:
1607 case ADC: case SBB:
1608 case SHL: case SHR: case SAR: case ROL: case ROR:
1609 case RCL: case RCR:
njn25e49d8e72002-09-23 09:36:25 +00001610 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001611 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001612 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001613 i, nameIReg(4,u->val1), areg_map[u->val1]);
1614 u->tag1 = TempReg;
1615 u->val1 = areg_map[u->val1];
1616 /* Remember to extend the live range of the TempReg,
1617 if necessary. */
1618 if (last_live_before[u->val1] < i)
1619 last_live_before[u->val1] = i;
1620 break;
1621 default:
1622 break;
1623 }
1624 }
1625
1626 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001627 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001628
1629 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001630 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001631 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001632 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001633 for (m = 0; m < 8; m++)
1634 if (areg_map[m] == tr) areg_map[m] = -1;
1635 }
1636 }
1637
1638 }
1639
1640# undef BIND_ARCH_TO_TEMP
1641
sewardj05f1aa12002-04-30 00:29:36 +00001642 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1643 %ESP, since the memory check machinery always requires the
1644 in-memory value of %ESP to be up to date. Although this isn't
1645 actually required by other analyses (cache simulation), it's
1646 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001647 for (j = 0; j < 8; j++)
1648 annul_put[j] = False;
1649
1650 for (i = cb->used-1; i >= 0; i--) {
1651 u = &cb->instrs[i];
1652 if (u->opcode == NOP) continue;
1653
1654 if (u->opcode == PUT && u->size == 4) {
1655 vg_assert(u->tag2 == ArchReg);
1656 actual_areg = containingArchRegOf ( 4, u->val2 );
1657 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001658 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001659 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001660 if (dis)
1661 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001662 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001663 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001664 annul_put[actual_areg] = True;
1665 }
1666 }
1667 else if (u->opcode == PUT && u->size != 4) {
1668 actual_areg = containingArchRegOf ( u->size, u->val2 );
1669 annul_put[actual_areg] = False;
1670 }
1671 else if (u->opcode == JMP || u->opcode == JIFZ
1672 || u->opcode == CALLM) {
1673 for (j = 0; j < 8; j++)
1674 annul_put[j] = False;
1675 }
1676 else {
1677 /* If an instruction reads an ArchReg, the immediately
1678 preceding PUT cannot be annulled. */
1679 actual_areg = maybe_uinstrReadsArchReg ( u );
1680 if (actual_areg != -1)
1681 annul_put[actual_areg] = False;
1682 }
1683 }
1684
1685 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1686 dead after this point, annul the MOV insn and rename t2 to t1.
1687 Further modifies the last_live_before map. */
1688
1689# if 0
njn4ba5a792002-09-30 10:23:54 +00001690 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001691 for (i = 0; i < cb->nextTemp; i++)
1692 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1693 VG_(printf)("\n");
1694# endif
1695
1696 for (i = 0; i < cb->used-1; i++) {
1697 u = &cb->instrs[i];
1698 if (u->opcode != MOV) continue;
1699 if (u->tag1 == Literal) continue;
1700 vg_assert(u->tag1 == TempReg);
1701 vg_assert(u->tag2 == TempReg);
1702 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001703 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001704 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001705 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001706 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1707 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1708 if (cb->instrs[j].tag1 == TempReg
1709 && cb->instrs[j].val1 == u->val2)
1710 cb->instrs[j].val1 = u->val1;
1711 if (cb->instrs[j].tag2 == TempReg
1712 && cb->instrs[j].val2 == u->val2)
1713 cb->instrs[j].val2 = u->val1;
sewardjfebaa3b2003-05-25 01:07:34 +00001714 if (cb->instrs[j].tag3 == TempReg
1715 && cb->instrs[j].val3 == u->val2)
1716 cb->instrs[j].val3 = u->val1;
sewardjde4a1d02002-03-22 01:27:54 +00001717 }
1718 last_live_before[u->val1] = last_live_before[u->val2];
1719 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001720 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001721 }
1722 }
1723
1724 /* PASS 3: redundant condition-code restore/save elimination.
1725 Scan backwards from the end. future_dead_flags records the set
1726 of flags which are dead at this point, that is, will be written
1727 before they are next read. Earlier uinsns which write flags
1728 already in future_dead_flags can have their writes annulled.
1729 */
1730 future_dead_flags = FlagsEmpty;
1731
1732 for (i = cb->used-1; i >= 0; i--) {
1733 u = &cb->instrs[i];
1734
1735 /* We might never make it to insns beyond this one, so be
1736 conservative. */
1737 if (u->opcode == JIFZ || u->opcode == JMP) {
1738 future_dead_flags = FlagsEmpty;
1739 continue;
1740 }
1741
sewardjfbb6cda2002-07-24 09:33:52 +00001742 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1743 For example people try to mess with bit 21 to see if CPUID
1744 works. The setting may or may not actually take hold. So we
1745 play safe here. */
1746 if (u->opcode == PUTF) {
1747 future_dead_flags = FlagsEmpty;
1748 continue;
1749 }
1750
sewardjde4a1d02002-03-22 01:27:54 +00001751 /* We can annul the flags written by this insn if it writes a
1752 subset (or eq) of the set of flags known to be dead after
1753 this insn. If not, just record the flags also written by
1754 this insn.*/
1755 if (u->flags_w != FlagsEmpty
1756 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001757 if (dis) {
1758 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001759 vg_ppFlagSet("", u->flags_w);
1760 VG_(printf)(" due to later ");
1761 vg_ppFlagSet("", future_dead_flags);
1762 VG_(printf)("\n");
1763 }
1764 u->flags_w = FlagsEmpty;
1765 } else {
1766 future_dead_flags
1767 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1768 }
1769
1770 /* If this insn also reads flags, empty out future_dead_flags so
1771 as to force preceding writes not to be annulled. */
1772 if (u->flags_r != FlagsEmpty)
1773 future_dead_flags = FlagsEmpty;
1774 }
1775
1776 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001777 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1778
1779 if (dis) {
1780 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001781 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001782 }
sewardjde4a1d02002-03-22 01:27:54 +00001783}
1784
njn9b007f62003-04-07 14:40:25 +00001785/*------------------------------------------------------------*/
1786/*--- %ESP-update pass ---*/
1787/*------------------------------------------------------------*/
1788
1789/* For skins that want to know about %ESP changes, this pass adds
1790 in the appropriate hooks. We have to do it after the skin's
1791 instrumentation, so the skin doesn't have to worry about the CCALLs
1792 it adds in, and we must do it before register allocation because
1793 spilled temps make it much harder to work out the %esp deltas.
1794 Thus we have it as an extra phase between the two. */
1795static
1796UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1797{
1798 UCodeBlock* cb;
1799 UInstr* u;
1800 Int delta = 0;
1801 UInt t_ESP = INVALID_TEMPREG;
sewardj05bcdcb2003-05-18 10:05:38 +00001802 Int i;
njn9b007f62003-04-07 14:40:25 +00001803
1804 cb = VG_(setup_UCodeBlock)(cb_in);
1805
1806 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1807 u = VG_(get_instr)(cb_in, i);
1808
1809 if (GET == u->opcode && R_ESP == u->val1) {
1810 t_ESP = u->val2;
1811 delta = 0;
1812
1813 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1814
1815# define DO_GENERIC \
1816 if (VG_(track_events).new_mem_stack || \
1817 VG_(track_events).die_mem_stack) { \
1818 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1819 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1820 1, 1, False); \
1821 }
1822
1823# define DO(kind, size) \
1824 if (VG_(track_events).kind##_mem_stack_##size) { \
1825 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1826 uCCall(cb, (Addr) VG_(track_events).kind##_mem_stack_##size,\
1827 1, 1, False); \
1828 \
1829 } else \
1830 DO_GENERIC \
1831 break
1832
1833 if (u->val1 == t_ESP) {
1834 /* Known delta, common cases handled specially. */
1835 switch (delta) {
1836 case 4: DO(die, 4);
1837 case -4: DO(new, 4);
1838 case 8: DO(die, 8);
1839 case -8: DO(new, 8);
1840 case 12: DO(die, 12);
1841 case -12: DO(new, 12);
1842 case 16: DO(die, 16);
1843 case -16: DO(new, 16);
1844 case 32: DO(die, 32);
1845 case -32: DO(new, 32);
1846 default: DO_GENERIC; break;
1847 }
1848 } else {
1849 /* Unknown delta */
1850 DO_GENERIC;
1851 }
1852 delta = 0;
1853
1854# undef DO
1855# undef DO_GENERIC
1856
1857 } else if (Literal == u->tag1 && t_ESP == u->val2) {
1858 if (ADD == u->opcode) delta += u->lit32;
1859 if (SUB == u->opcode) delta -= u->lit32;
1860
1861 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1862 TempReg == u->tag2) {
1863 t_ESP = u->val2;
1864 }
1865 VG_(copy_UInstr) ( cb, u );
1866 }
1867
1868 VG_(free_UCodeBlock)(cb_in);
1869 return cb;
1870}
sewardjde4a1d02002-03-22 01:27:54 +00001871
1872/*------------------------------------------------------------*/
1873/*--- The new register allocator. ---*/
1874/*------------------------------------------------------------*/
1875
1876typedef
1877 struct {
1878 /* Becomes live for the first time after this insn ... */
1879 Int live_after;
1880 /* Becomes dead for the last time after this insn ... */
1881 Int dead_before;
1882 /* The "home" spill slot, if needed. Never changes. */
1883 Int spill_no;
1884 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1885 Int real_no;
1886 }
1887 TempInfo;
1888
1889
1890/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1891 them in spill locations, and add spill code, if there are not
1892 enough real regs. The usual register allocation deal, in short.
1893
1894 Important redundancy of representation:
1895
1896 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1897 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1898
1899 The .real_no field of a TempInfo gives the current RRR for
1900 this TempReg, or VG_NOVALUE if the TempReg is currently
1901 in memory, in which case it is in the SpillNo denoted by
1902 spillno.
1903
1904 These pieces of information (a fwds-bwds mapping, really) must
1905 be kept consistent!
1906
1907 This allocator uses the so-called Second Chance Bin Packing
1908 algorithm, as described in "Quality and Speed in Linear-scan
1909 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1910 pp142-151). It is simple and fast and remarkably good at
1911 minimising the amount of spill code introduced.
1912*/
1913
1914static
1915UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1916{
1917 TempInfo* temp_info;
1918 Int real_to_temp[VG_MAX_REALREGS];
1919 Bool is_spill_cand[VG_MAX_REALREGS];
1920 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1921 Int i, j, k, m, r, tno, max_ss_no;
1922 Bool wr, defer, isRead, spill_reqd;
njnf4ce3d32003-02-10 10:17:26 +00001923 UInt realUse[VG_MAX_REGS_USED];
1924 Int tempUse[VG_MAX_REGS_USED];
1925 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001926 UCodeBlock* c2;
1927
1928 /* Used to denote ... well, "no value" in this fn. */
1929# define VG_NOTHING (-2)
1930
1931 /* Initialise the TempReg info. */
1932 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001933 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1934 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001935 else
1936 temp_info = NULL;
1937
1938 for (i = 0; i < c1->nextTemp; i++) {
1939 temp_info[i].live_after = VG_NOTHING;
1940 temp_info[i].dead_before = VG_NOTHING;
1941 temp_info[i].spill_no = VG_NOTHING;
1942 /* temp_info[i].real_no is not yet relevant. */
1943 }
1944
1945 spill_reqd = False;
1946
1947 /* Scan fwds to establish live ranges. */
1948
1949 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00001950 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1951 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00001952 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00001953
1954 /* For each temp usage ... fwds in program order */
1955 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001956 tno = tempUse[j];
1957 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001958 if (wr) {
1959 /* Writes hold a reg live until after this insn. */
1960 if (temp_info[tno].live_after == VG_NOTHING)
1961 temp_info[tno].live_after = i;
1962 if (temp_info[tno].dead_before < i + 1)
1963 temp_info[tno].dead_before = i + 1;
1964 } else {
1965 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00001966 if (temp_info[tno].live_after == VG_NOTHING) {
1967 VG_(printf)("At instr %d...\n", i);
1968 VG_(core_panic)("First use of tmp not a write,"
1969 " probably a skin instrumentation error");
1970 }
sewardjde4a1d02002-03-22 01:27:54 +00001971 /* Reads only hold it live until before this insn. */
1972 if (temp_info[tno].dead_before < i)
1973 temp_info[tno].dead_before = i;
1974 }
1975 }
1976 }
1977
1978# if 0
1979 /* Sanity check on live ranges. Expensive but correct. */
1980 for (i = 0; i < c1->nextTemp; i++) {
1981 vg_assert( (temp_info[i].live_after == VG_NOTHING
1982 && temp_info[i].dead_before == VG_NOTHING)
1983 || (temp_info[i].live_after != VG_NOTHING
1984 && temp_info[i].dead_before != VG_NOTHING) );
1985 }
1986# endif
1987
1988 /* Do a rank-based allocation of TempRegs to spill slot numbers.
1989 We put as few as possible values in spill slots, but
1990 nevertheless need to have an assignment to them just in case. */
1991
1992 max_ss_no = -1;
1993
1994 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
1995 ss_busy_until_before[i] = 0;
1996
1997 for (i = 0; i < c1->nextTemp; i++) {
1998
1999 /* True iff this temp is unused. */
2000 if (temp_info[i].live_after == VG_NOTHING)
2001 continue;
2002
2003 /* Find the lowest-numbered spill slot which is available at the
2004 start point of this interval, and assign the interval to
2005 it. */
2006 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
2007 if (ss_busy_until_before[j] <= temp_info[i].live_after)
2008 break;
2009 if (j == VG_MAX_SPILLSLOTS) {
2010 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00002011 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00002012 }
2013 ss_busy_until_before[j] = temp_info[i].dead_before;
2014 temp_info[i].spill_no = j;
2015 if (j > max_ss_no)
2016 max_ss_no = j;
2017 }
2018
2019 VG_(total_reg_rank) += (max_ss_no+1);
2020
2021 /* Show live ranges and assigned spill slot nos. */
2022
njn25e49d8e72002-09-23 09:36:25 +00002023 if (dis) {
2024 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00002025
2026 for (i = 0; i < c1->nextTemp; i++) {
2027 if (temp_info[i].live_after == VG_NOTHING)
2028 continue;
2029 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002030 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00002031 i,
2032 temp_info[i].live_after,
2033 temp_info[i].dead_before,
2034 temp_info[i].spill_no
2035 );
2036 }
njn25e49d8e72002-09-23 09:36:25 +00002037 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00002038 }
2039
2040 /* Now that we've established a spill slot number for each used
2041 temporary, we can go ahead and do the core of the "Second-chance
2042 binpacking" allocation algorithm. */
2043
njn25e49d8e72002-09-23 09:36:25 +00002044 if (dis) VG_(printf)("Register allocated UCode:\n");
2045
2046
sewardjde4a1d02002-03-22 01:27:54 +00002047 /* Resulting code goes here. We generate it all in a forwards
2048 pass. */
njn4ba5a792002-09-30 10:23:54 +00002049 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002050 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00002051
2052 /* At the start, no TempRegs are assigned to any real register.
2053 Correspondingly, all temps claim to be currently resident in
2054 their spill slots, as computed by the previous two passes. */
2055 for (i = 0; i < VG_MAX_REALREGS; i++)
2056 real_to_temp[i] = VG_NOTHING;
2057 for (i = 0; i < c1->nextTemp; i++)
2058 temp_info[i].real_no = VG_NOTHING;
2059
sewardjde4a1d02002-03-22 01:27:54 +00002060 /* Process each insn in turn. */
2061 for (i = 0; i < c1->used; i++) {
2062
2063 if (c1->instrs[i].opcode == NOP) continue;
2064 VG_(uinstrs_prealloc)++;
2065
2066# if 0
2067 /* Check map consistency. Expensive but correct. */
2068 for (r = 0; r < VG_MAX_REALREGS; r++) {
2069 if (real_to_temp[r] != VG_NOTHING) {
2070 tno = real_to_temp[r];
2071 vg_assert(tno >= 0 && tno < c1->nextTemp);
2072 vg_assert(temp_info[tno].real_no == r);
2073 }
2074 }
2075 for (tno = 0; tno < c1->nextTemp; tno++) {
2076 if (temp_info[tno].real_no != VG_NOTHING) {
2077 r = temp_info[tno].real_no;
2078 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
2079 vg_assert(real_to_temp[r] == tno);
2080 }
2081 }
2082# endif
2083
njn25e49d8e72002-09-23 09:36:25 +00002084 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002085 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00002086
2087 /* First, free up enough real regs for this insn. This may
2088 generate spill stores since we may have to evict some TempRegs
2089 currently in real regs. Also generates spill loads. */
2090
njn810086f2002-11-14 12:42:47 +00002091 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2092 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002093 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002094
2095 /* For each ***different*** temp mentioned in the insn .... */
2096 for (j = 0; j < k; j++) {
2097
2098 /* First check if the temp is mentioned again later; if so,
2099 ignore this mention. We only want to process each temp
2100 used by the insn once, even if it is mentioned more than
2101 once. */
2102 defer = False;
njn810086f2002-11-14 12:42:47 +00002103 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00002104 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002105 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00002106 defer = True;
2107 if (defer)
2108 continue;
2109
njn810086f2002-11-14 12:42:47 +00002110 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00002111 First of all, if it already has a register assigned, we
2112 don't need to do anything more. */
2113 if (temp_info[tno].real_no != VG_NOTHING)
2114 continue;
2115
2116 /* No luck. The next thing to do is see if there is a
2117 currently unassigned register available. If so, bag it. */
2118 for (r = 0; r < VG_MAX_REALREGS; r++) {
2119 if (real_to_temp[r] == VG_NOTHING)
2120 break;
2121 }
2122 if (r < VG_MAX_REALREGS) {
2123 real_to_temp[r] = tno;
2124 temp_info[tno].real_no = r;
2125 continue;
2126 }
2127
2128 /* Unfortunately, that didn't pan out either. So we'll have
2129 to eject some other unfortunate TempReg into a spill slot
2130 in order to free up a register. Of course, we need to be
2131 careful not to eject some other TempReg needed by this
2132 insn.
2133
2134 Select r in 0 .. VG_MAX_REALREGS-1 such that
2135 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00002136 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002137 wrong to eject some other TempReg which we need to use in
2138 this insn.
2139
2140 It is here that it is important to make a good choice of
2141 register to spill. */
2142
2143 /* First, mark those regs which are not spill candidates. */
2144 for (r = 0; r < VG_MAX_REALREGS; r++) {
2145 is_spill_cand[r] = True;
2146 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002147 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002148 is_spill_cand[r] = False;
2149 break;
2150 }
2151 }
2152 }
2153
2154 /* We can choose any r satisfying is_spill_cand[r]. However,
2155 try to make a good choice. First, try and find r such
2156 that the associated TempReg is already dead. */
2157 for (r = 0; r < VG_MAX_REALREGS; r++) {
2158 if (is_spill_cand[r] &&
2159 temp_info[real_to_temp[r]].dead_before <= i)
2160 goto have_spill_cand;
2161 }
2162
2163 /* No spill cand is mapped to a dead TempReg. Now we really
2164 _do_ have to generate spill code. Choose r so that the
2165 next use of its associated TempReg is as far ahead as
2166 possible, in the hope that this will minimise the number of
2167 consequent reloads required. This is a bit expensive, but
2168 we don't have to do it very often. */
2169 {
2170 Int furthest_r = VG_MAX_REALREGS;
2171 Int furthest = 0;
2172 for (r = 0; r < VG_MAX_REALREGS; r++) {
2173 if (!is_spill_cand[r]) continue;
2174 for (m = i+1; m < c1->used; m++)
2175 if (uInstrMentionsTempReg(&c1->instrs[m],
2176 real_to_temp[r]))
2177 break;
2178 if (m > furthest) {
2179 furthest = m;
2180 furthest_r = r;
2181 }
2182 }
2183 r = furthest_r;
2184 goto have_spill_cand;
2185 }
2186
2187 have_spill_cand:
2188 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002189 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002190
2191 /* Eject r. Important refinement: don't bother if the
2192 associated TempReg is now dead. */
2193 vg_assert(real_to_temp[r] != VG_NOTHING);
2194 vg_assert(real_to_temp[r] != tno);
2195 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2196 if (temp_info[real_to_temp[r]].dead_before > i) {
2197 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002198 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002199 SpillNo, temp_info[real_to_temp[r]].spill_no);
2200 VG_(uinstrs_spill)++;
2201 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002202 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002203 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002204 }
2205
2206 /* Decide if tno is read. */
2207 isRead = False;
2208 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002209 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002210 isRead = True;
2211
2212 /* If so, generate a spill load. */
2213 if (isRead) {
2214 uInstr2(c2, GET, 4,
2215 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002216 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002217 VG_(uinstrs_spill)++;
2218 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002219 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002220 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002221 }
2222
2223 /* Update the forwards and backwards maps. */
2224 real_to_temp[r] = tno;
2225 temp_info[tno].real_no = r;
2226 }
2227
2228 /* By this point, all TempRegs mentioned by the insn have been
2229 bought into real regs. We now copy the insn to the output
2230 and use patchUInstr to convert its rTempRegs into
2231 realregs. */
2232 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002233 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002234 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002235 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002236
njn25e49d8e72002-09-23 09:36:25 +00002237 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002238 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002239 VG_(printf)("\n");
2240 }
2241 }
2242
2243 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002244 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002245
njn4ba5a792002-09-30 10:23:54 +00002246 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002247
2248 if (spill_reqd)
2249 VG_(translations_needing_spill)++;
2250
2251 return c2;
2252
2253# undef VG_NOTHING
2254
2255}
sewardj7c4b6042003-06-14 15:47:15 +00002256
njn25e49d8e72002-09-23 09:36:25 +00002257/* Analysis records liveness of all general-use RealRegs in the UCode. */
2258static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2259{
2260 Int i, j, k;
2261 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002262 Int regUse[VG_MAX_REGS_USED];
2263 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002264 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002265
njn25e49d8e72002-09-23 09:36:25 +00002266 /* All regs are dead at the end of the block */
2267 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002268
sewardjde4a1d02002-03-22 01:27:54 +00002269 for (i = cb->used-1; i >= 0; i--) {
2270 u = &cb->instrs[i];
2271
njn25e49d8e72002-09-23 09:36:25 +00002272 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002273
njn810086f2002-11-14 12:42:47 +00002274 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002275
njn25e49d8e72002-09-23 09:36:25 +00002276 /* For each reg usage ... bwds in program order. Variable is live
2277 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002278 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002279 convert it to our rank number. */
2280 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002281 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002282 rregs_live,
njn810086f2002-11-14 12:42:47 +00002283 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002284 }
2285 }
sewardjde4a1d02002-03-22 01:27:54 +00002286}
2287
sewardjde4a1d02002-03-22 01:27:54 +00002288/*------------------------------------------------------------*/
2289/*--- Main entry point for the JITter. ---*/
2290/*------------------------------------------------------------*/
2291
2292/* Translate the basic block beginning at orig_addr, placing the
2293 translation in a vg_malloc'd block, the address and size of which
2294 are returned in trans_addr and trans_size. Length of the original
2295 block is also returned in orig_size. If the latter three are NULL,
2296 this call is being done for debugging purposes, in which case (a)
2297 throw away the translation once it is made, and (b) produce a load
2298 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002299
2300 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002301*/
njn25e49d8e72002-09-23 09:36:25 +00002302void VG_(translate) ( /*IN*/ ThreadState* tst,
2303 /*IN*/ Addr orig_addr,
2304 /*OUT*/ UInt* orig_size,
2305 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002306 /*OUT*/ UInt* trans_size,
2307 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002308{
2309 Int n_disassembled_bytes, final_code_size;
2310 Bool debugging_translation;
2311 UChar* final_code;
2312 UCodeBlock* cb;
sewardja60be0e2003-05-26 08:47:27 +00002313 Bool notrace_until_done;
sewardje3891fa2003-06-15 03:13:48 +00002314 UInt notrace_until_limit = 53000;
sewardjde4a1d02002-03-22 01:27:54 +00002315
2316 VGP_PUSHCC(VgpTranslate);
2317 debugging_translation
2318 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2319
sewardja60be0e2003-05-26 08:47:27 +00002320 /* If codegen tracing, don't start tracing until
2321 notrace_until_limit blocks have gone by. This avoids printing
2322 huge amounts of useless junk when all we want to see is the last
2323 few blocks translated prior to a failure. Set
2324 notrace_until_limit to be the number of translations to be made
2325 before --trace-codegen= style printing takes effect. */
2326 notrace_until_done
2327 = VG_(overall_in_count) > notrace_until_limit;
2328
njn25e49d8e72002-09-23 09:36:25 +00002329 if (!debugging_translation)
2330 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002331
njn4ba5a792002-09-30 10:23:54 +00002332 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002333 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002334
njn25e49d8e72002-09-23 09:36:25 +00002335 /* If doing any code printing, print a basic block start marker */
sewardja60be0e2003-05-26 08:47:27 +00002336 if (VG_(clo_trace_codegen) && notrace_until_done) {
njn25e49d8e72002-09-23 09:36:25 +00002337 Char fnname[64] = "";
2338 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2339 VG_(printf)(
njne0205ff2003-04-08 00:56:14 +00002340 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %llu ====\n\n",
njn25e49d8e72002-09-23 09:36:25 +00002341 VG_(overall_in_count), fnname, orig_addr,
2342 VG_(overall_in_osize), VG_(overall_in_tsize),
2343 VG_(bbs_done));
2344 }
2345
2346 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
sewardja60be0e2003-05-26 08:47:27 +00002347# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2348 ( debugging_translation \
2349 || (notrace_until_done \
2350 && (VG_(clo_trace_codegen) & (1 << (n-1))) ))
njn25e49d8e72002-09-23 09:36:25 +00002351
sewardjde4a1d02002-03-22 01:27:54 +00002352 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002353 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2354 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002355 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002356 VGP_POPCC(VgpToUCode);
2357
sewardjde4a1d02002-03-22 01:27:54 +00002358 /* Try and improve the code a bit. */
2359 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002360 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2361 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002362 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002363 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002364 }
2365
njn25e49d8e72002-09-23 09:36:25 +00002366 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2367 SK_(instrument) looks at it. */
2368 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2369 VGP_PUSHCC(VgpInstrument);
2370 cb = SK_(instrument) ( cb, orig_addr );
2371 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002372 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002373 VG_(saneUCodeBlock)( cb );
2374 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002375
njn9b007f62003-04-07 14:40:25 +00002376 /* Add %ESP-update hooks if the skin requires them */
2377 /* Nb: We don't print out this phase, because it doesn't do much */
2378 if (VG_(need_to_handle_esp_assignment)()) {
2379 VGP_PUSHCC(VgpESPUpdate);
2380 cb = vg_ESP_update_pass ( cb );
2381 VGP_POPCC(VgpESPUpdate);
2382 }
2383
sewardjde4a1d02002-03-22 01:27:54 +00002384 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002385 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2386 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002387 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002388 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002389
njn25e49d8e72002-09-23 09:36:25 +00002390 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2391 * anything; results can be seen when emitting final code). */
2392 VGP_PUSHCC(VgpLiveness);
2393 vg_realreg_liveness_analysis ( cb );
2394 VGP_POPCC(VgpLiveness);
2395
2396 /* Emit final code */
2397 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2398
2399 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002400 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002401 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002402 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002403
njn25e49d8e72002-09-23 09:36:25 +00002404#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2405
sewardjde4a1d02002-03-22 01:27:54 +00002406 if (debugging_translation) {
2407 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002408 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002409 } else {
2410 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002411 *orig_size = n_disassembled_bytes;
2412 *trans_addr = (Addr)final_code;
2413 *trans_size = final_code_size;
2414 }
njn25e49d8e72002-09-23 09:36:25 +00002415 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002416}
2417
2418/*--------------------------------------------------------------------*/
2419/*--- end vg_translate.c ---*/
2420/*--------------------------------------------------------------------*/