blob: 9c21425f0113cfc4693ece23215bd3f7741c1a47 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
11 Copyright (C) 2000-2002 Julian Seward
12 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
njn25e49d8e72002-09-23 09:36:25 +000038#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000058 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000059 cb->used = cb->size = 0;
60 cb->nextTemp = cb_in->nextTemp;
61 cb->instrs = NULL;
62 return cb;
63}
sewardjde4a1d02002-03-22 01:27:54 +000064
njn4ba5a792002-09-30 10:23:54 +000065void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000066{
njn25e49d8e72002-09-23 09:36:25 +000067 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
68 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000069}
70
71
72/* Ensure there's enough space in a block to add one uinstr. */
73static __inline__
74void ensureUInstr ( UCodeBlock* cb )
75{
76 if (cb->used == cb->size) {
77 if (cb->instrs == NULL) {
78 vg_assert(cb->size == 0);
79 vg_assert(cb->used == 0);
80 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000081 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000082 } else {
83 Int i;
njn25e49d8e72002-09-23 09:36:25 +000084 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000085 2 * sizeof(UInstr) * cb->size);
86 for (i = 0; i < cb->used; i++)
87 instrs2[i] = cb->instrs[i];
88 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000089 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000090 cb->instrs = instrs2;
91 }
92 }
93
94 vg_assert(cb->used < cb->size);
95}
96
97
98__inline__
njn4ba5a792002-09-30 10:23:54 +000099void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000100{
101 u->val1 = u->val2 = u->val3 = 0;
102 u->tag1 = u->tag2 = u->tag3 = NoValue;
103 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000104 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000105 u->signed_widen = u->has_ret_val = False;
106 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000107 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000108 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000109 u->size = 0;
110 u->cond = 0;
111 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000112 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000113}
114
115
116/* Add an instruction to a ucode block, and return the index of the
117 instruction. */
118__inline__
njn4ba5a792002-09-30 10:23:54 +0000119void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000120 Tag tag1, UInt val1,
121 Tag tag2, UInt val2,
122 Tag tag3, UInt val3 )
123{
124 UInstr* ui;
125 ensureUInstr(cb);
126 ui = & cb->instrs[cb->used];
127 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000128 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000129 ui->val1 = val1;
130 ui->val2 = val2;
131 ui->val3 = val3;
132 ui->opcode = opcode;
133 ui->tag1 = tag1;
134 ui->tag2 = tag2;
135 ui->tag3 = tag3;
136 ui->size = sz;
137 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
138 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
139 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
140}
141
142
143__inline__
njn4ba5a792002-09-30 10:23:54 +0000144void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000145 Tag tag1, UInt val1,
146 Tag tag2, UInt val2 )
147{
148 UInstr* ui;
149 ensureUInstr(cb);
150 ui = & cb->instrs[cb->used];
151 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000152 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000153 ui->val1 = val1;
154 ui->val2 = val2;
155 ui->opcode = opcode;
156 ui->tag1 = tag1;
157 ui->tag2 = tag2;
158 ui->size = sz;
159 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
160 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
161}
162
163
164__inline__
njn4ba5a792002-09-30 10:23:54 +0000165void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000166 Tag tag1, UInt val1 )
167{
168 UInstr* ui;
169 ensureUInstr(cb);
170 ui = & cb->instrs[cb->used];
171 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000172 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000173 ui->val1 = val1;
174 ui->opcode = opcode;
175 ui->tag1 = tag1;
176 ui->size = sz;
177 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
178}
179
180
181__inline__
njn4ba5a792002-09-30 10:23:54 +0000182void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000183{
184 UInstr* ui;
185 ensureUInstr(cb);
186 ui = & cb->instrs[cb->used];
187 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000188 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000189 ui->opcode = opcode;
190 ui->size = sz;
191}
192
sewardjde4a1d02002-03-22 01:27:54 +0000193/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000194__inline__
njn4ba5a792002-09-30 10:23:54 +0000195void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000196{
197 ensureUInstr(cb);
198 cb->instrs[cb->used] = *instr;
199 cb->used++;
200}
201
sewardjde4a1d02002-03-22 01:27:54 +0000202/* Copy auxiliary info from one uinstr to another. */
203static __inline__
204void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
205{
njn25e49d8e72002-09-23 09:36:25 +0000206 dst->cond = src->cond;
207 dst->extra4b = src->extra4b;
208 dst->signed_widen = src->signed_widen;
209 dst->jmpkind = src->jmpkind;
210 dst->flags_r = src->flags_r;
211 dst->flags_w = src->flags_w;
212 dst->argc = src->argc;
213 dst->regparms_n = src->regparms_n;
214 dst->has_ret_val = src->has_ret_val;
215 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000216}
217
218
sewardjde4a1d02002-03-22 01:27:54 +0000219/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000220void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000221{
222 LAST_UINSTR(cb).lit32 = lit32;
223}
224
225
njn25e49d8e72002-09-23 09:36:25 +0000226/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000227void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
228 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000229{
230 vg_assert(argc < 4);
231 vg_assert(regparms_n <= argc);
232 LAST_UINSTR(cb).lit32 = fn;
233 LAST_UINSTR(cb).argc = argc;
234 LAST_UINSTR(cb).regparms_n = regparms_n;
235 LAST_UINSTR(cb).has_ret_val = has_ret_val;
236}
237
njn810086f2002-11-14 12:42:47 +0000238/* For the last uinsn inserted into cb, set the read, written and
239 undefined flags. Undefined flags are counted as written, but it
240 seems worthwhile to distinguish them.
241*/
242__inline__
243void VG_(set_flag_fields) ( UCodeBlock* cb,
244 FlagSet rr, FlagSet ww, FlagSet uu )
245{
246 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
247
248 vg_assert(rr == (rr & FlagsALL));
249 vg_assert(uw == (uw & FlagsALL));
250 LAST_UINSTR(cb).flags_r = rr;
251 LAST_UINSTR(cb).flags_w = uw;
252}
253
254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
njn25e49d8e72002-09-23 09:36:25 +0000261#if 1
262# define BEST_ALLOC_ORDER
263#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
330/* This seems as good a place as any to record some important stuff
331 about ucode semantics.
332
333 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
334 TempReg are defined to zero-extend the loaded value to 32 bits.
335 This is needed to make the translation of movzbl et al work
336 properly.
337
338 * Similarly, GETs of a 8/16 bit ArchRegs are zero-extended.
339
340 * Arithmetic on TempRegs is at the specified size. For example,
341 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
342 being emitted -- not a 32 bit one.
343
344 * On some insns we allow the cc bit to be set. If so, the
345 intention is that the simulated machine's %eflags register
346 is copied into that of the real machine before the insn,
347 and copied back again afterwards. This means that the
348 code generated for that insn must be very careful only to
349 update %eflags in the intended way. This is particularly
350 important for the routines referenced by CALL insns.
351*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
   Before liveness analysis, the regs_live_after field should be
   ALL_RREGS_LIVE (this is checked for every uinstr except CCALL).
   Afterwards, it may hold any value.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
njn25e49d8e72002-09-23 09:36:25 +0000402Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000403{
njn25e49d8e72002-09-23 09:36:25 +0000404# define LIT0 (u->lit32 == 0)
405# define LIT1 (!(LIT0))
406# define LITm (u->tag1 == Literal ? True : LIT0 )
sewardj3d7c9c82003-03-26 21:08:13 +0000407# define SZ8 (u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000408# define SZ4 (u->size == 4)
409# define SZ2 (u->size == 2)
410# define SZ1 (u->size == 1)
411# define SZ0 (u->size == 0)
412# define SZ42 (u->size == 4 || u->size == 2)
sewardjd7971012003-04-04 00:21:58 +0000413# define SZ48 (u->size == 4 || u->size == 8)
njn25e49d8e72002-09-23 09:36:25 +0000414# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
415# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
416 || u->size == 10 || u->size == 28 || u->size == 108)
417# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
418 ? (u->size == 4) : True)
419
420/* For these ones, two cases:
421 *
422 * 1. They are transliterations of the corresponding x86 instruction, in
423 * which case they should have its flags (except that redundant write
424 * flags can be annulled by the optimisation pass).
425 *
426 * 2. They are being used generally for other purposes, eg. helping with a
427 * 'rep'-prefixed instruction, in which case should have empty flags .
428 */
429# define emptyR (u->flags_r == FlagsEmpty)
430# define emptyW (u->flags_w == FlagsEmpty)
431# define CC0 (emptyR && emptyW)
432# define CCr (u->flags_r == FlagsALL && emptyW)
433# define CCw (emptyR && u->flags_w == FlagsALL)
434# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
435# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
436# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
437# define CCb ((u->flags_r==FlagC || emptyR) && \
438 (u->flags_w==FlagsOSZACP || emptyW))
439# define CCd ((u->flags_r==FlagC || emptyR) && \
440 (u->flags_w==FlagsOC || emptyW))
sewardjc232b212002-12-10 22:24:03 +0000441# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
442 || (u->flags_r==FlagsZCP && emptyW))
njn25e49d8e72002-09-23 09:36:25 +0000443# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
444# define CCj (u->cond==CondAlways ? CC0 : CCg)
445
sewardjde4a1d02002-03-22 01:27:54 +0000446# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
447# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
448# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
449# define A1 (u->tag1 == ArchReg)
450# define A2 (u->tag2 == ArchReg)
451# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
452# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
453# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
454# define L1 (u->tag1 == Literal && u->val1 == 0)
455# define L2 (u->tag2 == Literal && u->val2 == 0)
456# define Ls1 (u->tag1 == Lit16)
457# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000458# define TRL1 (TR1 || L1)
459# define TRAL1 (TR1 || A1 || L1)
sewardjde4a1d02002-03-22 01:27:54 +0000460# define N1 (u->tag1 == NoValue)
461# define N2 (u->tag2 == NoValue)
462# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000463# define Se1 (u->tag1 == ArchRegS)
464# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000465
njn25e49d8e72002-09-23 09:36:25 +0000466# define COND0 (u->cond == 0)
467# define EXTRA4b0 (u->extra4b == 0)
468# define SG_WD0 (u->signed_widen == 0)
469# define JMPKIND0 (u->jmpkind == 0)
470# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
471 ( beforeLiveness \
472 ? u->regs_live_after == ALL_RREGS_LIVE \
473 : True ))
474
475# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
476# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
477# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
478# define XJMP ( SG_WD0 && CCALL0)
479# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
480# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
481
482 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000483 Int n_lits = 0;
484 if (u->tag1 == Literal) n_lits++;
485 if (u->tag2 == Literal) n_lits++;
486 if (u->tag3 == Literal) n_lits++;
487 if (n_lits > 1)
488 return False;
489
njn25e49d8e72002-09-23 09:36:25 +0000490 /* Fields not checked: val1, val2, val3 */
491
sewardjde4a1d02002-03-22 01:27:54 +0000492 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000493
494 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000495 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
496 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
497 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000498 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000499 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000500 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
501 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
502 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
503 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
504 case LOAD:
505 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
506 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
507 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
508 case WIDEN: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XWIDEN;
509 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
510 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
511 case CALLM_S:
512 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
513 case PUSH:
514 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
515 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
516 case AND:
517 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
518 case ADD:
519 case XOR:
520 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
521 case SBB:
522 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
523 case SHL:
524 case SHR:
525 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
526 case ROL:
527 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
528 case RCL:
529 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
530 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
531 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
532 case INC:
533 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
534 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
535 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
536 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
537 case FPU_R:
538 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
539 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
540 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
541 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
542 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
543 case CCALL: return LIT1 && SZ0 && CC0 &&
544 (u->argc > 0 ? TR1 : N1) &&
545 (u->argc > 1 ? TR2 : N2) &&
546 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
547 u->regparms_n <= u->argc && XCCALL;
sewardj3d7c9c82003-03-26 21:08:13 +0000548 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
549 case MMX1:
550 case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
sewardjca860012003-03-27 23:52:58 +0000551 case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls1 && N3 && XOTHER;
sewardjd7971012003-04-04 00:21:58 +0000552 case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjd1c9e432003-04-04 20:40:34 +0000553 case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjca860012003-03-27 23:52:58 +0000554 case MMX2_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
sewardjd1c9e432003-04-04 20:40:34 +0000555 case MMX2_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000556 default:
557 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000558 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000559 else {
560 VG_(printf)("unhandled opcode: %u. Perhaps "
561 "VG_(needs).extended_UCode should be set?",
562 u->opcode);
njne427a662002-10-02 11:08:25 +0000563 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000564 }
sewardjde4a1d02002-03-22 01:27:54 +0000565 }
njn25e49d8e72002-09-23 09:36:25 +0000566# undef LIT0
567# undef LIT1
568# undef LITm
sewardj3d7c9c82003-03-26 21:08:13 +0000569# undef SZ8
sewardjde4a1d02002-03-22 01:27:54 +0000570# undef SZ4
571# undef SZ2
572# undef SZ1
573# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000574# undef SZ42
sewardjd7971012003-04-04 00:21:58 +0000575# undef SZ48
njn25e49d8e72002-09-23 09:36:25 +0000576# undef SZi
577# undef SZf
578# undef SZ4m
579# undef emptyR
580# undef emptyW
581# undef CC0
582# undef CCr
583# undef CCw
584# undef CCa
585# undef CCb
586# undef CCc
587# undef CCd
588# undef CCe
589# undef CCf
590# undef CCg
591# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000592# undef TR1
593# undef TR2
594# undef TR3
595# undef A1
596# undef A2
597# undef AS1
598# undef AS2
599# undef AS3
600# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000601# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000602# undef Ls1
sewardjde4a1d02002-03-22 01:27:54 +0000603# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000604# undef TRL1
605# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000606# undef N1
607# undef N2
608# undef N3
sewardje1042472002-09-30 12:33:11 +0000609# undef Se2
610# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000611# undef COND0
612# undef EXTRA4b0
613# undef SG_WD0
614# undef JMPKIND0
615# undef CCALL0
616# undef Xextra4b
617# undef XWIDEN
618# undef XJMP
619# undef XCCALL
620# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000621}
622
njn25e49d8e72002-09-23 09:36:25 +0000623void VG_(saneUCodeBlock) ( UCodeBlock* cb )
624{
625 Int i;
626
627 for (i = 0; i < cb->used; i++) {
628 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
629 if (!sane) {
630 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000631 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000632 }
633 vg_assert(sane);
634 }
635}
sewardjde4a1d02002-03-22 01:27:54 +0000636
637/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000638Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000639{
640 Int callm = 0;
641 Int callm_s = 0;
642 Int callm_e = 0;
643 Int callm_ptr, calls_ptr;
644 Int i, j, t;
645 Bool incall = False;
646
647 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
648
649 for (i = 0; i < cb->used; i++) {
650 switch (cb->instrs[i].opcode) {
651 case CALLM:
652 if (!incall) return False;
653 callm++;
654 break;
655 case CALLM_S:
656 if (incall) return False;
657 incall = True;
658 callm_s++;
659 break;
660 case CALLM_E:
661 if (!incall) return False;
662 incall = False;
663 callm_e++;
664 break;
665 case PUSH: case POP: case CLEAR:
666 if (!incall) return False;
667 break;
668 default:
669 break;
670 }
671 }
672 if (incall) return False;
673 if (callm != callm_s || callm != callm_e) return False;
674
675 /* Check the sections between CALLM_S and CALLM's. Ensure that no
676 PUSH uinsn pushes any TempReg that any other PUSH in the same
677 section pushes. Ie, check that the TempReg args to PUSHes in
678 the section are unique. If not, the instrumenter generates
679 incorrect code for CALLM insns. */
680
681 callm_ptr = 0;
682
683 find_next_CALLM:
684 /* Search for the next interval, making calls_ptr .. callm_ptr
685 bracket it. */
686 while (callm_ptr < cb->used
687 && cb->instrs[callm_ptr].opcode != CALLM)
688 callm_ptr++;
689 if (callm_ptr == cb->used)
690 return True;
691 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
692
693 calls_ptr = callm_ptr - 1;
694 while (cb->instrs[calls_ptr].opcode != CALLM_S)
695 calls_ptr--;
696 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
697 vg_assert(calls_ptr >= 0);
698
699 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
700
701 /* For each PUSH insn in the interval ... */
702 for (i = calls_ptr + 1; i < callm_ptr; i++) {
703 if (cb->instrs[i].opcode != PUSH) continue;
704 t = cb->instrs[i].val1;
705 /* Ensure no later PUSH insns up to callm_ptr push the same
706 TempReg. Return False if any such are found. */
707 for (j = i+1; j < callm_ptr; j++) {
708 if (cb->instrs[j].opcode == PUSH &&
709 cb->instrs[j].val1 == t)
710 return False;
711 }
712 }
713
714 /* This interval is clean. Keep going ... */
715 callm_ptr++;
716 goto find_next_CALLM;
717}
718
719
720/*------------------------------------------------------------*/
721/*--- Printing uinstrs. ---*/
722/*------------------------------------------------------------*/
723
njn25e49d8e72002-09-23 09:36:25 +0000724/* Global that dictates whether to print generated code at all stages */
725Bool VG_(print_codegen);
726
njn563f96f2003-02-03 11:17:46 +0000727Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000728{
729 switch (cond) {
730 case CondO: return "o";
731 case CondNO: return "no";
732 case CondB: return "b";
733 case CondNB: return "nb";
734 case CondZ: return "z";
735 case CondNZ: return "nz";
736 case CondBE: return "be";
737 case CondNBE: return "nbe";
738 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000739 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000740 case CondP: return "p";
741 case CondNP: return "np";
742 case CondL: return "l";
743 case CondNL: return "nl";
744 case CondLE: return "le";
745 case CondNLE: return "nle";
746 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000747 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000748 }
749}
750
751
752static void vg_ppFlagSet ( Char* prefix, FlagSet set )
753{
754 VG_(printf)("%s", prefix);
755 if (set & FlagD) VG_(printf)("D");
756 if (set & FlagO) VG_(printf)("O");
757 if (set & FlagS) VG_(printf)("S");
758 if (set & FlagZ) VG_(printf)("Z");
759 if (set & FlagA) VG_(printf)("A");
760 if (set & FlagC) VG_(printf)("C");
761 if (set & FlagP) VG_(printf)("P");
762}
763
764
765static void ppTempReg ( Int tt )
766{
767 if ((tt & 1) == 0)
768 VG_(printf)("t%d", tt);
769 else
770 VG_(printf)("q%d", tt-1);
771}
772
773
njn4ba5a792002-09-30 10:23:54 +0000774void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000775{
776 UInt tag, val;
777 switch (operandNo) {
778 case 1: tag = u->tag1; val = u->val1; break;
779 case 2: tag = u->tag2; val = u->val2; break;
780 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000781 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000782 }
783 if (tag == Literal) val = u->lit32;
784
785 if (parens) VG_(printf)("(");
786 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000787 case TempReg: ppTempReg(val); break;
788 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
789 case Literal: VG_(printf)("$0x%x", val); break;
790 case Lit16: VG_(printf)("$0x%x", val); break;
791 case NoValue: VG_(printf)("NoValue"); break;
792 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
793 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
794 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000795 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000796 }
797 if (parens) VG_(printf)(")");
798}
799
800
njn4ba5a792002-09-30 10:23:54 +0000801Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000802{
803 switch (opc) {
804 case ADD: return (upper ? "ADD" : "add");
805 case ADC: return (upper ? "ADC" : "adc");
806 case AND: return (upper ? "AND" : "and");
807 case OR: return (upper ? "OR" : "or");
808 case XOR: return (upper ? "XOR" : "xor");
809 case SUB: return (upper ? "SUB" : "sub");
810 case SBB: return (upper ? "SBB" : "sbb");
811 case SHL: return (upper ? "SHL" : "shl");
812 case SHR: return (upper ? "SHR" : "shr");
813 case SAR: return (upper ? "SAR" : "sar");
814 case ROL: return (upper ? "ROL" : "rol");
815 case ROR: return (upper ? "ROR" : "ror");
816 case RCL: return (upper ? "RCL" : "rcl");
817 case RCR: return (upper ? "RCR" : "rcr");
818 case NOT: return (upper ? "NOT" : "not");
819 case NEG: return (upper ? "NEG" : "neg");
820 case INC: return (upper ? "INC" : "inc");
821 case DEC: return (upper ? "DEC" : "dec");
822 case BSWAP: return (upper ? "BSWAP" : "bswap");
823 default: break;
824 }
njne427a662002-10-02 11:08:25 +0000825 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000826 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000827 case CALLM_S: return "CALLM_S";
828 case CALLM_E: return "CALLM_E";
829 case INCEIP: return "INCEIP";
830 case LEA1: return "LEA1";
831 case LEA2: return "LEA2";
832 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000833 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000834 case GET: return "GET";
835 case PUT: return "PUT";
836 case GETF: return "GETF";
837 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000838 case GETSEG: return "GETSEG";
839 case PUTSEG: return "PUTSEG";
840 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000841 case LOAD: return "LD" ;
842 case STORE: return "ST" ;
843 case MOV: return "MOV";
844 case CMOV: return "CMOV";
845 case WIDEN: return "WIDEN";
846 case JMP: return "J" ;
847 case JIFZ: return "JIFZ" ;
848 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000849 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000850 case PUSH: return "PUSH" ;
851 case POP: return "POP" ;
852 case CLEAR: return "CLEAR";
853 case CC2VAL: return "CC2VAL";
854 case FPU_R: return "FPU_R";
855 case FPU_W: return "FPU_W";
856 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000857 case MMX1: return "MMX1" ;
858 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000859 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000860 case MMX2_MemRd: return "MMX2_MRd" ;
861 case MMX2_MemWr: return "MMX2_MWr" ;
sewardjca860012003-03-27 23:52:58 +0000862 case MMX2_RegRd: return "MMX2_RRd" ;
sewardjd1c9e432003-04-04 20:40:34 +0000863 case MMX2_RegWr: return "MMX2_RWr" ;
njn25e49d8e72002-09-23 09:36:25 +0000864 default:
865 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000866 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000867 else {
868 VG_(printf)("unhandled opcode: %u. Perhaps "
869 "VG_(needs).extended_UCode should be set?",
870 opc);
njne427a662002-10-02 11:08:25 +0000871 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000872 }
sewardjde4a1d02002-03-22 01:27:54 +0000873 }
874}
875
sewardja38e0922002-10-01 00:50:47 +0000876static
njn4ba5a792002-09-30 10:23:54 +0000877void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000878{
879# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000880 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000881 u->regs_live_after) \
882 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000883
njn25e49d8e72002-09-23 09:36:25 +0000884 VG_(printf)("[");
885 PRINT_RREG_LIVENESS(R_EAX, "a");
886 PRINT_RREG_LIVENESS(R_EBX, "b");
887 PRINT_RREG_LIVENESS(R_ECX, "c");
888 PRINT_RREG_LIVENESS(R_EDX, "d");
889 PRINT_RREG_LIVENESS(R_ESI, "S");
890 PRINT_RREG_LIVENESS(R_EDI, "D");
891 VG_(printf)("]");
892
893# undef PRINT_RREG_LIVENESS
894}
895
896/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000897void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000898{
njn4ba5a792002-09-30 10:23:54 +0000899 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000900
901 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000902 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000903 VG_(printf)("size: %d\n", u->size);
904 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
905 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000906 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
907 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
908 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
909 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000910 VG_(printf)("signed_widen: %d\n", u->signed_widen);
911 VG_(printf)("jmpkind: %d\n", u->jmpkind);
912 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
913 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
914 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000915 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000916 VG_(printf)("\n");
917}
918
sewardja38e0922002-10-01 00:50:47 +0000919static
njn4ba5a792002-09-30 10:23:54 +0000920void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000921{
922 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000923 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000924 if (u->opcode == JMP || u->opcode == CC2VAL)
njn563f96f2003-02-03 11:17:46 +0000925 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000926
927 switch (u->size) {
928 case 0: VG_(printf)("o"); break;
929 case 1: VG_(printf)("B"); break;
930 case 2: VG_(printf)("W"); break;
931 case 4: VG_(printf)("L"); break;
932 case 8: VG_(printf)("Q"); break;
933 default: VG_(printf)("%d", (Int)u->size); break;
934 }
935
936 switch (u->opcode) {
937
sewardjde4a1d02002-03-22 01:27:54 +0000938 case CALLM_S: case CALLM_E:
939 break;
940
941 case INCEIP:
942 VG_(printf)("\t$%d", u->val1);
943 break;
944
945 case LEA2:
946 VG_(printf)("\t%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000947 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000948 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +0000949 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000950 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +0000951 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000952 break;
953
954 case LEA1:
955 VG_(printf)("\t%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000956 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000957 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +0000958 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000959 break;
960
sewardj7a5ebcf2002-11-13 22:42:13 +0000961 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +0000962 break;
963
964 case FPU_W:
965 VG_(printf)("\t0x%x:0x%x, ",
966 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +0000967 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000968 break;
969
970 case FPU_R:
971 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +0000972 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000973 VG_(printf)(", 0x%x:0x%x",
974 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
975 break;
976
977 case FPU:
978 VG_(printf)("\t0x%x:0x%x",
979 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
980 break;
981
sewardj3d7c9c82003-03-26 21:08:13 +0000982 case MMX1:
983 VG_(printf)("\t0x%x",
984 u->val1 & 0xFF );
985 break;
986
987 case MMX2:
988 VG_(printf)("\t0x%x:0x%x",
989 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
990 break;
991
sewardjca860012003-03-27 23:52:58 +0000992 case MMX3:
993 VG_(printf)("\t0x%x:0x%x:0x%x",
994 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
995 break;
996
sewardjd1c9e432003-04-04 20:40:34 +0000997 case MMX2_RegWr:
sewardjca860012003-03-27 23:52:58 +0000998 case MMX2_RegRd:
999 VG_(printf)("\t0x%x:0x%x, ",
1000 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1001 VG_(pp_UOperand)(u, 2, 4, False);
1002 break;
1003
sewardj3d7c9c82003-03-26 21:08:13 +00001004 case MMX2_MemWr:
1005 case MMX2_MemRd:
sewardjca860012003-03-27 23:52:58 +00001006 VG_(printf)("\t0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001007 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1008 VG_(pp_UOperand)(u, 2, 4, True);
1009 break;
1010
sewardjde4a1d02002-03-22 01:27:54 +00001011 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001012 case GETSEG: case PUTSEG:
sewardjde4a1d02002-03-22 01:27:54 +00001013 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001014 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001015 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001016 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001017 break;
1018
1019 case JMP:
1020 switch (u->jmpkind) {
1021 case JmpCall: VG_(printf)("-c"); break;
1022 case JmpRet: VG_(printf)("-r"); break;
1023 case JmpSyscall: VG_(printf)("-sys"); break;
1024 case JmpClientReq: VG_(printf)("-cli"); break;
1025 default: break;
1026 }
1027 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001028 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001029 if (CondAlways == u->cond) {
1030 /* Print x86 instruction size if filled in */
1031 if (0 != u->extra4b)
1032 VG_(printf)(" ($%u)", u->extra4b);
1033 }
sewardjde4a1d02002-03-22 01:27:54 +00001034 break;
1035
1036 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001037 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1038 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001039 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001040 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001041 break;
1042
njn25e49d8e72002-09-23 09:36:25 +00001043 /* Print a "(s)" after args passed on stack */
1044 case CCALL:
1045 VG_(printf)("\t");
1046 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001047 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001048 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001049 }
njn25e49d8e72002-09-23 09:36:25 +00001050 VG_(printf)("%p(", u->lit32);
1051 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001052 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001053 if (u->regparms_n < 1)
1054 VG_(printf)("(s)");
1055 }
1056 if (u->argc > 1) {
1057 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001058 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001059 if (u->regparms_n < 2)
1060 VG_(printf)("(s)");
1061 }
1062 if (u->argc > 2) {
1063 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001064 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001065 if (u->regparms_n < 3)
1066 VG_(printf)("(s)");
1067 }
1068 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001069 break;
1070
sewardje1042472002-09-30 12:33:11 +00001071 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001072 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001073 case ADD: case ADC: case AND: case OR:
1074 case XOR: case SUB: case SBB:
1075 case SHL: case SHR: case SAR:
1076 case ROL: case ROR: case RCL: case RCR:
1077 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001078 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001079 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001080 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001081 break;
1082
1083 case WIDEN:
1084 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1085 u->signed_widen?'s':'z');
1086 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001087 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001088 break;
1089
njn25e49d8e72002-09-23 09:36:25 +00001090 default:
1091 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001092 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001093 else {
1094 VG_(printf)("unhandled opcode: %u. Perhaps "
1095 "VG_(needs).extended_UCode should be set?",
1096 u->opcode);
njne427a662002-10-02 11:08:25 +00001097 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001098 }
sewardjde4a1d02002-03-22 01:27:54 +00001099 }
sewardjde4a1d02002-03-22 01:27:54 +00001100 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1101 VG_(printf)(" (");
1102 if (u->flags_r != FlagsEmpty)
1103 vg_ppFlagSet("-r", u->flags_r);
1104 if (u->flags_w != FlagsEmpty)
1105 vg_ppFlagSet("-w", u->flags_w);
1106 VG_(printf)(")");
1107 }
njn25e49d8e72002-09-23 09:36:25 +00001108
1109 if (ppRegsLiveness) {
1110 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001111 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001112 }
1113
sewardjde4a1d02002-03-22 01:27:54 +00001114 VG_(printf)("\n");
1115}
1116
njn4ba5a792002-09-30 10:23:54 +00001117void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001118{
njn4ba5a792002-09-30 10:23:54 +00001119 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001120}
1121
njn4ba5a792002-09-30 10:23:54 +00001122void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001123{
njn4ba5a792002-09-30 10:23:54 +00001124 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001125}
sewardjde4a1d02002-03-22 01:27:54 +00001126
njn4ba5a792002-09-30 10:23:54 +00001127void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001128{
1129 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001130 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001131 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001132 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001133 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001134 VG_(printf)("\n");
1135}
1136
1137
/*------------------------------------------------------------*/
/*--- uinstr helpers for register allocation               ---*/
/*--- and code improvement.                                ---*/
/*------------------------------------------------------------*/
1142
njn25e49d8e72002-09-23 09:36:25 +00001143/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001144 the caller (regs), which is assumed to be big enough. Return the number
1145 of entries. Written regs are indicated in parallel array isWrites.
1146 Insns which read _and_ write a register wind up mentioning it twice.
1147 Entries are placed in the array in program order, so that if a reg is
1148 read-modified-written, it appears first as a read and then as a write.
1149 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001150*/
njn25e49d8e72002-09-23 09:36:25 +00001151__inline__
njn810086f2002-11-14 12:42:47 +00001152Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001153{
njn810086f2002-11-14 12:42:47 +00001154# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1155# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001156
1157 Int n = 0;
1158 switch (u->opcode) {
1159 case LEA1: RD(1); WR(2); break;
1160 case LEA2: RD(1); RD(2); WR(3); break;
1161
sewardjca860012003-03-27 23:52:58 +00001162 case MMX2_RegRd: RD(2); break;
sewardjd1c9e432003-04-04 20:40:34 +00001163 case MMX2_RegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001164
1165 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001166 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001167 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001168
1169 case CCALL:
1170 if (u->argc > 0) RD(1);
1171 if (u->argc > 1) RD(2);
1172 if (u->argc > 2) RD(3);
1173 if (u->has_ret_val) WR(3);
1174 break;
1175
sewardj3d7c9c82003-03-26 21:08:13 +00001176 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001177 case FPU_R: case FPU_W: RD(2); break;
1178
sewardje1042472002-09-30 12:33:11 +00001179 case GETSEG: WR(2); break;
1180 case PUTSEG: RD(1); break;
1181
sewardjde4a1d02002-03-22 01:27:54 +00001182 case GETF: WR(1); break;
1183 case PUTF: RD(1); break;
1184
1185 case GET: WR(2); break;
1186 case PUT: RD(1); break;
1187 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001188 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001189 case MOV: RD(1); WR(2); break;
1190
1191 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001192
njn25e49d8e72002-09-23 09:36:25 +00001193 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001194 case POP: WR(1); break;
1195
sewardje1042472002-09-30 12:33:11 +00001196 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001197 case CMOV:
1198 case ADD: case ADC: case AND: case OR:
1199 case XOR: case SUB: case SBB:
1200 RD(1); RD(2); WR(2); break;
1201
1202 case SHL: case SHR: case SAR:
1203 case ROL: case ROR: case RCL: case RCR:
1204 RD(1); RD(2); WR(2); break;
1205
njn25e49d8e72002-09-23 09:36:25 +00001206 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001207 RD(1); WR(1); break;
1208
1209 case WIDEN: RD(1); WR(1); break;
1210
1211 case CC2VAL: WR(1); break;
1212 case JIFZ: RD(1); break;
1213
njn25e49d8e72002-09-23 09:36:25 +00001214 default:
1215 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001216 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001217 else {
1218 VG_(printf)("unhandled opcode: %u. Perhaps "
1219 "VG_(needs).extended_UCode should be set?",
1220 u->opcode);
njne427a662002-10-02 11:08:25 +00001221 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001222 }
sewardjde4a1d02002-03-22 01:27:54 +00001223 }
1224 return n;
1225
1226# undef RD
1227# undef WR
1228}
1229
1230
njn25e49d8e72002-09-23 09:36:25 +00001231/* Change temp regs in u into real regs, as directed by the
1232 * temps[i]-->reals[i] mapping. */
1233static __inline__
njn810086f2002-11-14 12:42:47 +00001234void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001235{
1236 Int i;
1237 if (u->tag1 == TempReg) {
1238 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001239 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001240 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001241 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001242 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001243 }
1244 if (u->tag2 == TempReg) {
1245 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001246 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001247 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001248 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001249 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001250 }
1251 if (u->tag3 == TempReg) {
1252 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001253 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001254 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001255 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001256 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001257 }
1258}
1259
1260
1261/* Tedious x86-specific hack which compensates for the fact that the
1262 register numbers for %ah .. %dh do not correspond to those for %eax
1263 .. %edx. It maps a (reg size, reg no) pair to the number of the
1264 containing 32-bit reg. */
1265static __inline__
1266Int containingArchRegOf ( Int sz, Int aregno )
1267{
1268 switch (sz) {
1269 case 4: return aregno;
1270 case 2: return aregno;
1271 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001272 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001273 }
1274}
1275
1276
1277/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001278 reg. Otherwise return -1. Used in redundant-PUT elimination.
1279 Note that this is not required for skins extending UCode because
1280 this happens before instrumentation. */
sewardjde4a1d02002-03-22 01:27:54 +00001281static __inline__
1282Int maybe_uinstrReadsArchReg ( UInstr* u )
1283{
1284 switch (u->opcode) {
1285 case GET:
1286 case ADD: case ADC: case AND: case OR:
1287 case XOR: case SUB: case SBB:
1288 case SHL: case SHR: case SAR: case ROL:
1289 case ROR: case RCL: case RCR:
1290 if (u->tag1 == ArchReg)
1291 return containingArchRegOf ( u->size, u->val1 );
1292 else
1293 return -1;
1294
1295 case GETF: case PUTF:
1296 case CALLM_S: case CALLM_E:
1297 case INCEIP:
1298 case LEA1:
1299 case LEA2:
1300 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001301 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001302 case PUT:
1303 case LOAD:
1304 case STORE:
1305 case MOV:
1306 case CMOV:
1307 case JMP:
1308 case CALLM: case CLEAR: case PUSH: case POP:
1309 case NOT: case NEG: case INC: case DEC: case BSWAP:
1310 case CC2VAL:
1311 case JIFZ:
1312 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001313 case MMX1: case MMX2: case MMX3:
sewardj3d7c9c82003-03-26 21:08:13 +00001314 case MMX2_MemRd: case MMX2_MemWr:
sewardjd1c9e432003-04-04 20:40:34 +00001315 case MMX2_RegRd: case MMX2_RegWr:
sewardjde4a1d02002-03-22 01:27:54 +00001316 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001317 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1318 case GETSEG: case PUTSEG:
1319 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001320 return -1;
1321
1322 default:
njn4ba5a792002-09-30 10:23:54 +00001323 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001324 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001325 }
1326}
1327
1328static __inline__
1329Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1330{
1331 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001332 Int tempUse[VG_MAX_REGS_USED];
1333 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001334
1335 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001336 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001337 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001338 return True;
1339 return False;
1340}
1341
1342
/*------------------------------------------------------------*/
/*--- ucode improvement.                                   ---*/
/*------------------------------------------------------------*/
1346
1347/* Improve the code in cb by doing
1348 -- Redundant ArchReg-fetch elimination
1349 -- Redundant PUT elimination
1350 -- Redundant cond-code restore/save elimination
1351 The overall effect of these is to allow target registers to be
1352 cached in host registers over multiple target insns.
1353*/
1354static void vg_improve ( UCodeBlock* cb )
1355{
1356 Int i, j, k, m, n, ar, tr, told, actual_areg;
1357 Int areg_map[8];
1358 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001359 Int tempUse[VG_MAX_REGS_USED];
1360 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001361 UInstr* u;
1362 Bool wr;
1363 Int* last_live_before;
1364 FlagSet future_dead_flags;
1365
njn25e49d8e72002-09-23 09:36:25 +00001366 if (dis)
1367 VG_(printf) ("Improvements:\n");
1368
sewardjde4a1d02002-03-22 01:27:54 +00001369 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001370 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1371 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001372 else
1373 last_live_before = NULL;
1374
1375
1376 /* PASS 1: redundant GET elimination. (Actually, more general than
1377 that -- eliminates redundant fetches of ArchRegs). */
1378
1379 /* Find the live-range-ends for all temporaries. Duplicates code
1380 in the register allocator :-( */
1381
1382 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1383
1384 for (i = cb->used-1; i >= 0; i--) {
1385 u = &cb->instrs[i];
1386
njn810086f2002-11-14 12:42:47 +00001387 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001388
1389 /* For each temp usage ... bwds in program order. */
1390 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001391 tr = tempUse[j];
1392 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001393 if (last_live_before[tr] == -1) {
1394 vg_assert(tr >= 0 && tr < cb->nextTemp);
1395 last_live_before[tr] = wr ? (i+1) : i;
1396 }
1397 }
1398
1399 }
1400
1401# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1402 { Int q; \
1403 /* Invalidate any old binding(s) to tempreg. */ \
1404 for (q = 0; q < 8; q++) \
1405 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1406 /* Add the new binding. */ \
1407 areg_map[archreg] = (tempreg); \
1408 }
1409
1410 /* Set up the A-reg map. */
1411 for (i = 0; i < 8; i++) areg_map[i] = -1;
1412
1413 /* Scan insns. */
1414 for (i = 0; i < cb->used; i++) {
1415 u = &cb->instrs[i];
1416 if (u->opcode == GET && u->size == 4) {
1417 /* GET; see if it can be annulled. */
1418 vg_assert(u->tag1 == ArchReg);
1419 vg_assert(u->tag2 == TempReg);
1420 ar = u->val1;
1421 tr = u->val2;
1422 told = areg_map[ar];
1423 if (told != -1 && last_live_before[told] <= i) {
1424 /* ar already has an old mapping to told, but that runs
1425 out here. Annul this GET, rename tr to told for the
1426 rest of the block, and extend told's live range to that
1427 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001428 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001429 n = last_live_before[tr] + 1;
1430 if (n > cb->used) n = cb->used;
1431 last_live_before[told] = last_live_before[tr];
1432 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001433 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001434 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001435 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001436 i, tr, told,i+1, n-1);
1437 for (m = i+1; m < n; m++) {
1438 if (cb->instrs[m].tag1 == TempReg
1439 && cb->instrs[m].val1 == tr)
1440 cb->instrs[m].val1 = told;
1441 if (cb->instrs[m].tag2 == TempReg
1442 && cb->instrs[m].val2 == tr)
1443 cb->instrs[m].val2 = told;
1444 }
1445 BIND_ARCH_TO_TEMP(ar,told);
1446 }
1447 else
1448 BIND_ARCH_TO_TEMP(ar,tr);
1449 }
1450 else if (u->opcode == GET && u->size != 4) {
1451 /* Invalidate any mapping for this archreg. */
1452 actual_areg = containingArchRegOf ( u->size, u->val1 );
1453 areg_map[actual_areg] = -1;
1454 }
1455 else if (u->opcode == PUT && u->size == 4) {
1456 /* PUT; re-establish t -> a binding */
1457 vg_assert(u->tag1 == TempReg);
1458 vg_assert(u->tag2 == ArchReg);
1459 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1460 }
1461 else if (u->opcode == PUT && u->size != 4) {
1462 /* Invalidate any mapping for this archreg. */
1463 actual_areg = containingArchRegOf ( u->size, u->val2 );
1464 areg_map[actual_areg] = -1;
1465 } else {
1466
1467 /* see if insn has an archreg as a read operand; if so try to
1468 map it. */
1469 if (u->tag1 == ArchReg && u->size == 4
1470 && areg_map[u->val1] != -1) {
1471 switch (u->opcode) {
1472 case ADD: case SUB: case AND: case OR: case XOR:
1473 case ADC: case SBB:
1474 case SHL: case SHR: case SAR: case ROL: case ROR:
1475 case RCL: case RCR:
njn25e49d8e72002-09-23 09:36:25 +00001476 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001477 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001478 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001479 i, nameIReg(4,u->val1), areg_map[u->val1]);
1480 u->tag1 = TempReg;
1481 u->val1 = areg_map[u->val1];
1482 /* Remember to extend the live range of the TempReg,
1483 if necessary. */
1484 if (last_live_before[u->val1] < i)
1485 last_live_before[u->val1] = i;
1486 break;
1487 default:
1488 break;
1489 }
1490 }
1491
1492 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001493 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001494
1495 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001496 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001497 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001498 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001499 for (m = 0; m < 8; m++)
1500 if (areg_map[m] == tr) areg_map[m] = -1;
1501 }
1502 }
1503
1504 }
1505
1506# undef BIND_ARCH_TO_TEMP
1507
sewardj05f1aa12002-04-30 00:29:36 +00001508 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1509 %ESP, since the memory check machinery always requires the
1510 in-memory value of %ESP to be up to date. Although this isn't
1511 actually required by other analyses (cache simulation), it's
1512 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001513 for (j = 0; j < 8; j++)
1514 annul_put[j] = False;
1515
1516 for (i = cb->used-1; i >= 0; i--) {
1517 u = &cb->instrs[i];
1518 if (u->opcode == NOP) continue;
1519
1520 if (u->opcode == PUT && u->size == 4) {
1521 vg_assert(u->tag2 == ArchReg);
1522 actual_areg = containingArchRegOf ( 4, u->val2 );
1523 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001524 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001525 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001526 if (dis)
1527 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001528 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001529 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001530 annul_put[actual_areg] = True;
1531 }
1532 }
1533 else if (u->opcode == PUT && u->size != 4) {
1534 actual_areg = containingArchRegOf ( u->size, u->val2 );
1535 annul_put[actual_areg] = False;
1536 }
1537 else if (u->opcode == JMP || u->opcode == JIFZ
1538 || u->opcode == CALLM) {
1539 for (j = 0; j < 8; j++)
1540 annul_put[j] = False;
1541 }
1542 else {
1543 /* If an instruction reads an ArchReg, the immediately
1544 preceding PUT cannot be annulled. */
1545 actual_areg = maybe_uinstrReadsArchReg ( u );
1546 if (actual_areg != -1)
1547 annul_put[actual_areg] = False;
1548 }
1549 }
1550
1551 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1552 dead after this point, annul the MOV insn and rename t2 to t1.
1553 Further modifies the last_live_before map. */
1554
1555# if 0
njn4ba5a792002-09-30 10:23:54 +00001556 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001557 for (i = 0; i < cb->nextTemp; i++)
1558 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1559 VG_(printf)("\n");
1560# endif
1561
1562 for (i = 0; i < cb->used-1; i++) {
1563 u = &cb->instrs[i];
1564 if (u->opcode != MOV) continue;
1565 if (u->tag1 == Literal) continue;
1566 vg_assert(u->tag1 == TempReg);
1567 vg_assert(u->tag2 == TempReg);
1568 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001569 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001570 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001571 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001572 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1573 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1574 if (cb->instrs[j].tag1 == TempReg
1575 && cb->instrs[j].val1 == u->val2)
1576 cb->instrs[j].val1 = u->val1;
1577 if (cb->instrs[j].tag2 == TempReg
1578 && cb->instrs[j].val2 == u->val2)
1579 cb->instrs[j].val2 = u->val1;
1580 }
1581 last_live_before[u->val1] = last_live_before[u->val2];
1582 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001583 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001584 }
1585 }
1586
1587 /* PASS 3: redundant condition-code restore/save elimination.
1588 Scan backwards from the end. future_dead_flags records the set
1589 of flags which are dead at this point, that is, will be written
1590 before they are next read. Earlier uinsns which write flags
1591 already in future_dead_flags can have their writes annulled.
1592 */
1593 future_dead_flags = FlagsEmpty;
1594
1595 for (i = cb->used-1; i >= 0; i--) {
1596 u = &cb->instrs[i];
1597
1598 /* We might never make it to insns beyond this one, so be
1599 conservative. */
1600 if (u->opcode == JIFZ || u->opcode == JMP) {
1601 future_dead_flags = FlagsEmpty;
1602 continue;
1603 }
1604
sewardjfbb6cda2002-07-24 09:33:52 +00001605 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1606 For example people try to mess with bit 21 to see if CPUID
1607 works. The setting may or may not actually take hold. So we
1608 play safe here. */
1609 if (u->opcode == PUTF) {
1610 future_dead_flags = FlagsEmpty;
1611 continue;
1612 }
1613
sewardjde4a1d02002-03-22 01:27:54 +00001614 /* We can annul the flags written by this insn if it writes a
1615 subset (or eq) of the set of flags known to be dead after
1616 this insn. If not, just record the flags also written by
1617 this insn.*/
1618 if (u->flags_w != FlagsEmpty
1619 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001620 if (dis) {
1621 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001622 vg_ppFlagSet("", u->flags_w);
1623 VG_(printf)(" due to later ");
1624 vg_ppFlagSet("", future_dead_flags);
1625 VG_(printf)("\n");
1626 }
1627 u->flags_w = FlagsEmpty;
1628 } else {
1629 future_dead_flags
1630 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1631 }
1632
1633 /* If this insn also reads flags, empty out future_dead_flags so
1634 as to force preceding writes not to be annulled. */
1635 if (u->flags_r != FlagsEmpty)
1636 future_dead_flags = FlagsEmpty;
1637 }
1638
1639 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001640 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1641
1642 if (dis) {
1643 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001644 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001645 }
sewardjde4a1d02002-03-22 01:27:54 +00001646}
1647
njn9b007f62003-04-07 14:40:25 +00001648/*------------------------------------------------------------*/
1649/*--- %ESP-update pass ---*/
1650/*------------------------------------------------------------*/
1651
1652/* For skins that want to know about %ESP changes, this pass adds
1653 in the appropriate hooks. We have to do it after the skin's
1654 instrumentation, so the skin doesn't have to worry about the CCALLs
1655 it adds in, and we must do it before register allocation because
1656 spilled temps make it much harder to work out the %esp deltas.
1657 Thus we have it as an extra phase between the two. */
1658static
1659UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1660{
1661 UCodeBlock* cb;
1662 UInstr* u;
1663 Int delta = 0;
1664 UInt t_ESP = INVALID_TEMPREG;
1665 UInt i;
1666
1667 cb = VG_(setup_UCodeBlock)(cb_in);
1668
1669 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1670 u = VG_(get_instr)(cb_in, i);
1671
1672 if (GET == u->opcode && R_ESP == u->val1) {
1673 t_ESP = u->val2;
1674 delta = 0;
1675
1676 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1677
1678# define DO_GENERIC \
1679 if (VG_(track_events).new_mem_stack || \
1680 VG_(track_events).die_mem_stack) { \
1681 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1682 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1683 1, 1, False); \
1684 }
1685
1686# define DO(kind, size) \
1687 if (VG_(track_events).kind##_mem_stack_##size) { \
1688 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1689 uCCall(cb, (Addr) VG_(track_events).kind##_mem_stack_##size,\
1690 1, 1, False); \
1691 \
1692 } else \
1693 DO_GENERIC \
1694 break
1695
1696 if (u->val1 == t_ESP) {
1697 /* Known delta, common cases handled specially. */
1698 switch (delta) {
1699 case 4: DO(die, 4);
1700 case -4: DO(new, 4);
1701 case 8: DO(die, 8);
1702 case -8: DO(new, 8);
1703 case 12: DO(die, 12);
1704 case -12: DO(new, 12);
1705 case 16: DO(die, 16);
1706 case -16: DO(new, 16);
1707 case 32: DO(die, 32);
1708 case -32: DO(new, 32);
1709 default: DO_GENERIC; break;
1710 }
1711 } else {
1712 /* Unknown delta */
1713 DO_GENERIC;
1714 }
1715 delta = 0;
1716
1717# undef DO
1718# undef DO_GENERIC
1719
1720 } else if (Literal == u->tag1 && t_ESP == u->val2) {
1721 if (ADD == u->opcode) delta += u->lit32;
1722 if (SUB == u->opcode) delta -= u->lit32;
1723
1724 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1725 TempReg == u->tag2) {
1726 t_ESP = u->val2;
1727 }
1728 VG_(copy_UInstr) ( cb, u );
1729 }
1730
1731 VG_(free_UCodeBlock)(cb_in);
1732 return cb;
1733}
sewardjde4a1d02002-03-22 01:27:54 +00001734
1735/*------------------------------------------------------------*/
1736/*--- The new register allocator. ---*/
1737/*------------------------------------------------------------*/
1738
1739typedef
1740 struct {
1741 /* Becomes live for the first time after this insn ... */
1742 Int live_after;
1743 /* Becomes dead for the last time after this insn ... */
1744 Int dead_before;
1745 /* The "home" spill slot, if needed. Never changes. */
1746 Int spill_no;
1747 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1748 Int real_no;
1749 }
1750 TempInfo;
1751
1752
1753/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1754 them in spill locations, and add spill code, if there are not
1755 enough real regs. The usual register allocation deal, in short.
1756
1757 Important redundancy of representation:
1758
1759 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1760 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1761
1762 The .real_no field of a TempInfo gives the current RRR for
1763 this TempReg, or VG_NOVALUE if the TempReg is currently
1764 in memory, in which case it is in the SpillNo denoted by
1765 spillno.
1766
1767 These pieces of information (a fwds-bwds mapping, really) must
1768 be kept consistent!
1769
1770 This allocator uses the so-called Second Chance Bin Packing
1771 algorithm, as described in "Quality and Speed in Linear-scan
1772 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1773 pp142-151). It is simple and fast and remarkably good at
1774 minimising the amount of spill code introduced.
1775*/
1776
1777static
1778UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1779{
1780 TempInfo* temp_info;
1781 Int real_to_temp[VG_MAX_REALREGS];
1782 Bool is_spill_cand[VG_MAX_REALREGS];
1783 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1784 Int i, j, k, m, r, tno, max_ss_no;
1785 Bool wr, defer, isRead, spill_reqd;
njnf4ce3d32003-02-10 10:17:26 +00001786 UInt realUse[VG_MAX_REGS_USED];
1787 Int tempUse[VG_MAX_REGS_USED];
1788 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001789 UCodeBlock* c2;
1790
1791 /* Used to denote ... well, "no value" in this fn. */
1792# define VG_NOTHING (-2)
1793
1794 /* Initialise the TempReg info. */
1795 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001796 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1797 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001798 else
1799 temp_info = NULL;
1800
1801 for (i = 0; i < c1->nextTemp; i++) {
1802 temp_info[i].live_after = VG_NOTHING;
1803 temp_info[i].dead_before = VG_NOTHING;
1804 temp_info[i].spill_no = VG_NOTHING;
1805 /* temp_info[i].real_no is not yet relevant. */
1806 }
1807
1808 spill_reqd = False;
1809
1810 /* Scan fwds to establish live ranges. */
1811
1812 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00001813 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1814 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00001815 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00001816
1817 /* For each temp usage ... fwds in program order */
1818 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001819 tno = tempUse[j];
1820 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001821 if (wr) {
1822 /* Writes hold a reg live until after this insn. */
1823 if (temp_info[tno].live_after == VG_NOTHING)
1824 temp_info[tno].live_after = i;
1825 if (temp_info[tno].dead_before < i + 1)
1826 temp_info[tno].dead_before = i + 1;
1827 } else {
1828 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00001829 if (temp_info[tno].live_after == VG_NOTHING) {
1830 VG_(printf)("At instr %d...\n", i);
1831 VG_(core_panic)("First use of tmp not a write,"
1832 " probably a skin instrumentation error");
1833 }
sewardjde4a1d02002-03-22 01:27:54 +00001834 /* Reads only hold it live until before this insn. */
1835 if (temp_info[tno].dead_before < i)
1836 temp_info[tno].dead_before = i;
1837 }
1838 }
1839 }
1840
1841# if 0
1842 /* Sanity check on live ranges. Expensive but correct. */
1843 for (i = 0; i < c1->nextTemp; i++) {
1844 vg_assert( (temp_info[i].live_after == VG_NOTHING
1845 && temp_info[i].dead_before == VG_NOTHING)
1846 || (temp_info[i].live_after != VG_NOTHING
1847 && temp_info[i].dead_before != VG_NOTHING) );
1848 }
1849# endif
1850
1851 /* Do a rank-based allocation of TempRegs to spill slot numbers.
1852 We put as few as possible values in spill slots, but
1853 nevertheless need to have an assignment to them just in case. */
1854
1855 max_ss_no = -1;
1856
1857 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
1858 ss_busy_until_before[i] = 0;
1859
1860 for (i = 0; i < c1->nextTemp; i++) {
1861
1862 /* True iff this temp is unused. */
1863 if (temp_info[i].live_after == VG_NOTHING)
1864 continue;
1865
1866 /* Find the lowest-numbered spill slot which is available at the
1867 start point of this interval, and assign the interval to
1868 it. */
1869 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
1870 if (ss_busy_until_before[j] <= temp_info[i].live_after)
1871 break;
1872 if (j == VG_MAX_SPILLSLOTS) {
1873 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00001874 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00001875 }
1876 ss_busy_until_before[j] = temp_info[i].dead_before;
1877 temp_info[i].spill_no = j;
1878 if (j > max_ss_no)
1879 max_ss_no = j;
1880 }
1881
1882 VG_(total_reg_rank) += (max_ss_no+1);
1883
1884 /* Show live ranges and assigned spill slot nos. */
1885
njn25e49d8e72002-09-23 09:36:25 +00001886 if (dis) {
1887 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00001888
1889 for (i = 0; i < c1->nextTemp; i++) {
1890 if (temp_info[i].live_after == VG_NOTHING)
1891 continue;
1892 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001893 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001894 i,
1895 temp_info[i].live_after,
1896 temp_info[i].dead_before,
1897 temp_info[i].spill_no
1898 );
1899 }
njn25e49d8e72002-09-23 09:36:25 +00001900 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00001901 }
1902
1903 /* Now that we've established a spill slot number for each used
1904 temporary, we can go ahead and do the core of the "Second-chance
1905 binpacking" allocation algorithm. */
1906
njn25e49d8e72002-09-23 09:36:25 +00001907 if (dis) VG_(printf)("Register allocated UCode:\n");
1908
1909
sewardjde4a1d02002-03-22 01:27:54 +00001910 /* Resulting code goes here. We generate it all in a forwards
1911 pass. */
njn4ba5a792002-09-30 10:23:54 +00001912 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00001913 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00001914
1915 /* At the start, no TempRegs are assigned to any real register.
1916 Correspondingly, all temps claim to be currently resident in
1917 their spill slots, as computed by the previous two passes. */
1918 for (i = 0; i < VG_MAX_REALREGS; i++)
1919 real_to_temp[i] = VG_NOTHING;
1920 for (i = 0; i < c1->nextTemp; i++)
1921 temp_info[i].real_no = VG_NOTHING;
1922
sewardjde4a1d02002-03-22 01:27:54 +00001923 /* Process each insn in turn. */
1924 for (i = 0; i < c1->used; i++) {
1925
1926 if (c1->instrs[i].opcode == NOP) continue;
1927 VG_(uinstrs_prealloc)++;
1928
1929# if 0
1930 /* Check map consistency. Expensive but correct. */
1931 for (r = 0; r < VG_MAX_REALREGS; r++) {
1932 if (real_to_temp[r] != VG_NOTHING) {
1933 tno = real_to_temp[r];
1934 vg_assert(tno >= 0 && tno < c1->nextTemp);
1935 vg_assert(temp_info[tno].real_no == r);
1936 }
1937 }
1938 for (tno = 0; tno < c1->nextTemp; tno++) {
1939 if (temp_info[tno].real_no != VG_NOTHING) {
1940 r = temp_info[tno].real_no;
1941 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
1942 vg_assert(real_to_temp[r] == tno);
1943 }
1944 }
1945# endif
1946
njn25e49d8e72002-09-23 09:36:25 +00001947 if (dis)
njn4ba5a792002-09-30 10:23:54 +00001948 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00001949
1950 /* First, free up enough real regs for this insn. This may
1951 generate spill stores since we may have to evict some TempRegs
1952 currently in real regs. Also generates spill loads. */
1953
njn810086f2002-11-14 12:42:47 +00001954 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1955 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00001956 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00001957
1958 /* For each ***different*** temp mentioned in the insn .... */
1959 for (j = 0; j < k; j++) {
1960
1961 /* First check if the temp is mentioned again later; if so,
1962 ignore this mention. We only want to process each temp
1963 used by the insn once, even if it is mentioned more than
1964 once. */
1965 defer = False;
njn810086f2002-11-14 12:42:47 +00001966 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001967 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00001968 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00001969 defer = True;
1970 if (defer)
1971 continue;
1972
njn810086f2002-11-14 12:42:47 +00001973 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00001974 First of all, if it already has a register assigned, we
1975 don't need to do anything more. */
1976 if (temp_info[tno].real_no != VG_NOTHING)
1977 continue;
1978
1979 /* No luck. The next thing to do is see if there is a
1980 currently unassigned register available. If so, bag it. */
1981 for (r = 0; r < VG_MAX_REALREGS; r++) {
1982 if (real_to_temp[r] == VG_NOTHING)
1983 break;
1984 }
1985 if (r < VG_MAX_REALREGS) {
1986 real_to_temp[r] = tno;
1987 temp_info[tno].real_no = r;
1988 continue;
1989 }
1990
1991 /* Unfortunately, that didn't pan out either. So we'll have
1992 to eject some other unfortunate TempReg into a spill slot
1993 in order to free up a register. Of course, we need to be
1994 careful not to eject some other TempReg needed by this
1995 insn.
1996
1997 Select r in 0 .. VG_MAX_REALREGS-1 such that
1998 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00001999 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002000 wrong to eject some other TempReg which we need to use in
2001 this insn.
2002
2003 It is here that it is important to make a good choice of
2004 register to spill. */
2005
2006 /* First, mark those regs which are not spill candidates. */
2007 for (r = 0; r < VG_MAX_REALREGS; r++) {
2008 is_spill_cand[r] = True;
2009 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002010 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002011 is_spill_cand[r] = False;
2012 break;
2013 }
2014 }
2015 }
2016
2017 /* We can choose any r satisfying is_spill_cand[r]. However,
2018 try to make a good choice. First, try and find r such
2019 that the associated TempReg is already dead. */
2020 for (r = 0; r < VG_MAX_REALREGS; r++) {
2021 if (is_spill_cand[r] &&
2022 temp_info[real_to_temp[r]].dead_before <= i)
2023 goto have_spill_cand;
2024 }
2025
2026 /* No spill cand is mapped to a dead TempReg. Now we really
2027 _do_ have to generate spill code. Choose r so that the
2028 next use of its associated TempReg is as far ahead as
2029 possible, in the hope that this will minimise the number of
2030 consequent reloads required. This is a bit expensive, but
2031 we don't have to do it very often. */
2032 {
2033 Int furthest_r = VG_MAX_REALREGS;
2034 Int furthest = 0;
2035 for (r = 0; r < VG_MAX_REALREGS; r++) {
2036 if (!is_spill_cand[r]) continue;
2037 for (m = i+1; m < c1->used; m++)
2038 if (uInstrMentionsTempReg(&c1->instrs[m],
2039 real_to_temp[r]))
2040 break;
2041 if (m > furthest) {
2042 furthest = m;
2043 furthest_r = r;
2044 }
2045 }
2046 r = furthest_r;
2047 goto have_spill_cand;
2048 }
2049
2050 have_spill_cand:
2051 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002052 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002053
2054 /* Eject r. Important refinement: don't bother if the
2055 associated TempReg is now dead. */
2056 vg_assert(real_to_temp[r] != VG_NOTHING);
2057 vg_assert(real_to_temp[r] != tno);
2058 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2059 if (temp_info[real_to_temp[r]].dead_before > i) {
2060 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002061 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002062 SpillNo, temp_info[real_to_temp[r]].spill_no);
2063 VG_(uinstrs_spill)++;
2064 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002065 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002066 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002067 }
2068
2069 /* Decide if tno is read. */
2070 isRead = False;
2071 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002072 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002073 isRead = True;
2074
2075 /* If so, generate a spill load. */
2076 if (isRead) {
2077 uInstr2(c2, GET, 4,
2078 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002079 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002080 VG_(uinstrs_spill)++;
2081 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002082 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002083 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002084 }
2085
2086 /* Update the forwards and backwards maps. */
2087 real_to_temp[r] = tno;
2088 temp_info[tno].real_no = r;
2089 }
2090
2091 /* By this point, all TempRegs mentioned by the insn have been
2092 bought into real regs. We now copy the insn to the output
2093 and use patchUInstr to convert its rTempRegs into
2094 realregs. */
2095 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002096 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002097 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002098 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002099
njn25e49d8e72002-09-23 09:36:25 +00002100 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002101 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002102 VG_(printf)("\n");
2103 }
2104 }
2105
2106 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002107 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002108
njn4ba5a792002-09-30 10:23:54 +00002109 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002110
2111 if (spill_reqd)
2112 VG_(translations_needing_spill)++;
2113
2114 return c2;
2115
2116# undef VG_NOTHING
2117
2118}
njn25e49d8e72002-09-23 09:36:25 +00002119extern void fooble(int);
2120/* Analysis records liveness of all general-use RealRegs in the UCode. */
2121static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2122{
2123 Int i, j, k;
2124 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002125 Int regUse[VG_MAX_REGS_USED];
2126 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002127 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002128
njn25e49d8e72002-09-23 09:36:25 +00002129 /* All regs are dead at the end of the block */
2130 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002131
sewardjde4a1d02002-03-22 01:27:54 +00002132 for (i = cb->used-1; i >= 0; i--) {
2133 u = &cb->instrs[i];
2134
njn25e49d8e72002-09-23 09:36:25 +00002135 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002136
njn810086f2002-11-14 12:42:47 +00002137 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002138
njn25e49d8e72002-09-23 09:36:25 +00002139 /* For each reg usage ... bwds in program order. Variable is live
2140 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002141 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002142 convert it to our rank number. */
2143 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002144 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002145 rregs_live,
njn810086f2002-11-14 12:42:47 +00002146 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002147 }
2148 }
sewardjde4a1d02002-03-22 01:27:54 +00002149}
2150
sewardjde4a1d02002-03-22 01:27:54 +00002151/*------------------------------------------------------------*/
2152/*--- Main entry point for the JITter. ---*/
2153/*------------------------------------------------------------*/
2154
2155/* Translate the basic block beginning at orig_addr, placing the
2156 translation in a vg_malloc'd block, the address and size of which
2157 are returned in trans_addr and trans_size. Length of the original
2158 block is also returned in orig_size. If the latter three are NULL,
2159 this call is being done for debugging purposes, in which case (a)
2160 throw away the translation once it is made, and (b) produce a load
2161 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002162
2163 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002164*/
njn25e49d8e72002-09-23 09:36:25 +00002165void VG_(translate) ( /*IN*/ ThreadState* tst,
2166 /*IN*/ Addr orig_addr,
2167 /*OUT*/ UInt* orig_size,
2168 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002169 /*OUT*/ UInt* trans_size,
2170 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002171{
2172 Int n_disassembled_bytes, final_code_size;
2173 Bool debugging_translation;
2174 UChar* final_code;
2175 UCodeBlock* cb;
2176
2177 VGP_PUSHCC(VgpTranslate);
2178 debugging_translation
2179 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2180
njn25e49d8e72002-09-23 09:36:25 +00002181 if (!debugging_translation)
2182 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002183
njn4ba5a792002-09-30 10:23:54 +00002184 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002185 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002186
njn25e49d8e72002-09-23 09:36:25 +00002187 /* If doing any code printing, print a basic block start marker */
2188 if (VG_(clo_trace_codegen)) {
2189 Char fnname[64] = "";
2190 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2191 VG_(printf)(
2192 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %lu ====\n\n",
2193 VG_(overall_in_count), fnname, orig_addr,
2194 VG_(overall_in_osize), VG_(overall_in_tsize),
2195 VG_(bbs_done));
2196 }
2197
2198 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
2199# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2200 ( debugging_translation || (VG_(clo_trace_codegen) & (1 << (n-1))) )
2201
sewardjde4a1d02002-03-22 01:27:54 +00002202 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002203 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2204 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002205 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002206 VGP_POPCC(VgpToUCode);
2207
sewardjde4a1d02002-03-22 01:27:54 +00002208 /* Try and improve the code a bit. */
2209 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002210 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2211 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002212 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002213 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002214 }
2215
njn25e49d8e72002-09-23 09:36:25 +00002216 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2217 SK_(instrument) looks at it. */
2218 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2219 VGP_PUSHCC(VgpInstrument);
2220 cb = SK_(instrument) ( cb, orig_addr );
2221 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002222 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002223 VG_(saneUCodeBlock)( cb );
2224 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002225
njn9b007f62003-04-07 14:40:25 +00002226 /* Add %ESP-update hooks if the skin requires them */
2227 /* Nb: We don't print out this phase, because it doesn't do much */
2228 if (VG_(need_to_handle_esp_assignment)()) {
2229 VGP_PUSHCC(VgpESPUpdate);
2230 cb = vg_ESP_update_pass ( cb );
2231 VGP_POPCC(VgpESPUpdate);
2232 }
2233
sewardjde4a1d02002-03-22 01:27:54 +00002234 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002235 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2236 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002237 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002238 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002239
njn25e49d8e72002-09-23 09:36:25 +00002240 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2241 * anything; results can be seen when emitting final code). */
2242 VGP_PUSHCC(VgpLiveness);
2243 vg_realreg_liveness_analysis ( cb );
2244 VGP_POPCC(VgpLiveness);
2245
2246 /* Emit final code */
2247 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2248
2249 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002250 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002251 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002252 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002253
njn25e49d8e72002-09-23 09:36:25 +00002254#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2255
sewardjde4a1d02002-03-22 01:27:54 +00002256 if (debugging_translation) {
2257 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002258 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002259 } else {
2260 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002261 *orig_size = n_disassembled_bytes;
2262 *trans_addr = (Addr)final_code;
2263 *trans_size = final_code_size;
2264 }
njn25e49d8e72002-09-23 09:36:25 +00002265 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002266}
2267
2268/*--------------------------------------------------------------------*/
2269/*--- end vg_translate.c ---*/
2270/*--------------------------------------------------------------------*/