blob: c3ac29849b244a377384e4eae5475d4c132cf781 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
nethercotebb1c9912004-01-04 16:43:23 +000011 Copyright (C) 2000-2004 Julian Seward
sewardjde4a1d02002-03-22 01:27:54 +000012 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
/* Shorthand used in this file: `dis' expands to VG_(print_codegen). */
#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
/* True iff every flag in set1 is also present in set2.  Each argument
   is parenthesised before being cast, so a compound expression passed
   as an argument is converted to FlagSet as a whole rather than having
   the cast bind only to its first operand. */
#define VG_IS_FLAG_SUBSET(set1,set2) \
   (( ((FlagSet)(set1)) & ((FlagSet)(set2)) ) == ((FlagSet)(set1)) )

/* The union of two flag sets.  Arguments are parenthesised for the
   same reason as in VG_IS_FLAG_SUBSET. */
#define VG_UNION_FLAG_SETS(set1,set2) \
   ( ((FlagSet)(set1)) | ((FlagSet)(set2)) )
50
njn810086f2002-11-14 12:42:47 +000051/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000052UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000053{
njn25e49d8e72002-09-23 09:36:25 +000054 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000055 cb->used = cb->size = cb->nextTemp = 0;
56 cb->instrs = NULL;
57 return cb;
58}
59
njn810086f2002-11-14 12:42:47 +000060/* This one is called by skins */
61UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
62{
63 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardj22854b92002-11-30 14:00:47 +000064 cb->orig_eip = cb_in->orig_eip;
njn810086f2002-11-14 12:42:47 +000065 cb->used = cb->size = 0;
66 cb->nextTemp = cb_in->nextTemp;
67 cb->instrs = NULL;
68 return cb;
69}
sewardjde4a1d02002-03-22 01:27:54 +000070
njn4ba5a792002-09-30 10:23:54 +000071void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000072{
njn25e49d8e72002-09-23 09:36:25 +000073 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
74 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000075}
76
77
78/* Ensure there's enough space in a block to add one uinstr. */
daywalkerb18d2532003-09-27 20:15:01 +000079static
sewardjde4a1d02002-03-22 01:27:54 +000080void ensureUInstr ( UCodeBlock* cb )
81{
82 if (cb->used == cb->size) {
83 if (cb->instrs == NULL) {
84 vg_assert(cb->size == 0);
85 vg_assert(cb->used == 0);
86 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000087 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000088 } else {
89 Int i;
njn25e49d8e72002-09-23 09:36:25 +000090 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000091 2 * sizeof(UInstr) * cb->size);
92 for (i = 0; i < cb->used; i++)
93 instrs2[i] = cb->instrs[i];
94 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000095 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000096 cb->instrs = instrs2;
97 }
98 }
99
100 vg_assert(cb->used < cb->size);
101}
102
103
104__inline__
njn4ba5a792002-09-30 10:23:54 +0000105void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000106{
107 u->val1 = u->val2 = u->val3 = 0;
108 u->tag1 = u->tag2 = u->tag3 = NoValue;
109 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000110 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000111 u->signed_widen = u->has_ret_val = False;
112 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000113 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000114 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000115 u->size = 0;
116 u->cond = 0;
117 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000118 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000119}
120
121
122/* Add an instruction to a ucode block, and return the index of the
123 instruction. */
124__inline__
njn4ba5a792002-09-30 10:23:54 +0000125void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000126 Tag tag1, UInt val1,
127 Tag tag2, UInt val2,
128 Tag tag3, UInt val3 )
129{
130 UInstr* ui;
131 ensureUInstr(cb);
132 ui = & cb->instrs[cb->used];
133 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000134 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000135 ui->val1 = val1;
136 ui->val2 = val2;
137 ui->val3 = val3;
138 ui->opcode = opcode;
139 ui->tag1 = tag1;
140 ui->tag2 = tag2;
141 ui->tag3 = tag3;
142 ui->size = sz;
143 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
144 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
145 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
146}
147
148
149__inline__
njn4ba5a792002-09-30 10:23:54 +0000150void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000151 Tag tag1, UInt val1,
152 Tag tag2, UInt val2 )
153{
154 UInstr* ui;
155 ensureUInstr(cb);
156 ui = & cb->instrs[cb->used];
157 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000158 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000159 ui->val1 = val1;
160 ui->val2 = val2;
161 ui->opcode = opcode;
162 ui->tag1 = tag1;
163 ui->tag2 = tag2;
164 ui->size = sz;
165 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
166 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
167}
168
169
170__inline__
njn4ba5a792002-09-30 10:23:54 +0000171void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000172 Tag tag1, UInt val1 )
173{
174 UInstr* ui;
175 ensureUInstr(cb);
176 ui = & cb->instrs[cb->used];
177 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000178 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000179 ui->val1 = val1;
180 ui->opcode = opcode;
181 ui->tag1 = tag1;
182 ui->size = sz;
183 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
184}
185
186
187__inline__
njn4ba5a792002-09-30 10:23:54 +0000188void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000189{
190 UInstr* ui;
191 ensureUInstr(cb);
192 ui = & cb->instrs[cb->used];
193 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000194 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000195 ui->opcode = opcode;
196 ui->size = sz;
197}
198
sewardjde4a1d02002-03-22 01:27:54 +0000199/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000200__inline__
njn4ba5a792002-09-30 10:23:54 +0000201void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000202{
203 ensureUInstr(cb);
204 cb->instrs[cb->used] = *instr;
205 cb->used++;
206}
207
sewardjde4a1d02002-03-22 01:27:54 +0000208/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000209void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000210{
211 LAST_UINSTR(cb).lit32 = lit32;
212}
213
214
njn25e49d8e72002-09-23 09:36:25 +0000215/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000216void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
217 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000218{
219 vg_assert(argc < 4);
220 vg_assert(regparms_n <= argc);
221 LAST_UINSTR(cb).lit32 = fn;
222 LAST_UINSTR(cb).argc = argc;
223 LAST_UINSTR(cb).regparms_n = regparms_n;
224 LAST_UINSTR(cb).has_ret_val = has_ret_val;
225}
226
njn810086f2002-11-14 12:42:47 +0000227/* For the last uinsn inserted into cb, set the read, written and
228 undefined flags. Undefined flags are counted as written, but it
229 seems worthwhile to distinguish them.
230*/
231__inline__
232void VG_(set_flag_fields) ( UCodeBlock* cb,
233 FlagSet rr, FlagSet ww, FlagSet uu )
234{
235 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
236
237 vg_assert(rr == (rr & FlagsALL));
238 vg_assert(uw == (uw & FlagsALL));
239 LAST_UINSTR(cb).flags_r = rr;
240 LAST_UINSTR(cb).flags_w = uw;
241}
242
nethercote911cc372004-04-18 12:23:02 +0000243void VG_(set_cond_field) ( UCodeBlock* cb, Condcode cond )
244{
245 LAST_UINSTR(cb).cond = cond;
246}
247
248void VG_(set_widen_fields) ( UCodeBlock* cb, UInt szs, Bool is_signed )
249{
250 LAST_UINSTR(cb).extra4b = szs;
251 LAST_UINSTR(cb).signed_widen = is_signed;
252}
253
njn810086f2002-11-14 12:42:47 +0000254
njn4ba5a792002-09-30 10:23:54 +0000255Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000256{
257 return (u->flags_r != FlagsEmpty
258 || u->flags_w != FlagsEmpty);
259}
260
/* Build-time switch for the real-register allocation order.  Changing
   the `1' to `0' leaves BEST_ALLOC_ORDER undefined, which selects the
   reversed ("contrary") ordering in VG_(rank_to_realreg) and
   VG_(realreg_to_rank). */
#if 1
#  define BEST_ALLOC_ORDER
#endif
sewardjde4a1d02002-03-22 01:27:54 +0000264
265/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
266 register number. This effectively defines the order in which real
267 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000268 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000269
njn25e49d8e72002-09-23 09:36:25 +0000270 Important! This function must correspond with the value of
271 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
272 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000273*/
njn25e49d8e72002-09-23 09:36:25 +0000274__inline__
njn4ba5a792002-09-30 10:23:54 +0000275Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000276{
277 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000278# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000279 /* Probably the best allocation ordering. */
280 case 0: return R_EAX;
281 case 1: return R_EBX;
282 case 2: return R_ECX;
283 case 3: return R_EDX;
284 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000285 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000286# else
287 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000288 case 5: return R_EAX;
289 case 4: return R_EBX;
290 case 3: return R_ECX;
291 case 2: return R_EDX;
292 case 1: return R_ESI;
293 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000294# endif
njne427a662002-10-02 11:08:25 +0000295 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000296 }
297}
298
299/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000300 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000301 above. */
302__inline__
njn4ba5a792002-09-30 10:23:54 +0000303Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000304{
305 switch (realReg) {
306# ifdef BEST_ALLOC_ORDER
307 case R_EAX: return 0;
308 case R_EBX: return 1;
309 case R_ECX: return 2;
310 case R_EDX: return 3;
311 case R_ESI: return 4;
312 case R_EDI: return 5;
313# else
314 case R_EAX: return 5;
315 case R_EBX: return 4;
316 case R_ECX: return 3;
317 case R_EDX: return 2;
318 case R_ESI: return 1;
319 case R_EDI: return 0;
320# endif
njne427a662002-10-02 11:08:25 +0000321 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000322 }
323}
324
325
326/*------------------------------------------------------------*/
327/*--- Sanity checking uinstrs. ---*/
328/*------------------------------------------------------------*/
329
330/* This seems as good a place as any to record some important stuff
331 about ucode semantics.
332
333 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
334 TempReg are defined to zero-extend the loaded value to 32 bits.
335 This is needed to make the translation of movzbl et al work
336 properly.
337
   * Similarly, GETs of 8/16 bit ArchRegs are zero-extended.
339
340 * Arithmetic on TempRegs is at the specified size. For example,
341 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
342 being emitted -- not a 32 bit one.
343
344 * On some insns we allow the cc bit to be set. If so, the
345 intention is that the simulated machine's %eflags register
346 is copied into that of the real machine before the insn,
347 and copied back again afterwards. This means that the
348 code generated for that insn must be very careful only to
349 update %eflags in the intended way. This is particularly
350 important for the routines referenced by CALL insns.
351*/
352
353/* Meaning of operand kinds is as follows:
354
355 ArchReg is a register of the simulated CPU, stored in memory,
356 in vg_m_state.m_eax .. m_edi. These values are stored
357 using the Intel register encoding.
358
359 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
360 available for allocation. As with ArchRegs, these values
361 are stored using the Intel register encoding.
362
363 TempReg is a temporary register used to express the results of
364 disassembly. There is an unlimited supply of them --
365 register allocation and spilling eventually assigns them
366 to RealRegs.
367
368 SpillNo is a spill slot number. The number of required spill
369 slots is VG_MAX_PSEUDOS, in general. Only allowed
370 as the ArchReg operand of GET and PUT.
371
372 Lit16 is a signed 16-bit literal value.
373
374 Literal is a 32-bit literal value. Each uinstr can only hold
375 one of these.
376
377 The disassembled code is expressed purely in terms of ArchReg,
378 TempReg and Literal operands. Eventually, register allocation
379 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
380 and Literals. New x86 code can easily be synthesised from this.
381 There are carefully designed restrictions on which insns can have
382 which operands, intended to make it possible to generate x86 code
383 from the result of register allocation on the ucode efficiently and
384 without need of any further RealRegs.
385
njn25e49d8e72002-09-23 09:36:25 +0000386 Restrictions for the individual UInstrs are clear from the checks below.
387 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
388 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000389 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000390
sewardjde4a1d02002-03-22 01:27:54 +0000391 Before register allocation, S operands should not appear anywhere.
392 After register allocation, all T operands should have been
393 converted into Rs, and S operands are allowed in GET and PUT --
394 denoting spill saves/restores.
395
   Before liveness analysis, regs_live_after should be ALL_RREGS_LIVE
   on every uinstr other than CCALL.  Afterwards, it may hold any value.
398
sewardjde4a1d02002-03-22 01:27:54 +0000399 The size field should be 0 for insns for which it is meaningless,
400 ie those which do not directly move/operate on data.
401*/
/* Check that a single uinstr is well-formed for its opcode.

   beforeRA:       when True, register operands must be TempRegs and
                   SpillNo operands are rejected; when False, register
                   operands must be RealRegs and SpillNo is accepted
                   wherever an ArchReg/SpillNo operand is allowed.
   beforeLiveness: when True, uinstrs whose rule includes CCALL0 must
                   still have regs_live_after == ALL_RREGS_LIVE.
   u:              the uinstr to check.

   Returns True iff u satisfies the rule for its opcode in the table
   below.  Opcodes not in the table are passed to SK_(sane_XUInstr) if
   VG_(needs).extended_UCode is set; otherwise this panics.

   All the macros defined here are local shorthands for predicates on
   *u, and are #undef'd again at the end of the function. */
Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
{
   /* Predicates on lit32 and on the size field. */
#  define LIT0 (u->lit32 == 0)
#  define LIT8 (((u->lit32) & 0xFFFFFF00) == 0)
#  define LIT1 (!(LIT0))
#  define LITm (u->tag1 == Literal ? True : LIT0 )
#  define SZ16 (u->size == 16)
#  define SZ8 (u->size == 8)
#  define SZ4 (u->size == 4)
#  define SZ2 (u->size == 2)
#  define SZ1 (u->size == 1)
#  define SZ0 (u->size == 0)
#  define SZ42 (u->size == 4 || u->size == 2)
#  define SZ48 (u->size == 4 || u->size == 8)
#  define SZ416 (u->size == 4 || u->size == 16)
#  define SZ816 (u->size == 8 || u->size == 16)
#  define SZsse2 (u->size == 4 || u->size == 8 || u->size == 16 || u->size == 512)
#  define SZsse3 (u->size == 4 || u->size == 8 || u->size == 16)
#  define SZi (u->size == 4 || u->size == 2 || u->size == 1)
#  define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
                || u->size == 10 || u->size == 28 || u->size == 108)
#  define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
                ? (u->size == 4) : SZi)

/* For these ones, two cases:
 *
 * 1. They are transliterations of the corresponding x86 instruction, in
 *    which case they should have its flags (except that redundant write
 *    flags can be annulled by the optimisation pass).
 *
 * 2. They are being used generally for other purposes, eg. helping with a
 *    'rep'-prefixed instruction, in which case should have empty flags .
 */
#  define emptyR (u->flags_r == FlagsEmpty)
#  define emptyW (u->flags_w == FlagsEmpty)
#  define CC0 (emptyR && emptyW)
#  define CCr (u->flags_r == FlagsALL && emptyW)
#  define CCw (emptyR && u->flags_w == FlagsALL)
#  define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
#  define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
#  define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
#  define CCb ((u->flags_r==FlagC || emptyR) && \
               (u->flags_w==FlagsOSZACP || emptyW))
#  define CCd ((u->flags_r==FlagC || emptyR) && \
               (u->flags_w==FlagsOC || emptyW))
#  define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP) \
               || (u->flags_r==FlagsZCP && emptyW))
#  define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
#  define CCj (u->cond==CondAlways ? CC0 : CCg)

   /* Predicates on the operand tags.  TRn means "operand n is a
      register": a TempReg before register allocation, a RealReg
      after.  ASn additionally admits SpillNo after allocation. */
#  define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
#  define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
#  define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
#  define A1 (u->tag1 == ArchReg)
#  define A2 (u->tag2 == ArchReg)
#  define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
#  define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
#  define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
#  define L1 (u->tag1 == Literal && u->val1 == 0)
#  define L2 (u->tag2 == Literal && u->val2 == 0)
#  define Ls1 (u->tag1 == Lit16)
#  define Ls2 (u->tag2 == Lit16)
#  define Ls3 (u->tag3 == Lit16)
#  define TRL1 (TR1 || L1)
#  define TRAL1 (TR1 || A1 || L1)
#  define TRA1 (TR1 || A1)
#  define TRA2 (TR2 || A2)
#  define N1 (u->tag1 == NoValue)
#  define N2 (u->tag2 == NoValue)
#  define N3 (u->tag3 == NoValue)
#  define Se1 (u->tag1 == ArchRegS)
#  define Se2 (u->tag2 == ArchRegS)

   /* Predicates on the remaining fields.  Each X* macro is the
      "(rest)" check for one family of opcodes: it requires every
      auxiliary field that family does not use to hold its neutral
      value. */
#  define COND0 (u->cond == 0)
#  define EXTRA4b0 (u->extra4b == 0)
#  define EXTRA4b12 (u->extra4b == 1 || u->extra4b == 2)
#  define SG_WD0 (u->signed_widen == 0)
#  define JMPKIND0 (u->jmpkind == 0)
#  define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
                  ( beforeLiveness \
                    ? u->regs_live_after == ALL_RREGS_LIVE \
                    : True ))

#  define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
#  define XLEA2 (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
#  define XWIDEN (COND0 && EXTRA4b12 && JMPKIND0 && CCALL0)
#  define XJMP ( SG_WD0 && CCALL0)
#  define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
#  define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)

   /* 0 or 1 Literal args per UInstr */
   Int n_lits = 0;
   if (u->tag1 == Literal) n_lits++;
   if (u->tag2 == Literal) n_lits++;
   if (u->tag3 == Literal) n_lits++;
   if (n_lits > 1)
      return False;

   /* Fields not checked: val1, val2, val3 */

   switch (u->opcode) {

   /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
   case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
   case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
   case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
   case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
   case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
   case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
   case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
   case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
   case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
   case LOAD:
   case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
   case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
   case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
   case WIDEN: return LIT0 && SZ42 && CC0 && TR1 && N2 && N3 && XWIDEN;
   case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
   case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
   case CALLM_S:
   case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
   case PUSH:
   case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
   case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
   case AND:
   case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
   case MUL: return LIT0 && SZ42 && CCa && TRA1 &&TRA2 && N3 && XOTHER;
   case ADD:
   case XOR:
   case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
   case SBB:
   case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
   case SHL:
   case SHR:
   case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
   case ROL:
   case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
   case RCL:
   case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
   case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
   case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
   case INC:
   case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
   case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
   case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
   case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
   case FPU_R:
   case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
   case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
   case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
   case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && XLEA2;
   case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
   /* CCALL: operands 1 and 2 must be registers exactly when argc says
      they carry an argument; operand 3 must be a register if there is
      a third argument or a return value. */
   case CCALL: return LIT1 && SZ0 && CC0 &&
                      (u->argc > 0 ? TR1 : N1) &&
                      (u->argc > 1 ? TR2 : N2) &&
                      (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
                      u->regparms_n <= u->argc && XCCALL;
   /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
   case MMX1:
   case MMX2: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
   case MMX3: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && N3 && XOTHER;
   case MMX2_MemRd: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
   case MMX2_MemWr: return LIT0 && SZ48 && CC0 && Ls1 && TR2 && N3 && XOTHER;
   case MMX2a1_MemRd: return LIT0 && SZ8 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case MMX2_ERegRd: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
   case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;

   /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
   case SSE2a_MemWr: return LIT0 && SZsse2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE2a_MemRd: return LIT0 && SZsse2 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE2a1_MemRd: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE2g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE2g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE2e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3a_MemWr: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3a_MemRd: return LIT0 && SZsse3 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3a1_MemRd: return LIT8 && SZ816 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
   case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
   case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
   case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
   /* Note: no constraint is placed on lit32 for this one. */
   case SSE3ag_MemRd_RegWr:
      return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
   default:
      /* Not a core opcode: hand it to the skin if it has declared
         extended UCode, otherwise give up. */
      if (VG_(needs).extended_UCode)
         return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
      else {
         VG_(printf)("unhandled opcode: %u. Perhaps "
                     "VG_(needs).extended_UCode should be set?",
                     u->opcode);
         VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
      }
   }
#  undef LIT0
#  undef LIT1
#  undef LIT8
#  undef LITm
#  undef SZ16
#  undef SZ8
#  undef SZ4
#  undef SZ2
#  undef SZ1
#  undef SZ0
#  undef SZ42
#  undef SZ48
#  undef SZ416
#  undef SZ816
#  undef SZsse2
#  undef SZsse3
#  undef SZi
#  undef SZf
#  undef SZ4m
#  undef emptyR
#  undef emptyW
#  undef CC0
#  undef CCr
#  undef CCw
#  undef CCa
#  undef CCb
#  undef CCc
#  undef CCd
#  undef CCe
#  undef CCf
#  undef CCg
#  undef CCj
#  undef TR1
#  undef TR2
#  undef TR3
#  undef A1
#  undef A2
#  undef AS1
#  undef AS2
#  undef AS3
#  undef L1
#  undef L2
#  undef Ls1
#  undef Ls2
#  undef Ls3
#  undef TRL1
#  undef TRAL1
#  undef TRA1
#  undef TRA2
#  undef N1
#  undef N2
#  undef N3
#  undef Se2
#  undef Se1
#  undef COND0
#  undef EXTRA4b0
#  undef EXTRA4b12
#  undef SG_WD0
#  undef JMPKIND0
#  undef CCALL0
#  undef XCONDi
#  undef XLEA2
#  undef XWIDEN
#  undef XJMP
#  undef XCCALL
#  undef XOTHER
}
666
njn25e49d8e72002-09-23 09:36:25 +0000667void VG_(saneUCodeBlock) ( UCodeBlock* cb )
668{
669 Int i;
670
671 for (i = 0; i < cb->used; i++) {
672 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
673 if (!sane) {
674 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000675 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000676 }
677 vg_assert(sane);
678 }
679}
sewardjde4a1d02002-03-22 01:27:54 +0000680
681/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000682Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000683{
684 Int callm = 0;
685 Int callm_s = 0;
686 Int callm_e = 0;
687 Int callm_ptr, calls_ptr;
688 Int i, j, t;
689 Bool incall = False;
690
691 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
692
693 for (i = 0; i < cb->used; i++) {
694 switch (cb->instrs[i].opcode) {
695 case CALLM:
696 if (!incall) return False;
697 callm++;
698 break;
699 case CALLM_S:
700 if (incall) return False;
701 incall = True;
702 callm_s++;
703 break;
704 case CALLM_E:
705 if (!incall) return False;
706 incall = False;
707 callm_e++;
708 break;
709 case PUSH: case POP: case CLEAR:
710 if (!incall) return False;
711 break;
712 default:
713 break;
714 }
715 }
716 if (incall) return False;
717 if (callm != callm_s || callm != callm_e) return False;
718
719 /* Check the sections between CALLM_S and CALLM's. Ensure that no
720 PUSH uinsn pushes any TempReg that any other PUSH in the same
721 section pushes. Ie, check that the TempReg args to PUSHes in
722 the section are unique. If not, the instrumenter generates
723 incorrect code for CALLM insns. */
724
725 callm_ptr = 0;
726
727 find_next_CALLM:
728 /* Search for the next interval, making calls_ptr .. callm_ptr
729 bracket it. */
730 while (callm_ptr < cb->used
731 && cb->instrs[callm_ptr].opcode != CALLM)
732 callm_ptr++;
733 if (callm_ptr == cb->used)
734 return True;
735 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
736
737 calls_ptr = callm_ptr - 1;
738 while (cb->instrs[calls_ptr].opcode != CALLM_S)
739 calls_ptr--;
740 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
741 vg_assert(calls_ptr >= 0);
742
743 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
744
745 /* For each PUSH insn in the interval ... */
746 for (i = calls_ptr + 1; i < callm_ptr; i++) {
747 if (cb->instrs[i].opcode != PUSH) continue;
748 t = cb->instrs[i].val1;
749 /* Ensure no later PUSH insns up to callm_ptr push the same
750 TempReg. Return False if any such are found. */
751 for (j = i+1; j < callm_ptr; j++) {
752 if (cb->instrs[j].opcode == PUSH &&
753 cb->instrs[j].val1 == t)
754 return False;
755 }
756 }
757
758 /* This interval is clean. Keep going ... */
759 callm_ptr++;
760 goto find_next_CALLM;
761}
762
763
764/*------------------------------------------------------------*/
765/*--- Printing uinstrs. ---*/
766/*------------------------------------------------------------*/
767
njn25e49d8e72002-09-23 09:36:25 +0000768/* Global that dictates whether to print generated code at all stages.
   It is abbreviated to 'dis' by the #define near the top of this file. */
769Bool VG_(print_codegen);
770
njn563f96f2003-02-03 11:17:46 +0000771Char* VG_(name_UCondcode) ( Condcode cond )
sewardjde4a1d02002-03-22 01:27:54 +0000772{
773 switch (cond) {
774 case CondO: return "o";
775 case CondNO: return "no";
776 case CondB: return "b";
777 case CondNB: return "nb";
778 case CondZ: return "z";
779 case CondNZ: return "nz";
780 case CondBE: return "be";
781 case CondNBE: return "nbe";
782 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000783 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000784 case CondP: return "p";
785 case CondNP: return "np";
786 case CondL: return "l";
787 case CondNL: return "nl";
788 case CondLE: return "le";
789 case CondNLE: return "nle";
790 case CondAlways: return "MP"; /* hack! */
njn563f96f2003-02-03 11:17:46 +0000791 default: VG_(core_panic)("name_UCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000792 }
793}
794
795
796static void vg_ppFlagSet ( Char* prefix, FlagSet set )
797{
798 VG_(printf)("%s", prefix);
799 if (set & FlagD) VG_(printf)("D");
800 if (set & FlagO) VG_(printf)("O");
801 if (set & FlagS) VG_(printf)("S");
802 if (set & FlagZ) VG_(printf)("Z");
803 if (set & FlagA) VG_(printf)("A");
804 if (set & FlagC) VG_(printf)("C");
805 if (set & FlagP) VG_(printf)("P");
806}
807
808
809static void ppTempReg ( Int tt )
810{
811 if ((tt & 1) == 0)
812 VG_(printf)("t%d", tt);
813 else
814 VG_(printf)("q%d", tt-1);
815}
816
817
njn4ba5a792002-09-30 10:23:54 +0000818void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000819{
820 UInt tag, val;
821 switch (operandNo) {
822 case 1: tag = u->tag1; val = u->val1; break;
823 case 2: tag = u->tag2; val = u->val2; break;
824 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000825 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000826 }
827 if (tag == Literal) val = u->lit32;
828
829 if (parens) VG_(printf)("(");
830 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000831 case TempReg: ppTempReg(val); break;
832 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
833 case Literal: VG_(printf)("$0x%x", val); break;
834 case Lit16: VG_(printf)("$0x%x", val); break;
835 case NoValue: VG_(printf)("NoValue"); break;
836 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
837 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
838 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000839 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000840 }
841 if (parens) VG_(printf)(")");
842}
843
844
njn4ba5a792002-09-30 10:23:54 +0000845Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000846{
847 switch (opc) {
848 case ADD: return (upper ? "ADD" : "add");
849 case ADC: return (upper ? "ADC" : "adc");
850 case AND: return (upper ? "AND" : "and");
851 case OR: return (upper ? "OR" : "or");
852 case XOR: return (upper ? "XOR" : "xor");
853 case SUB: return (upper ? "SUB" : "sub");
854 case SBB: return (upper ? "SBB" : "sbb");
855 case SHL: return (upper ? "SHL" : "shl");
856 case SHR: return (upper ? "SHR" : "shr");
857 case SAR: return (upper ? "SAR" : "sar");
858 case ROL: return (upper ? "ROL" : "rol");
859 case ROR: return (upper ? "ROR" : "ror");
860 case RCL: return (upper ? "RCL" : "rcl");
861 case RCR: return (upper ? "RCR" : "rcr");
jsgf5efa4fd2003-10-14 21:49:11 +0000862 case MUL: return (upper ? "MUL" : "mul");
sewardjde4a1d02002-03-22 01:27:54 +0000863 case NOT: return (upper ? "NOT" : "not");
864 case NEG: return (upper ? "NEG" : "neg");
865 case INC: return (upper ? "INC" : "inc");
866 case DEC: return (upper ? "DEC" : "dec");
867 case BSWAP: return (upper ? "BSWAP" : "bswap");
868 default: break;
869 }
njne427a662002-10-02 11:08:25 +0000870 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000871 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000872 case CALLM_S: return "CALLM_S";
873 case CALLM_E: return "CALLM_E";
874 case INCEIP: return "INCEIP";
875 case LEA1: return "LEA1";
876 case LEA2: return "LEA2";
877 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000878 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000879 case GET: return "GET";
880 case PUT: return "PUT";
881 case GETF: return "GETF";
882 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000883 case GETSEG: return "GETSEG";
884 case PUTSEG: return "PUTSEG";
885 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000886 case LOAD: return "LD" ;
887 case STORE: return "ST" ;
888 case MOV: return "MOV";
889 case CMOV: return "CMOV";
890 case WIDEN: return "WIDEN";
891 case JMP: return "J" ;
892 case JIFZ: return "JIFZ" ;
893 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000894 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000895 case PUSH: return "PUSH" ;
896 case POP: return "POP" ;
897 case CLEAR: return "CLEAR";
898 case CC2VAL: return "CC2VAL";
899 case FPU_R: return "FPU_R";
900 case FPU_W: return "FPU_W";
901 case FPU: return "FPU" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000902 case MMX1: return "MMX1" ;
903 case MMX2: return "MMX2" ;
sewardjca860012003-03-27 23:52:58 +0000904 case MMX3: return "MMX3" ;
sewardj3d7c9c82003-03-26 21:08:13 +0000905 case MMX2_MemRd: return "MMX2_MRd" ;
906 case MMX2_MemWr: return "MMX2_MWr" ;
thughes96b466a2004-03-15 16:43:58 +0000907 case MMX2a1_MemRd: return "MMX2a1_MRd" ;
sewardj4fbe6e92003-06-15 21:54:34 +0000908 case MMX2_ERegRd: return "MMX2_eRRd" ;
909 case MMX2_ERegWr: return "MMX2_eRWr" ;
sewardjfebaa3b2003-05-25 01:07:34 +0000910 case SSE2a_MemWr: return "SSE2a_MWr";
911 case SSE2a_MemRd: return "SSE2a_MRd";
nethercoteb1affa82004-01-19 19:14:18 +0000912 case SSE2g_RegWr: return "SSE2g_RWr";
sewardj9dd209f2003-06-18 23:30:52 +0000913 case SSE2a1_MemRd: return "SSE2a1_MRd";
nethercoteb1affa82004-01-19 19:14:18 +0000914 case SSE2g1_RegWr: return "SSE2g1_RWr";
915 case SSE2e1_RegRd: return "SSE2e1_RRd";
sewardj4fbe6e92003-06-15 21:54:34 +0000916 case SSE3e_RegRd: return "SSE3e_RRd";
sewardjabf8bf82003-06-15 22:28:05 +0000917 case SSE3e_RegWr: return "SSE3e_RWr";
sewardj02af6bc2003-06-12 00:56:06 +0000918 case SSE3g_RegWr: return "SSE3g_RWr";
sewardj77d30a22003-10-19 08:18:52 +0000919 case SSE3a1_MemRd: return "SSE3a1_MRd";
sewardjb31b06d2003-06-13 00:26:02 +0000920 case SSE3g1_RegWr: return "SSE3g1_RWr";
sewardj4fbe6e92003-06-15 21:54:34 +0000921 case SSE3e1_RegRd: return "SSE3e1_RRd";
sewardja60be0e2003-05-26 08:47:27 +0000922 case SSE3: return "SSE3";
sewardjfebaa3b2003-05-25 01:07:34 +0000923 case SSE4: return "SSE4";
sewardja453fb02003-06-14 13:22:36 +0000924 case SSE5: return "SSE5";
sewardjfebaa3b2003-05-25 01:07:34 +0000925 case SSE3a_MemWr: return "SSE3a_MWr";
926 case SSE3a_MemRd: return "SSE3a_MRd";
sewardje3891fa2003-06-15 03:13:48 +0000927 case SSE3ag_MemRd_RegWr: return "SSE3ag_MemRd_RegWr";
njn25e49d8e72002-09-23 09:36:25 +0000928 default:
929 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000930 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000931 else {
932 VG_(printf)("unhandled opcode: %u. Perhaps "
933 "VG_(needs).extended_UCode should be set?",
934 opc);
njne427a662002-10-02 11:08:25 +0000935 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000936 }
sewardjde4a1d02002-03-22 01:27:54 +0000937 }
938}
939
sewardja38e0922002-10-01 00:50:47 +0000940static
njn4ba5a792002-09-30 10:23:54 +0000941void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000942{
943# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000944 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000945 u->regs_live_after) \
946 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000947
njn25e49d8e72002-09-23 09:36:25 +0000948 VG_(printf)("[");
949 PRINT_RREG_LIVENESS(R_EAX, "a");
950 PRINT_RREG_LIVENESS(R_EBX, "b");
951 PRINT_RREG_LIVENESS(R_ECX, "c");
952 PRINT_RREG_LIVENESS(R_EDX, "d");
953 PRINT_RREG_LIVENESS(R_ESI, "S");
954 PRINT_RREG_LIVENESS(R_EDI, "D");
955 VG_(printf)("]");
956
957# undef PRINT_RREG_LIVENESS
958}
959
960/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000961void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000962{
njn4ba5a792002-09-30 10:23:54 +0000963 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000964
965 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000966 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000967 VG_(printf)("size: %d\n", u->size);
968 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
969 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000970 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
971 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
972 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
973 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000974 VG_(printf)("signed_widen: %d\n", u->signed_widen);
975 VG_(printf)("jmpkind: %d\n", u->jmpkind);
976 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
977 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
978 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000979 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000980 VG_(printf)("\n");
981}
982
sewardja38e0922002-10-01 00:50:47 +0000983static
njn4ba5a792002-09-30 10:23:54 +0000984void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000985{
986 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000987 VG_(name_UOpcode)(True, u->opcode));
nethercotee00f1ff2004-04-16 11:33:53 +0000988 // For JMP, the condition goes before the size
989 if (u->opcode == JMP)
njn563f96f2003-02-03 11:17:46 +0000990 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
sewardjde4a1d02002-03-22 01:27:54 +0000991
992 switch (u->size) {
993 case 0: VG_(printf)("o"); break;
994 case 1: VG_(printf)("B"); break;
995 case 2: VG_(printf)("W"); break;
996 case 4: VG_(printf)("L"); break;
997 case 8: VG_(printf)("Q"); break;
sewardjfebaa3b2003-05-25 01:07:34 +0000998 case 16: VG_(printf)("QQ"); break;
sewardjde4a1d02002-03-22 01:27:54 +0000999 default: VG_(printf)("%d", (Int)u->size); break;
1000 }
1001
nethercotee00f1ff2004-04-16 11:33:53 +00001002 // For CC2VAL and CMOV, the condition goes after the size
1003 if (u->opcode == CC2VAL || u->opcode == CMOV)
1004 VG_(printf)("%s", VG_(name_UCondcode)(u->cond));
1005
nethercotebbcfb582004-04-16 15:39:22 +00001006 // Append extra bits
1007 switch (u->opcode) {
1008 case JMP:
nethercotee00f1ff2004-04-16 11:33:53 +00001009 switch (u->jmpkind) {
1010 case JmpCall: VG_(printf)("-c"); break;
1011 case JmpRet: VG_(printf)("-r"); break;
1012 case JmpSyscall: VG_(printf)("-sys"); break;
1013 case JmpClientReq: VG_(printf)("-cli"); break;
1014 case JmpYield: VG_(printf)("-yld"); break;
1015 default: break;
1016 }
nethercotebbcfb582004-04-16 15:39:22 +00001017 break;
1018
1019 case WIDEN:
1020 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1021 u->signed_widen?'s':'z');
1022 }
sewardjfebaa3b2003-05-25 01:07:34 +00001023 VG_(printf)(" \t");
1024
sewardjde4a1d02002-03-22 01:27:54 +00001025 switch (u->opcode) {
1026
sewardjde4a1d02002-03-22 01:27:54 +00001027 case CALLM_S: case CALLM_E:
1028 break;
1029
1030 case INCEIP:
sewardjfebaa3b2003-05-25 01:07:34 +00001031 VG_(printf)("$%d", u->val1);
sewardjde4a1d02002-03-22 01:27:54 +00001032 break;
1033
1034 case LEA2:
sewardjfebaa3b2003-05-25 01:07:34 +00001035 VG_(printf)("%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +00001036 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001037 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +00001038 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001039 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +00001040 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001041 break;
1042
1043 case LEA1:
sewardjfebaa3b2003-05-25 01:07:34 +00001044 VG_(printf)("%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +00001045 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001046 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001047 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +00001048 break;
1049
sewardj7a5ebcf2002-11-13 22:42:13 +00001050 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001051 break;
1052
1053 case FPU_W:
sewardjfebaa3b2003-05-25 01:07:34 +00001054 VG_(printf)("0x%x:0x%x, ",
sewardjde4a1d02002-03-22 01:27:54 +00001055 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +00001056 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001057 break;
1058
1059 case FPU_R:
sewardjfebaa3b2003-05-25 01:07:34 +00001060 VG_(printf)("");
njn4ba5a792002-09-30 10:23:54 +00001061 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +00001062 VG_(printf)(", 0x%x:0x%x",
1063 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1064 break;
1065
1066 case FPU:
sewardjfebaa3b2003-05-25 01:07:34 +00001067 VG_(printf)("0x%x:0x%x",
sewardjde4a1d02002-03-22 01:27:54 +00001068 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1069 break;
1070
sewardj3d7c9c82003-03-26 21:08:13 +00001071 case MMX1:
sewardjfebaa3b2003-05-25 01:07:34 +00001072 VG_(printf)("0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001073 u->val1 & 0xFF );
1074 break;
1075
1076 case MMX2:
sewardjfebaa3b2003-05-25 01:07:34 +00001077 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001078 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1079 break;
1080
sewardjca860012003-03-27 23:52:58 +00001081 case MMX3:
sewardjfebaa3b2003-05-25 01:07:34 +00001082 VG_(printf)("0x%x:0x%x:0x%x",
sewardjca860012003-03-27 23:52:58 +00001083 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1084 break;
1085
sewardj4fbe6e92003-06-15 21:54:34 +00001086 case MMX2_ERegWr:
1087 case MMX2_ERegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001088 VG_(printf)("0x%x:0x%x, ",
sewardjca860012003-03-27 23:52:58 +00001089 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1090 VG_(pp_UOperand)(u, 2, 4, False);
1091 break;
1092
sewardj3d7c9c82003-03-26 21:08:13 +00001093 case MMX2_MemWr:
1094 case MMX2_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001095 VG_(printf)("0x%x:0x%x",
sewardj3d7c9c82003-03-26 21:08:13 +00001096 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
1097 VG_(pp_UOperand)(u, 2, 4, True);
1098 break;
1099
thughes96b466a2004-03-15 16:43:58 +00001100 case MMX2a1_MemRd:
1101 VG_(printf)("0x%x:0x%x:0x%x",
1102 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1103 VG_(pp_UOperand)(u, 3, 4, True);
1104 break;
1105
sewardjfebaa3b2003-05-25 01:07:34 +00001106 case SSE2a_MemWr:
1107 case SSE2a_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +00001108 case SSE2g_RegWr:
1109 case SSE2g1_RegWr:
1110 case SSE2e1_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001111 VG_(printf)("0x%x:0x%x:0x%x",
1112 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF, u->val2 & 0xFF );
1113 VG_(pp_UOperand)(u, 3, 4, True);
1114 break;
1115
sewardj9dd209f2003-06-18 23:30:52 +00001116 case SSE2a1_MemRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001117 case SSE3a_MemWr:
1118 case SSE3a_MemRd:
1119 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1120 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
sewardjde8aecf2003-05-27 00:46:28 +00001121 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
sewardjfebaa3b2003-05-25 01:07:34 +00001122 VG_(pp_UOperand)(u, 3, 4, True);
1123 break;
1124
sewardjabf8bf82003-06-15 22:28:05 +00001125 case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001126 case SSE3e_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001127 case SSE3g_RegWr:
sewardjfebaa3b2003-05-25 01:07:34 +00001128 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1129 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1130 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1131 VG_(pp_UOperand)(u, 3, 4, True);
1132 break;
1133
sewardjb31b06d2003-06-13 00:26:02 +00001134 case SSE3g1_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001135 case SSE3e1_RegRd:
sewardj77d30a22003-10-19 08:18:52 +00001136 case SSE3a1_MemRd:
sewardjb31b06d2003-06-13 00:26:02 +00001137 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1138 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1139 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1140 u->lit32 );
1141 VG_(pp_UOperand)(u, 3, 4, True);
1142 break;
1143
sewardja60be0e2003-05-26 08:47:27 +00001144 case SSE3:
1145 VG_(printf)("0x%x:0x%x:0x%x",
1146 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1147 u->val2 & 0xFF );
1148 break;
1149
sewardjfebaa3b2003-05-25 01:07:34 +00001150 case SSE4:
1151 VG_(printf)("0x%x:0x%x:0x%x:0x%x",
1152 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1153 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF );
1154 break;
1155
sewardja453fb02003-06-14 13:22:36 +00001156 case SSE5:
1157 VG_(printf)("0x%x:0x%x:0x%x:0x%x:0x%x",
1158 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF,
1159 (u->val2 >> 8) & 0xFF, u->val2 & 0xFF,
1160 u->val3 & 0xFF );
1161 break;
1162
sewardje3891fa2003-06-15 03:13:48 +00001163 case SSE3ag_MemRd_RegWr:
1164 VG_(printf)("0x%x(addr=", u->lit32 );
1165 VG_(pp_UOperand)(u, 1, 4, False);
1166 VG_(printf)(", dst=");
1167 VG_(pp_UOperand)(u, 2, 4, False);
1168 VG_(printf)(")");
1169 break;
1170
sewardjde4a1d02002-03-22 01:27:54 +00001171 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +00001172 case GETSEG: case PUTSEG:
njn4ba5a792002-09-30 10:23:54 +00001173 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +00001174 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001175 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +00001176 break;
1177
1178 case JMP:
njn4ba5a792002-09-30 10:23:54 +00001179 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +00001180 if (CondAlways == u->cond) {
1181 /* Print x86 instruction size if filled in */
1182 if (0 != u->extra4b)
1183 VG_(printf)(" ($%u)", u->extra4b);
1184 }
sewardjde4a1d02002-03-22 01:27:54 +00001185 break;
1186
1187 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +00001188 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
1189 case NOT: case NEG: case INC: case DEC: case BSWAP:
njn4ba5a792002-09-30 10:23:54 +00001190 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001191 break;
1192
njn25e49d8e72002-09-23 09:36:25 +00001193 /* Print a "(s)" after args passed on stack */
1194 case CCALL:
njn25e49d8e72002-09-23 09:36:25 +00001195 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +00001196 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001197 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +00001198 }
njn25e49d8e72002-09-23 09:36:25 +00001199 VG_(printf)("%p(", u->lit32);
1200 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001201 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001202 if (u->regparms_n < 1)
1203 VG_(printf)("(s)");
1204 }
1205 if (u->argc > 1) {
1206 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001207 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001208 if (u->regparms_n < 2)
1209 VG_(printf)("(s)");
1210 }
1211 if (u->argc > 2) {
1212 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001213 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001214 if (u->regparms_n < 3)
1215 VG_(printf)("(s)");
1216 }
1217 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001218 break;
1219
sewardje1042472002-09-30 12:33:11 +00001220 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001221 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001222 case ADD: case ADC: case AND: case OR:
1223 case XOR: case SUB: case SBB:
1224 case SHL: case SHR: case SAR:
jsgf5efa4fd2003-10-14 21:49:11 +00001225 case ROL: case ROR: case RCL: case RCR:
1226 case MUL:
njn4ba5a792002-09-30 10:23:54 +00001227 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001228 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001229 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001230 break;
1231
1232 case WIDEN:
njn4ba5a792002-09-30 10:23:54 +00001233 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001234 break;
1235
njn25e49d8e72002-09-23 09:36:25 +00001236 default:
1237 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001238 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001239 else {
1240 VG_(printf)("unhandled opcode: %u. Perhaps "
1241 "VG_(needs).extended_UCode should be set?",
1242 u->opcode);
njne427a662002-10-02 11:08:25 +00001243 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001244 }
sewardjde4a1d02002-03-22 01:27:54 +00001245 }
sewardjde4a1d02002-03-22 01:27:54 +00001246 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1247 VG_(printf)(" (");
1248 if (u->flags_r != FlagsEmpty)
1249 vg_ppFlagSet("-r", u->flags_r);
1250 if (u->flags_w != FlagsEmpty)
1251 vg_ppFlagSet("-w", u->flags_w);
1252 VG_(printf)(")");
1253 }
njn25e49d8e72002-09-23 09:36:25 +00001254
1255 if (ppRegsLiveness) {
1256 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001257 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001258 }
1259
sewardjde4a1d02002-03-22 01:27:54 +00001260 VG_(printf)("\n");
1261}
1262
njn4ba5a792002-09-30 10:23:54 +00001263void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001264{
njn4ba5a792002-09-30 10:23:54 +00001265 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001266}
1267
njn4ba5a792002-09-30 10:23:54 +00001268void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001269{
njn4ba5a792002-09-30 10:23:54 +00001270 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001271}
sewardjde4a1d02002-03-22 01:27:54 +00001272
njn4ba5a792002-09-30 10:23:54 +00001273void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001274{
1275 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001276 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001277 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001278 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001279 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001280 VG_(printf)("\n");
1281}
1282
1283
1284/*------------------------------------------------------------*/
1285/*--- uinstr helpers for register allocation ---*/
1286/*--- and code improvement. ---*/
1287/*------------------------------------------------------------*/
1288
njn25e49d8e72002-09-23 09:36:25 +00001289/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001290 the caller (regs), which is assumed to be big enough. Return the number
1291 of entries. Written regs are indicated in parallel array isWrites.
1292 Insns which read _and_ write a register wind up mentioning it twice.
1293 Entries are placed in the array in program order, so that if a reg is
1294 read-modified-written, it appears first as a read and then as a write.
1295 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001296*/
njn810086f2002-11-14 12:42:47 +00001297Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001298{
njn810086f2002-11-14 12:42:47 +00001299# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1300# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001301
1302 Int n = 0;
1303 switch (u->opcode) {
1304 case LEA1: RD(1); WR(2); break;
1305 case LEA2: RD(1); RD(2); WR(3); break;
1306
sewardj77d30a22003-10-19 08:18:52 +00001307 case SSE3a1_MemRd:
sewardj9dd209f2003-06-18 23:30:52 +00001308 case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +00001309 case SSE2e1_RegRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001310 case SSE3e_RegRd:
sewardjfebaa3b2003-05-25 01:07:34 +00001311 case SSE3a_MemWr:
1312 case SSE3a_MemRd:
1313 case SSE2a_MemWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001314 case SSE3e1_RegRd:
sewardj02af6bc2003-06-12 00:56:06 +00001315 case SSE2a_MemRd: RD(3); break;
1316
nethercoteb1affa82004-01-19 19:14:18 +00001317 case SSE2g_RegWr:
1318 case SSE2g1_RegWr:
sewardjabf8bf82003-06-15 22:28:05 +00001319 case SSE3e_RegWr:
sewardjb31b06d2003-06-13 00:26:02 +00001320 case SSE3g1_RegWr:
sewardj02af6bc2003-06-12 00:56:06 +00001321 case SSE3g_RegWr: WR(3); break;
sewardjfebaa3b2003-05-25 01:07:34 +00001322
sewardje3891fa2003-06-15 03:13:48 +00001323 case SSE3ag_MemRd_RegWr: RD(1); WR(2); break;
1324
thughes96b466a2004-03-15 16:43:58 +00001325 case MMX2a1_MemRd: RD(3); break;
sewardj4fbe6e92003-06-15 21:54:34 +00001326 case MMX2_ERegRd: RD(2); break;
1327 case MMX2_ERegWr: WR(2); break;
sewardjca860012003-03-27 23:52:58 +00001328
sewardja453fb02003-06-14 13:22:36 +00001329 case SSE4: case SSE3: case SSE5:
sewardjca860012003-03-27 23:52:58 +00001330 case MMX1: case MMX2: case MMX3:
njn25e49d8e72002-09-23 09:36:25 +00001331 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001332 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001333
1334 case CCALL:
1335 if (u->argc > 0) RD(1);
1336 if (u->argc > 1) RD(2);
1337 if (u->argc > 2) RD(3);
1338 if (u->has_ret_val) WR(3);
1339 break;
1340
sewardj3d7c9c82003-03-26 21:08:13 +00001341 case MMX2_MemRd: case MMX2_MemWr:
sewardjde4a1d02002-03-22 01:27:54 +00001342 case FPU_R: case FPU_W: RD(2); break;
1343
sewardje1042472002-09-30 12:33:11 +00001344 case GETSEG: WR(2); break;
1345 case PUTSEG: RD(1); break;
1346
sewardjde4a1d02002-03-22 01:27:54 +00001347 case GETF: WR(1); break;
1348 case PUTF: RD(1); break;
1349
1350 case GET: WR(2); break;
1351 case PUT: RD(1); break;
1352 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001353 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001354 case MOV: RD(1); WR(2); break;
1355
1356 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001357
njn25e49d8e72002-09-23 09:36:25 +00001358 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001359 case POP: WR(1); break;
1360
sewardje1042472002-09-30 12:33:11 +00001361 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001362 case CMOV:
1363 case ADD: case ADC: case AND: case OR:
1364 case XOR: case SUB: case SBB:
jsgf5efa4fd2003-10-14 21:49:11 +00001365 case MUL:
sewardjde4a1d02002-03-22 01:27:54 +00001366 RD(1); RD(2); WR(2); break;
1367
1368 case SHL: case SHR: case SAR:
1369 case ROL: case ROR: case RCL: case RCR:
1370 RD(1); RD(2); WR(2); break;
1371
njn25e49d8e72002-09-23 09:36:25 +00001372 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001373 RD(1); WR(1); break;
1374
1375 case WIDEN: RD(1); WR(1); break;
1376
1377 case CC2VAL: WR(1); break;
1378 case JIFZ: RD(1); break;
1379
njn25e49d8e72002-09-23 09:36:25 +00001380 default:
1381 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001382 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001383 else {
1384 VG_(printf)("unhandled opcode: %u. Perhaps "
1385 "VG_(needs).extended_UCode should be set?",
1386 u->opcode);
njne427a662002-10-02 11:08:25 +00001387 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001388 }
sewardjde4a1d02002-03-22 01:27:54 +00001389 }
1390 return n;
1391
1392# undef RD
1393# undef WR
1394}
1395
1396
njn25e49d8e72002-09-23 09:36:25 +00001397/* Change temp regs in u into real regs, as directed by the
1398 * temps[i]-->reals[i] mapping. */
sewardj56867352003-10-12 10:27:06 +00001399static
njn810086f2002-11-14 12:42:47 +00001400void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001401{
1402 Int i;
1403 if (u->tag1 == TempReg) {
1404 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001405 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001406 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001407 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001408 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001409 }
1410 if (u->tag2 == TempReg) {
1411 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001412 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001413 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001414 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001415 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001416 }
1417 if (u->tag3 == TempReg) {
1418 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001419 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001420 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001421 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001422 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001423 }
1424}
1425
1426
1427/* Tedious x86-specific hack which compensates for the fact that the
1428 register numbers for %ah .. %dh do not correspond to those for %eax
1429 .. %edx. It maps a (reg size, reg no) pair to the number of the
1430 containing 32-bit reg. */
1431static __inline__
1432Int containingArchRegOf ( Int sz, Int aregno )
1433{
1434 switch (sz) {
1435 case 4: return aregno;
1436 case 2: return aregno;
1437 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001438 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001439 }
1440}
1441
1442
1443/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001444 reg. Otherwise return -1. Used in redundant-PUT elimination.
1445 Note that this is not required for skins extending UCode because
1446 this happens before instrumentation. */
sewardj56867352003-10-12 10:27:06 +00001447static
sewardjde4a1d02002-03-22 01:27:54 +00001448Int maybe_uinstrReadsArchReg ( UInstr* u )
1449{
1450 switch (u->opcode) {
1451 case GET:
1452 case ADD: case ADC: case AND: case OR:
1453 case XOR: case SUB: case SBB:
1454 case SHL: case SHR: case SAR: case ROL:
1455 case ROR: case RCL: case RCR:
jsgf5efa4fd2003-10-14 21:49:11 +00001456 case MUL:
sewardjde4a1d02002-03-22 01:27:54 +00001457 if (u->tag1 == ArchReg)
1458 return containingArchRegOf ( u->size, u->val1 );
1459 else
1460 return -1;
1461
1462 case GETF: case PUTF:
1463 case CALLM_S: case CALLM_E:
1464 case INCEIP:
1465 case LEA1:
1466 case LEA2:
1467 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001468 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001469 case PUT:
1470 case LOAD:
1471 case STORE:
1472 case MOV:
1473 case CMOV:
1474 case JMP:
1475 case CALLM: case CLEAR: case PUSH: case POP:
1476 case NOT: case NEG: case INC: case DEC: case BSWAP:
1477 case CC2VAL:
1478 case JIFZ:
1479 case FPU: case FPU_R: case FPU_W:
sewardjca860012003-03-27 23:52:58 +00001480 case MMX1: case MMX2: case MMX3:
thughes96b466a2004-03-15 16:43:58 +00001481 case MMX2_MemRd: case MMX2_MemWr: case MMX2a1_MemRd:
sewardj4fbe6e92003-06-15 21:54:34 +00001482 case MMX2_ERegRd: case MMX2_ERegWr:
sewardj9dd209f2003-06-18 23:30:52 +00001483 case SSE2a_MemWr: case SSE2a_MemRd: case SSE2a1_MemRd:
nethercoteb1affa82004-01-19 19:14:18 +00001484 case SSE2g_RegWr: case SSE2g1_RegWr: case SSE2e1_RegRd:
sewardj77d30a22003-10-19 08:18:52 +00001485 case SSE3a_MemWr: case SSE3a_MemRd: case SSE3a1_MemRd:
sewardjabf8bf82003-06-15 22:28:05 +00001486 case SSE3e_RegRd: case SSE3g_RegWr: case SSE3e_RegWr:
sewardj4fbe6e92003-06-15 21:54:34 +00001487 case SSE3g1_RegWr: case SSE3e1_RegRd:
sewardje3891fa2003-06-15 03:13:48 +00001488 case SSE4: case SSE3: case SSE5: case SSE3ag_MemRd_RegWr:
sewardjde4a1d02002-03-22 01:27:54 +00001489 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001490 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1491 case GETSEG: case PUTSEG:
1492 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001493 return -1;
1494
1495 default:
njn4ba5a792002-09-30 10:23:54 +00001496 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001497 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001498 }
1499}
1500
1501static __inline__
1502Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1503{
1504 Int i, k;
njnf4ce3d32003-02-10 10:17:26 +00001505 Int tempUse[VG_MAX_REGS_USED];
1506 Bool notUsed[VG_MAX_REGS_USED];
njn810086f2002-11-14 12:42:47 +00001507
1508 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001509 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001510 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001511 return True;
1512 return False;
1513}
1514
1515
1516/*------------------------------------------------------------*/
1517/*--- ucode improvement. ---*/
1518/*------------------------------------------------------------*/
1519
1520/* Improve the code in cb by doing
1521 -- Redundant ArchReg-fetch elimination
1522 -- Redundant PUT elimination
1523 -- Redundant cond-code restore/save elimination
1524 The overall effect of these is to allow target registers to be
1525 cached in host registers over multiple target insns.
1526*/
1527static void vg_improve ( UCodeBlock* cb )
1528{
1529 Int i, j, k, m, n, ar, tr, told, actual_areg;
1530 Int areg_map[8];
1531 Bool annul_put[8];
njnf4ce3d32003-02-10 10:17:26 +00001532 Int tempUse[VG_MAX_REGS_USED];
1533 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001534 UInstr* u;
1535 Bool wr;
1536 Int* last_live_before;
1537 FlagSet future_dead_flags;
1538
njn25e49d8e72002-09-23 09:36:25 +00001539 if (dis)
1540 VG_(printf) ("Improvements:\n");
1541
sewardjde4a1d02002-03-22 01:27:54 +00001542 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001543 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1544 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001545 else
1546 last_live_before = NULL;
1547
1548
1549 /* PASS 1: redundant GET elimination. (Actually, more general than
1550 that -- eliminates redundant fetches of ArchRegs). */
1551
1552 /* Find the live-range-ends for all temporaries. Duplicates code
1553 in the register allocator :-( */
1554
1555 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1556
1557 for (i = cb->used-1; i >= 0; i--) {
1558 u = &cb->instrs[i];
1559
njn810086f2002-11-14 12:42:47 +00001560 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001561
1562 /* For each temp usage ... bwds in program order. */
1563 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001564 tr = tempUse[j];
1565 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001566 if (last_live_before[tr] == -1) {
1567 vg_assert(tr >= 0 && tr < cb->nextTemp);
1568 last_live_before[tr] = wr ? (i+1) : i;
1569 }
1570 }
1571
1572 }
1573
1574# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1575 { Int q; \
1576 /* Invalidate any old binding(s) to tempreg. */ \
1577 for (q = 0; q < 8; q++) \
1578 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1579 /* Add the new binding. */ \
1580 areg_map[archreg] = (tempreg); \
1581 }
1582
1583 /* Set up the A-reg map. */
1584 for (i = 0; i < 8; i++) areg_map[i] = -1;
1585
1586 /* Scan insns. */
1587 for (i = 0; i < cb->used; i++) {
1588 u = &cb->instrs[i];
1589 if (u->opcode == GET && u->size == 4) {
1590 /* GET; see if it can be annulled. */
1591 vg_assert(u->tag1 == ArchReg);
1592 vg_assert(u->tag2 == TempReg);
1593 ar = u->val1;
1594 tr = u->val2;
1595 told = areg_map[ar];
1596 if (told != -1 && last_live_before[told] <= i) {
1597 /* ar already has an old mapping to told, but that runs
1598 out here. Annul this GET, rename tr to told for the
1599 rest of the block, and extend told's live range to that
1600 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001601 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001602 n = last_live_before[tr] + 1;
1603 if (n > cb->used) n = cb->used;
1604 last_live_before[told] = last_live_before[tr];
1605 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001606 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001607 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001608 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001609 i, tr, told,i+1, n-1);
1610 for (m = i+1; m < n; m++) {
1611 if (cb->instrs[m].tag1 == TempReg
1612 && cb->instrs[m].val1 == tr)
1613 cb->instrs[m].val1 = told;
1614 if (cb->instrs[m].tag2 == TempReg
1615 && cb->instrs[m].val2 == tr)
1616 cb->instrs[m].val2 = told;
sewardjfebaa3b2003-05-25 01:07:34 +00001617 if (cb->instrs[m].tag3 == TempReg
1618 && cb->instrs[m].val3 == tr)
1619 cb->instrs[m].val3 = told;
sewardjde4a1d02002-03-22 01:27:54 +00001620 }
1621 BIND_ARCH_TO_TEMP(ar,told);
1622 }
1623 else
1624 BIND_ARCH_TO_TEMP(ar,tr);
1625 }
1626 else if (u->opcode == GET && u->size != 4) {
1627 /* Invalidate any mapping for this archreg. */
1628 actual_areg = containingArchRegOf ( u->size, u->val1 );
1629 areg_map[actual_areg] = -1;
1630 }
1631 else if (u->opcode == PUT && u->size == 4) {
1632 /* PUT; re-establish t -> a binding */
1633 vg_assert(u->tag1 == TempReg);
1634 vg_assert(u->tag2 == ArchReg);
1635 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1636 }
1637 else if (u->opcode == PUT && u->size != 4) {
1638 /* Invalidate any mapping for this archreg. */
1639 actual_areg = containingArchRegOf ( u->size, u->val2 );
1640 areg_map[actual_areg] = -1;
1641 } else {
1642
1643 /* see if insn has an archreg as a read operand; if so try to
1644 map it. */
1645 if (u->tag1 == ArchReg && u->size == 4
1646 && areg_map[u->val1] != -1) {
1647 switch (u->opcode) {
1648 case ADD: case SUB: case AND: case OR: case XOR:
1649 case ADC: case SBB:
1650 case SHL: case SHR: case SAR: case ROL: case ROR:
1651 case RCL: case RCR:
jsgf5efa4fd2003-10-14 21:49:11 +00001652 case MUL:
njn25e49d8e72002-09-23 09:36:25 +00001653 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001654 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001655 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001656 i, nameIReg(4,u->val1), areg_map[u->val1]);
1657 u->tag1 = TempReg;
1658 u->val1 = areg_map[u->val1];
1659 /* Remember to extend the live range of the TempReg,
1660 if necessary. */
1661 if (last_live_before[u->val1] < i)
1662 last_live_before[u->val1] = i;
1663 break;
1664 default:
1665 break;
1666 }
1667 }
1668
1669 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001670 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001671
1672 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001673 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001674 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001675 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001676 for (m = 0; m < 8; m++)
1677 if (areg_map[m] == tr) areg_map[m] = -1;
1678 }
1679 }
1680
1681 }
1682
1683# undef BIND_ARCH_TO_TEMP
1684
sewardj05f1aa12002-04-30 00:29:36 +00001685 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1686 %ESP, since the memory check machinery always requires the
1687 in-memory value of %ESP to be up to date. Although this isn't
1688 actually required by other analyses (cache simulation), it's
1689 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001690 for (j = 0; j < 8; j++)
1691 annul_put[j] = False;
1692
1693 for (i = cb->used-1; i >= 0; i--) {
1694 u = &cb->instrs[i];
1695 if (u->opcode == NOP) continue;
1696
1697 if (u->opcode == PUT && u->size == 4) {
1698 vg_assert(u->tag2 == ArchReg);
1699 actual_areg = containingArchRegOf ( 4, u->val2 );
1700 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001701 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001702 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001703 if (dis)
1704 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001705 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001706 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001707 annul_put[actual_areg] = True;
1708 }
1709 }
1710 else if (u->opcode == PUT && u->size != 4) {
1711 actual_areg = containingArchRegOf ( u->size, u->val2 );
1712 annul_put[actual_areg] = False;
1713 }
1714 else if (u->opcode == JMP || u->opcode == JIFZ
1715 || u->opcode == CALLM) {
1716 for (j = 0; j < 8; j++)
1717 annul_put[j] = False;
1718 }
1719 else {
1720 /* If an instruction reads an ArchReg, the immediately
1721 preceding PUT cannot be annulled. */
1722 actual_areg = maybe_uinstrReadsArchReg ( u );
1723 if (actual_areg != -1)
1724 annul_put[actual_areg] = False;
1725 }
1726 }
1727
1728 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1729 dead after this point, annul the MOV insn and rename t2 to t1.
1730 Further modifies the last_live_before map. */
1731
1732# if 0
njn4ba5a792002-09-30 10:23:54 +00001733 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001734 for (i = 0; i < cb->nextTemp; i++)
1735 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1736 VG_(printf)("\n");
1737# endif
1738
1739 for (i = 0; i < cb->used-1; i++) {
1740 u = &cb->instrs[i];
1741 if (u->opcode != MOV) continue;
1742 if (u->tag1 == Literal) continue;
1743 vg_assert(u->tag1 == TempReg);
1744 vg_assert(u->tag2 == TempReg);
1745 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001746 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001747 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001748 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001749 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1750 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1751 if (cb->instrs[j].tag1 == TempReg
1752 && cb->instrs[j].val1 == u->val2)
1753 cb->instrs[j].val1 = u->val1;
1754 if (cb->instrs[j].tag2 == TempReg
1755 && cb->instrs[j].val2 == u->val2)
1756 cb->instrs[j].val2 = u->val1;
sewardjfebaa3b2003-05-25 01:07:34 +00001757 if (cb->instrs[j].tag3 == TempReg
1758 && cb->instrs[j].val3 == u->val2)
1759 cb->instrs[j].val3 = u->val1;
sewardjde4a1d02002-03-22 01:27:54 +00001760 }
1761 last_live_before[u->val1] = last_live_before[u->val2];
1762 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001763 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001764 }
1765 }
1766
1767 /* PASS 3: redundant condition-code restore/save elimination.
1768 Scan backwards from the end. future_dead_flags records the set
1769 of flags which are dead at this point, that is, will be written
1770 before they are next read. Earlier uinsns which write flags
1771 already in future_dead_flags can have their writes annulled.
1772 */
1773 future_dead_flags = FlagsEmpty;
1774
1775 for (i = cb->used-1; i >= 0; i--) {
1776 u = &cb->instrs[i];
1777
1778 /* We might never make it to insns beyond this one, so be
1779 conservative. */
1780 if (u->opcode == JIFZ || u->opcode == JMP) {
1781 future_dead_flags = FlagsEmpty;
1782 continue;
1783 }
1784
sewardjfbb6cda2002-07-24 09:33:52 +00001785 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1786 For example people try to mess with bit 21 to see if CPUID
1787 works. The setting may or may not actually take hold. So we
1788 play safe here. */
1789 if (u->opcode == PUTF) {
1790 future_dead_flags = FlagsEmpty;
1791 continue;
1792 }
1793
sewardjde4a1d02002-03-22 01:27:54 +00001794 /* We can annul the flags written by this insn if it writes a
1795 subset (or eq) of the set of flags known to be dead after
1796 this insn. If not, just record the flags also written by
1797 this insn.*/
1798 if (u->flags_w != FlagsEmpty
1799 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001800 if (dis) {
1801 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001802 vg_ppFlagSet("", u->flags_w);
1803 VG_(printf)(" due to later ");
1804 vg_ppFlagSet("", future_dead_flags);
1805 VG_(printf)("\n");
1806 }
1807 u->flags_w = FlagsEmpty;
1808 } else {
1809 future_dead_flags
1810 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1811 }
1812
1813 /* If this insn also reads flags, empty out future_dead_flags so
1814 as to force preceding writes not to be annulled. */
1815 if (u->flags_r != FlagsEmpty)
1816 future_dead_flags = FlagsEmpty;
1817 }
1818
1819 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001820 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1821
1822 if (dis) {
1823 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001824 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001825 }
sewardjde4a1d02002-03-22 01:27:54 +00001826}
1827
njn9b007f62003-04-07 14:40:25 +00001828/*------------------------------------------------------------*/
1829/*--- %ESP-update pass ---*/
1830/*------------------------------------------------------------*/
1831
1832/* For skins that want to know about %ESP changes, this pass adds
1833 in the appropriate hooks. We have to do it after the skin's
1834 instrumentation, so the skin doesn't have to worry about the CCALLs
1835 it adds in, and we must do it before register allocation because
1836 spilled temps make it much harder to work out the %esp deltas.
njned619712003-10-01 16:45:04 +00001837 Thus we have it as an extra phase between the two.
1838
1839 We look for "GETL %ESP, t_ESP", then track ADDs and SUBs of
1840 literal values to t_ESP, and the total delta of the ADDs/SUBs. Then if
1841 "PUTL t_ESP, %ESP" happens, we call the helper with the known delta. We
1842 also cope with "MOVL t_ESP, tX", making tX the new t_ESP. If any other
1843 instruction clobbers t_ESP, we don't track it anymore, and fall back to
1844 the delta-is-unknown case. That case is also used when the delta is not
1845 a nice small amount, or an unknown amount.
1846*/
njn9b007f62003-04-07 14:40:25 +00001847static
1848UCodeBlock* vg_ESP_update_pass(UCodeBlock* cb_in)
1849{
1850 UCodeBlock* cb;
1851 UInstr* u;
1852 Int delta = 0;
1853 UInt t_ESP = INVALID_TEMPREG;
sewardj05bcdcb2003-05-18 10:05:38 +00001854 Int i;
njn9b007f62003-04-07 14:40:25 +00001855
1856 cb = VG_(setup_UCodeBlock)(cb_in);
1857
1858 for (i = 0; i < VG_(get_num_instrs)(cb_in); i++) {
1859 u = VG_(get_instr)(cb_in, i);
1860
1861 if (GET == u->opcode && R_ESP == u->val1) {
1862 t_ESP = u->val2;
1863 delta = 0;
1864
1865 } else if (PUT == u->opcode && R_ESP == u->val2 && 4 == u->size) {
1866
fitzhardinge98abfc72003-12-16 02:05:15 +00001867# define DO_GENERIC \
1868 if (VG_(defined_new_mem_stack)() || \
1869 VG_(defined_die_mem_stack)()) { \
1870 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1871 uCCall(cb, (Addr) VG_(unknown_esp_update), \
1872 1, 1, False); \
njn9b007f62003-04-07 14:40:25 +00001873 }
1874
fitzhardinge98abfc72003-12-16 02:05:15 +00001875# define DO(kind, size) \
1876 if (VG_(defined_##kind##_mem_stack_##size)()) { \
1877 uInstr1(cb, CCALL, 0, TempReg, u->val1); \
1878 uCCall(cb, (Addr) VG_(tool_interface).track_##kind##_mem_stack_##size, \
1879 1, 1, False); \
1880 \
1881 } else \
1882 DO_GENERIC \
njn9b007f62003-04-07 14:40:25 +00001883 break
1884
1885 if (u->val1 == t_ESP) {
1886 /* Known delta, common cases handled specially. */
1887 switch (delta) {
njned619712003-10-01 16:45:04 +00001888 case 0: break;
njn9b007f62003-04-07 14:40:25 +00001889 case 4: DO(die, 4);
1890 case -4: DO(new, 4);
1891 case 8: DO(die, 8);
1892 case -8: DO(new, 8);
1893 case 12: DO(die, 12);
1894 case -12: DO(new, 12);
1895 case 16: DO(die, 16);
1896 case -16: DO(new, 16);
1897 case 32: DO(die, 32);
1898 case -32: DO(new, 32);
1899 default: DO_GENERIC; break;
1900 }
1901 } else {
1902 /* Unknown delta */
1903 DO_GENERIC;
njned619712003-10-01 16:45:04 +00001904
daywalker972a7592003-10-01 10:19:08 +00001905 /* now we know the temp that points to %ESP */
njned619712003-10-01 16:45:04 +00001906 t_ESP = u->val1;
njn9b007f62003-04-07 14:40:25 +00001907 }
1908 delta = 0;
1909
1910# undef DO
1911# undef DO_GENERIC
1912
njned619712003-10-01 16:45:04 +00001913 } else if (ADD == u->opcode && Literal == u->tag1 && t_ESP == u->val2) {
1914 delta += u->lit32;
1915
1916 } else if (SUB == u->opcode && Literal == u->tag1 && t_ESP == u->val2) {
1917 delta -= u->lit32;
njn9b007f62003-04-07 14:40:25 +00001918
1919 } else if (MOV == u->opcode && TempReg == u->tag1 && t_ESP == u->val1 &&
1920 TempReg == u->tag2) {
njned619712003-10-01 16:45:04 +00001921 // t_ESP is transferred
njn9b007f62003-04-07 14:40:25 +00001922 t_ESP = u->val2;
njned619712003-10-01 16:45:04 +00001923
1924 } else {
1925 // Stop tracking t_ESP if it's clobbered by this instruction.
1926 Int tempUse [VG_MAX_REGS_USED];
1927 Bool isWrites[VG_MAX_REGS_USED];
1928 Int j, n = VG_(get_reg_usage)(u, TempReg, tempUse, isWrites);
1929
1930 for (j = 0; j < n; j++) {
1931 if (tempUse[j] == t_ESP && isWrites[j])
1932 t_ESP = INVALID_TEMPREG;
1933 }
njn9b007f62003-04-07 14:40:25 +00001934 }
1935 VG_(copy_UInstr) ( cb, u );
1936 }
1937
1938 VG_(free_UCodeBlock)(cb_in);
1939 return cb;
1940}
sewardjde4a1d02002-03-22 01:27:54 +00001941
1942/*------------------------------------------------------------*/
1943/*--- The new register allocator. ---*/
1944/*------------------------------------------------------------*/
1945
1946typedef
1947 struct {
1948 /* Becomes live for the first time after this insn ... */
1949 Int live_after;
jsewardfa70a8e2004-07-01 11:38:36 +00001950 /* Becomes dead for the last time before this insn ... */
sewardjde4a1d02002-03-22 01:27:54 +00001951 Int dead_before;
1952 /* The "home" spill slot, if needed. Never changes. */
1953 Int spill_no;
1954 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1955 Int real_no;
1956 }
1957 TempInfo;
1958
1959
1960/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1961 them in spill locations, and add spill code, if there are not
1962 enough real regs. The usual register allocation deal, in short.
1963
1964 Important redundancy of representation:
1965
     real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
     to VG_NOTHING if the real reg has no currently assigned TempReg.

     The .real_no field of a TempInfo gives the current RRR for
     this TempReg, or VG_NOTHING if the TempReg is currently
     in memory, in which case it is in the SpillNo denoted by
     its .spill_no field.
1973
1974 These pieces of information (a fwds-bwds mapping, really) must
1975 be kept consistent!
1976
1977 This allocator uses the so-called Second Chance Bin Packing
1978 algorithm, as described in "Quality and Speed in Linear-scan
1979 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1980 pp142-151). It is simple and fast and remarkably good at
1981 minimising the amount of spill code introduced.
1982*/
1983
1984static
1985UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1986{
1987 TempInfo* temp_info;
njned619712003-10-01 16:45:04 +00001988 Int real_to_temp [VG_MAX_REALREGS];
sewardjde4a1d02002-03-22 01:27:54 +00001989 Bool is_spill_cand[VG_MAX_REALREGS];
1990 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1991 Int i, j, k, m, r, tno, max_ss_no;
1992 Bool wr, defer, isRead, spill_reqd;
njned619712003-10-01 16:45:04 +00001993 UInt realUse [VG_MAX_REGS_USED];
1994 Int tempUse [VG_MAX_REGS_USED];
njnf4ce3d32003-02-10 10:17:26 +00001995 Bool isWrites[VG_MAX_REGS_USED];
sewardjde4a1d02002-03-22 01:27:54 +00001996 UCodeBlock* c2;
1997
1998 /* Used to denote ... well, "no value" in this fn. */
1999# define VG_NOTHING (-2)
2000
2001 /* Initialise the TempReg info. */
2002 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00002003 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
2004 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00002005 else
2006 temp_info = NULL;
2007
2008 for (i = 0; i < c1->nextTemp; i++) {
2009 temp_info[i].live_after = VG_NOTHING;
2010 temp_info[i].dead_before = VG_NOTHING;
2011 temp_info[i].spill_no = VG_NOTHING;
2012 /* temp_info[i].real_no is not yet relevant. */
2013 }
2014
2015 spill_reqd = False;
2016
2017 /* Scan fwds to establish live ranges. */
2018
2019 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00002020 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2021 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002022 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002023
2024 /* For each temp usage ... fwds in program order */
2025 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00002026 tno = tempUse[j];
2027 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00002028 if (wr) {
2029 /* Writes hold a reg live until after this insn. */
2030 if (temp_info[tno].live_after == VG_NOTHING)
2031 temp_info[tno].live_after = i;
2032 if (temp_info[tno].dead_before < i + 1)
2033 temp_info[tno].dead_before = i + 1;
2034 } else {
2035 /* First use of a tmp should be a write. */
njnfa0ad422003-02-03 11:07:03 +00002036 if (temp_info[tno].live_after == VG_NOTHING) {
2037 VG_(printf)("At instr %d...\n", i);
2038 VG_(core_panic)("First use of tmp not a write,"
2039 " probably a skin instrumentation error");
2040 }
sewardjde4a1d02002-03-22 01:27:54 +00002041 /* Reads only hold it live until before this insn. */
2042 if (temp_info[tno].dead_before < i)
2043 temp_info[tno].dead_before = i;
2044 }
2045 }
2046 }
2047
2048# if 0
2049 /* Sanity check on live ranges. Expensive but correct. */
2050 for (i = 0; i < c1->nextTemp; i++) {
2051 vg_assert( (temp_info[i].live_after == VG_NOTHING
2052 && temp_info[i].dead_before == VG_NOTHING)
2053 || (temp_info[i].live_after != VG_NOTHING
2054 && temp_info[i].dead_before != VG_NOTHING) );
2055 }
2056# endif
2057
2058 /* Do a rank-based allocation of TempRegs to spill slot numbers.
2059 We put as few as possible values in spill slots, but
2060 nevertheless need to have an assignment to them just in case. */
2061
2062 max_ss_no = -1;
2063
2064 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
2065 ss_busy_until_before[i] = 0;
2066
2067 for (i = 0; i < c1->nextTemp; i++) {
2068
2069 /* True iff this temp is unused. */
2070 if (temp_info[i].live_after == VG_NOTHING)
2071 continue;
2072
2073 /* Find the lowest-numbered spill slot which is available at the
2074 start point of this interval, and assign the interval to
2075 it. */
2076 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
2077 if (ss_busy_until_before[j] <= temp_info[i].live_after)
2078 break;
2079 if (j == VG_MAX_SPILLSLOTS) {
2080 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00002081 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00002082 }
2083 ss_busy_until_before[j] = temp_info[i].dead_before;
2084 temp_info[i].spill_no = j;
2085 if (j > max_ss_no)
2086 max_ss_no = j;
2087 }
2088
2089 VG_(total_reg_rank) += (max_ss_no+1);
2090
2091 /* Show live ranges and assigned spill slot nos. */
2092
njn25e49d8e72002-09-23 09:36:25 +00002093 if (dis) {
2094 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00002095
2096 for (i = 0; i < c1->nextTemp; i++) {
2097 if (temp_info[i].live_after == VG_NOTHING)
2098 continue;
2099 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00002100 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00002101 i,
2102 temp_info[i].live_after,
2103 temp_info[i].dead_before,
2104 temp_info[i].spill_no
2105 );
2106 }
njn25e49d8e72002-09-23 09:36:25 +00002107 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00002108 }
2109
2110 /* Now that we've established a spill slot number for each used
2111 temporary, we can go ahead and do the core of the "Second-chance
2112 binpacking" allocation algorithm. */
2113
njn25e49d8e72002-09-23 09:36:25 +00002114 if (dis) VG_(printf)("Register allocated UCode:\n");
2115
2116
sewardjde4a1d02002-03-22 01:27:54 +00002117 /* Resulting code goes here. We generate it all in a forwards
2118 pass. */
njn4ba5a792002-09-30 10:23:54 +00002119 c2 = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002120 c2->orig_eip = c1->orig_eip;
sewardjde4a1d02002-03-22 01:27:54 +00002121
2122 /* At the start, no TempRegs are assigned to any real register.
2123 Correspondingly, all temps claim to be currently resident in
2124 their spill slots, as computed by the previous two passes. */
2125 for (i = 0; i < VG_MAX_REALREGS; i++)
2126 real_to_temp[i] = VG_NOTHING;
2127 for (i = 0; i < c1->nextTemp; i++)
2128 temp_info[i].real_no = VG_NOTHING;
2129
sewardjde4a1d02002-03-22 01:27:54 +00002130 /* Process each insn in turn. */
2131 for (i = 0; i < c1->used; i++) {
2132
2133 if (c1->instrs[i].opcode == NOP) continue;
2134 VG_(uinstrs_prealloc)++;
2135
2136# if 0
2137 /* Check map consistency. Expensive but correct. */
2138 for (r = 0; r < VG_MAX_REALREGS; r++) {
2139 if (real_to_temp[r] != VG_NOTHING) {
2140 tno = real_to_temp[r];
2141 vg_assert(tno >= 0 && tno < c1->nextTemp);
2142 vg_assert(temp_info[tno].real_no == r);
2143 }
2144 }
2145 for (tno = 0; tno < c1->nextTemp; tno++) {
2146 if (temp_info[tno].real_no != VG_NOTHING) {
2147 r = temp_info[tno].real_no;
2148 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
2149 vg_assert(real_to_temp[r] == tno);
2150 }
2151 }
2152# endif
2153
njn25e49d8e72002-09-23 09:36:25 +00002154 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002155 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00002156
2157 /* First, free up enough real regs for this insn. This may
2158 generate spill stores since we may have to evict some TempRegs
2159 currently in real regs. Also generates spill loads. */
2160
njn810086f2002-11-14 12:42:47 +00002161 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
2162 &isWrites[0]);
njnf4ce3d32003-02-10 10:17:26 +00002163 vg_assert(k >= 0 && k <= VG_MAX_REGS_USED);
sewardjde4a1d02002-03-22 01:27:54 +00002164
2165 /* For each ***different*** temp mentioned in the insn .... */
2166 for (j = 0; j < k; j++) {
2167
2168 /* First check if the temp is mentioned again later; if so,
2169 ignore this mention. We only want to process each temp
2170 used by the insn once, even if it is mentioned more than
2171 once. */
2172 defer = False;
njn810086f2002-11-14 12:42:47 +00002173 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00002174 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002175 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00002176 defer = True;
2177 if (defer)
2178 continue;
2179
njn810086f2002-11-14 12:42:47 +00002180 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00002181 First of all, if it already has a register assigned, we
2182 don't need to do anything more. */
2183 if (temp_info[tno].real_no != VG_NOTHING)
2184 continue;
2185
2186 /* No luck. The next thing to do is see if there is a
2187 currently unassigned register available. If so, bag it. */
2188 for (r = 0; r < VG_MAX_REALREGS; r++) {
2189 if (real_to_temp[r] == VG_NOTHING)
2190 break;
2191 }
2192 if (r < VG_MAX_REALREGS) {
2193 real_to_temp[r] = tno;
2194 temp_info[tno].real_no = r;
2195 continue;
2196 }
2197
2198 /* Unfortunately, that didn't pan out either. So we'll have
2199 to eject some other unfortunate TempReg into a spill slot
2200 in order to free up a register. Of course, we need to be
2201 careful not to eject some other TempReg needed by this
2202 insn.
2203
2204 Select r in 0 .. VG_MAX_REALREGS-1 such that
2205 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00002206 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00002207 wrong to eject some other TempReg which we need to use in
2208 this insn.
2209
2210 It is here that it is important to make a good choice of
2211 register to spill. */
2212
2213 /* First, mark those regs which are not spill candidates. */
2214 for (r = 0; r < VG_MAX_REALREGS; r++) {
2215 is_spill_cand[r] = True;
2216 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00002217 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00002218 is_spill_cand[r] = False;
2219 break;
2220 }
2221 }
2222 }
2223
2224 /* We can choose any r satisfying is_spill_cand[r]. However,
2225 try to make a good choice. First, try and find r such
2226 that the associated TempReg is already dead. */
2227 for (r = 0; r < VG_MAX_REALREGS; r++) {
2228 if (is_spill_cand[r] &&
2229 temp_info[real_to_temp[r]].dead_before <= i)
2230 goto have_spill_cand;
2231 }
2232
2233 /* No spill cand is mapped to a dead TempReg. Now we really
2234 _do_ have to generate spill code. Choose r so that the
2235 next use of its associated TempReg is as far ahead as
2236 possible, in the hope that this will minimise the number of
2237 consequent reloads required. This is a bit expensive, but
2238 we don't have to do it very often. */
2239 {
2240 Int furthest_r = VG_MAX_REALREGS;
2241 Int furthest = 0;
2242 for (r = 0; r < VG_MAX_REALREGS; r++) {
2243 if (!is_spill_cand[r]) continue;
2244 for (m = i+1; m < c1->used; m++)
2245 if (uInstrMentionsTempReg(&c1->instrs[m],
2246 real_to_temp[r]))
2247 break;
2248 if (m > furthest) {
2249 furthest = m;
2250 furthest_r = r;
2251 }
2252 }
2253 r = furthest_r;
2254 goto have_spill_cand;
2255 }
2256
2257 have_spill_cand:
2258 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00002259 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00002260
2261 /* Eject r. Important refinement: don't bother if the
2262 associated TempReg is now dead. */
2263 vg_assert(real_to_temp[r] != VG_NOTHING);
2264 vg_assert(real_to_temp[r] != tno);
2265 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
2266 if (temp_info[real_to_temp[r]].dead_before > i) {
2267 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00002268 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00002269 SpillNo, temp_info[real_to_temp[r]].spill_no);
2270 VG_(uinstrs_spill)++;
2271 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002272 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002273 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002274 }
2275
2276 /* Decide if tno is read. */
2277 isRead = False;
2278 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00002279 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00002280 isRead = True;
2281
2282 /* If so, generate a spill load. */
2283 if (isRead) {
2284 uInstr2(c2, GET, 4,
2285 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00002286 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00002287 VG_(uinstrs_spill)++;
2288 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00002289 if (dis)
njn4ba5a792002-09-30 10:23:54 +00002290 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002291 }
2292
2293 /* Update the forwards and backwards maps. */
2294 real_to_temp[r] = tno;
2295 temp_info[tno].real_no = r;
2296 }
2297
2298 /* By this point, all TempRegs mentioned by the insn have been
2299 bought into real regs. We now copy the insn to the output
2300 and use patchUInstr to convert its rTempRegs into
2301 realregs. */
2302 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00002303 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00002304 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00002305 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00002306
njn25e49d8e72002-09-23 09:36:25 +00002307 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00002308 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00002309 VG_(printf)("\n");
2310 }
2311 }
2312
2313 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00002314 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00002315
njn4ba5a792002-09-30 10:23:54 +00002316 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00002317
2318 if (spill_reqd)
2319 VG_(translations_needing_spill)++;
2320
2321 return c2;
2322
2323# undef VG_NOTHING
2324
2325}
sewardj7c4b6042003-06-14 15:47:15 +00002326
njn25e49d8e72002-09-23 09:36:25 +00002327/* Analysis records liveness of all general-use RealRegs in the UCode. */
2328static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
2329{
2330 Int i, j, k;
2331 RRegSet rregs_live;
njnf4ce3d32003-02-10 10:17:26 +00002332 Int regUse[VG_MAX_REGS_USED];
2333 Bool isWrites[VG_MAX_REGS_USED];
njn25e49d8e72002-09-23 09:36:25 +00002334 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00002335
njn25e49d8e72002-09-23 09:36:25 +00002336 /* All regs are dead at the end of the block */
2337 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00002338
sewardjde4a1d02002-03-22 01:27:54 +00002339 for (i = cb->used-1; i >= 0; i--) {
2340 u = &cb->instrs[i];
2341
njn25e49d8e72002-09-23 09:36:25 +00002342 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00002343
njn810086f2002-11-14 12:42:47 +00002344 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00002345
njn25e49d8e72002-09-23 09:36:25 +00002346 /* For each reg usage ... bwds in program order. Variable is live
2347 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00002348 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00002349 convert it to our rank number. */
2350 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00002351 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00002352 rregs_live,
njn810086f2002-11-14 12:42:47 +00002353 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00002354 }
2355 }
sewardjde4a1d02002-03-22 01:27:54 +00002356}
2357
sewardjde4a1d02002-03-22 01:27:54 +00002358/*------------------------------------------------------------*/
2359/*--- Main entry point for the JITter. ---*/
2360/*------------------------------------------------------------*/
2361
2362/* Translate the basic block beginning at orig_addr, placing the
2363 translation in a vg_malloc'd block, the address and size of which
2364 are returned in trans_addr and trans_size. Length of the original
2365 block is also returned in orig_size. If the latter three are NULL,
2366 this call is being done for debugging purposes, in which case (a)
2367 throw away the translation once it is made, and (b) produce a load
2368 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002369
2370 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002371*/
njn72718642003-07-24 08:45:32 +00002372void VG_(translate) ( /*IN*/ ThreadId tid,
njn25e49d8e72002-09-23 09:36:25 +00002373 /*IN*/ Addr orig_addr,
2374 /*OUT*/ UInt* orig_size,
2375 /*OUT*/ Addr* trans_addr,
sewardj22854b92002-11-30 14:00:47 +00002376 /*OUT*/ UInt* trans_size,
2377 /*OUT*/ UShort jumps[VG_MAX_JUMPS])
sewardjde4a1d02002-03-22 01:27:54 +00002378{
fitzhardinge98abfc72003-12-16 02:05:15 +00002379 Int n_disassembled_bytes, final_code_size;
sewardjde4a1d02002-03-22 01:27:54 +00002380 Bool debugging_translation;
2381 UChar* final_code;
2382 UCodeBlock* cb;
sewardja60be0e2003-05-26 08:47:27 +00002383 Bool notrace_until_done;
sewardj1e86b8b2003-06-16 23:34:12 +00002384 UInt notrace_until_limit = 0;
fitzhardinge98abfc72003-12-16 02:05:15 +00002385 Segment *seg;
2386 Addr redir;
sewardjde4a1d02002-03-22 01:27:54 +00002387
2388 VGP_PUSHCC(VgpTranslate);
2389 debugging_translation
2390 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2391
sewardj25c7c3a2003-07-10 00:17:58 +00002392 /* Look in the code redirect table to see if we should
2393 translate an alternative address for orig_addr. */
fitzhardinge98abfc72003-12-16 02:05:15 +00002394 redir = VG_(code_redirect)(orig_addr);
2395
2396 if (redir != orig_addr && VG_(clo_verbosity) >= 2)
2397 VG_(message)(Vg_UserMsg,
2398 "TRANSLATE: %p redirected to %p",
2399 orig_addr,
2400 redir );
2401 orig_addr = redir;
sewardj25c7c3a2003-07-10 00:17:58 +00002402
sewardja60be0e2003-05-26 08:47:27 +00002403 /* If codegen tracing, don't start tracing until
2404 notrace_until_limit blocks have gone by. This avoids printing
2405 huge amounts of useless junk when all we want to see is the last
2406 few blocks translated prior to a failure. Set
2407 notrace_until_limit to be the number of translations to be made
2408 before --trace-codegen= style printing takes effect. */
2409 notrace_until_done
fitzhardinge15117d22003-12-19 17:16:54 +00002410 = VG_(overall_in_count) >= notrace_until_limit;
sewardja60be0e2003-05-26 08:47:27 +00002411
fitzhardinge98abfc72003-12-16 02:05:15 +00002412 seg = VG_(find_segment)(orig_addr);
2413
njn25e49d8e72002-09-23 09:36:25 +00002414 if (!debugging_translation)
njn72718642003-07-24 08:45:32 +00002415 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tid, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002416
fitzhardinge98abfc72003-12-16 02:05:15 +00002417 if (seg == NULL ||
2418 !VG_(seg_contains)(seg, orig_addr, 1) ||
2419 (seg->prot & (VKI_PROT_READ|VKI_PROT_EXEC)) == 0) {
fitzhardinge98abfc72003-12-16 02:05:15 +00002420 /* Code address is bad - deliver a signal instead */
2421 vg_assert(!VG_(is_addressable)(orig_addr, 1));
2422
fitzhardinge98abfc72003-12-16 02:05:15 +00002423 if (seg != NULL && VG_(seg_contains)(seg, orig_addr, 1)) {
2424 vg_assert((seg->prot & VKI_PROT_EXEC) == 0);
fitzhardingef1beb252004-03-16 09:49:08 +00002425 VG_(synth_fault_perms)(tid, orig_addr);
fitzhardinge98abfc72003-12-16 02:05:15 +00002426 } else
fitzhardingef1beb252004-03-16 09:49:08 +00002427 VG_(synth_fault_mapping)(tid, orig_addr);
jsgf855d93d2003-10-13 22:26:55 +00002428
jsgf855d93d2003-10-13 22:26:55 +00002429 return;
fitzhardinge98abfc72003-12-16 02:05:15 +00002430 } else
2431 seg->flags |= SF_CODE; /* contains cached code */
jsgf855d93d2003-10-13 22:26:55 +00002432
njn4ba5a792002-09-30 10:23:54 +00002433 cb = VG_(alloc_UCodeBlock)();
sewardj22854b92002-11-30 14:00:47 +00002434 cb->orig_eip = orig_addr;
sewardjde4a1d02002-03-22 01:27:54 +00002435
njn25e49d8e72002-09-23 09:36:25 +00002436 /* If doing any code printing, print a basic block start marker */
sewardja60be0e2003-05-26 08:47:27 +00002437 if (VG_(clo_trace_codegen) && notrace_until_done) {
njn25e49d8e72002-09-23 09:36:25 +00002438 Char fnname[64] = "";
2439 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2440 VG_(printf)(
njne0205ff2003-04-08 00:56:14 +00002441 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %llu ====\n\n",
njn25e49d8e72002-09-23 09:36:25 +00002442 VG_(overall_in_count), fnname, orig_addr,
2443 VG_(overall_in_osize), VG_(overall_in_tsize),
2444 VG_(bbs_done));
2445 }
2446
2447 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
sewardja60be0e2003-05-26 08:47:27 +00002448# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2449 ( debugging_translation \
2450 || (notrace_until_done \
2451 && (VG_(clo_trace_codegen) & (1 << (n-1))) ))
njn25e49d8e72002-09-23 09:36:25 +00002452
sewardjde4a1d02002-03-22 01:27:54 +00002453 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002454 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2455 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002456 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002457 VGP_POPCC(VgpToUCode);
2458
sewardjde4a1d02002-03-22 01:27:54 +00002459 /* Try and improve the code a bit. */
2460 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002461 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2462 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002463 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002464 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002465 }
2466
njn25e49d8e72002-09-23 09:36:25 +00002467 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2468 SK_(instrument) looks at it. */
2469 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2470 VGP_PUSHCC(VgpInstrument);
2471 cb = SK_(instrument) ( cb, orig_addr );
2472 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002473 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002474 VG_(saneUCodeBlock)( cb );
2475 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002476
njn9b007f62003-04-07 14:40:25 +00002477 /* Add %ESP-update hooks if the skin requires them */
2478 /* Nb: We don't print out this phase, because it doesn't do much */
2479 if (VG_(need_to_handle_esp_assignment)()) {
2480 VGP_PUSHCC(VgpESPUpdate);
2481 cb = vg_ESP_update_pass ( cb );
2482 VGP_POPCC(VgpESPUpdate);
2483 }
2484
sewardjde4a1d02002-03-22 01:27:54 +00002485 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002486 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2487 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002488 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002489 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002490
njn25e49d8e72002-09-23 09:36:25 +00002491 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2492 * anything; results can be seen when emitting final code). */
2493 VGP_PUSHCC(VgpLiveness);
2494 vg_realreg_liveness_analysis ( cb );
2495 VGP_POPCC(VgpLiveness);
2496
2497 /* Emit final code */
2498 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2499
2500 VGP_PUSHCC(VgpFromUcode);
sewardj22854b92002-11-30 14:00:47 +00002501 final_code = VG_(emit_code)(cb, &final_code_size, jumps );
njn25e49d8e72002-09-23 09:36:25 +00002502 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002503 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002504
njn25e49d8e72002-09-23 09:36:25 +00002505#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2506
sewardjde4a1d02002-03-22 01:27:54 +00002507 if (debugging_translation) {
2508 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002509 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002510 } else {
2511 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002512 *orig_size = n_disassembled_bytes;
2513 *trans_addr = (Addr)final_code;
2514 *trans_size = final_code_size;
2515 }
njn25e49d8e72002-09-23 09:36:25 +00002516 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002517}
2518
2519/*--------------------------------------------------------------------*/
2520/*--- end vg_translate.c ---*/
2521/*--------------------------------------------------------------------*/
njned619712003-10-01 16:45:04 +00002522