blob: 8f5ac3d2b07ee0c85df42e78882b740f5d96d151 [file] [log] [blame]
sewardjde4a1d02002-03-22 01:27:54 +00001
2/*--------------------------------------------------------------------*/
3/*--- The JITter proper: register allocation & code improvement ---*/
4/*--- vg_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
njnc9539842002-10-02 13:26:35 +00008 This file is part of Valgrind, an extensible x86 protected-mode
9 emulator for monitoring program execution on x86-Unixes.
sewardjde4a1d02002-03-22 01:27:54 +000010
11 Copyright (C) 2000-2002 Julian Seward
12 jseward@acm.org
sewardjde4a1d02002-03-22 01:27:54 +000013
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
18
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
28
njn25e49d8e72002-09-23 09:36:25 +000029 The GNU General Public License is contained in the file COPYING.
sewardjde4a1d02002-03-22 01:27:54 +000030*/
31
32#include "vg_include.h"
33
sewardjde4a1d02002-03-22 01:27:54 +000034/*------------------------------------------------------------*/
35/*--- Renamings of frequently-used global functions. ---*/
36/*------------------------------------------------------------*/
37
/* Short local alias for the VG_(print_codegen) flag. */
#define dis VG_(print_codegen)
sewardjde4a1d02002-03-22 01:27:54 +000039
sewardje1042472002-09-30 12:33:11 +000040
sewardjde4a1d02002-03-22 01:27:54 +000041/*------------------------------------------------------------*/
42/*--- Basics ---*/
43/*------------------------------------------------------------*/
44
njn810086f2002-11-14 12:42:47 +000045/* This one is called by the core */
njn4ba5a792002-09-30 10:23:54 +000046UCodeBlock* VG_(alloc_UCodeBlock) ( void )
sewardjde4a1d02002-03-22 01:27:54 +000047{
njn25e49d8e72002-09-23 09:36:25 +000048 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
sewardjde4a1d02002-03-22 01:27:54 +000049 cb->used = cb->size = cb->nextTemp = 0;
50 cb->instrs = NULL;
51 return cb;
52}
53
njn810086f2002-11-14 12:42:47 +000054/* This one is called by skins */
55UCodeBlock* VG_(setup_UCodeBlock) ( UCodeBlock* cb_in )
56{
57 UCodeBlock* cb = VG_(arena_malloc)(VG_AR_CORE, sizeof(UCodeBlock));
58 cb->used = cb->size = 0;
59 cb->nextTemp = cb_in->nextTemp;
60 cb->instrs = NULL;
61 return cb;
62}
sewardjde4a1d02002-03-22 01:27:54 +000063
njn4ba5a792002-09-30 10:23:54 +000064void VG_(free_UCodeBlock) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +000065{
njn25e49d8e72002-09-23 09:36:25 +000066 if (cb->instrs) VG_(arena_free)(VG_AR_CORE, cb->instrs);
67 VG_(arena_free)(VG_AR_CORE, cb);
sewardjde4a1d02002-03-22 01:27:54 +000068}
69
70
71/* Ensure there's enough space in a block to add one uinstr. */
72static __inline__
73void ensureUInstr ( UCodeBlock* cb )
74{
75 if (cb->used == cb->size) {
76 if (cb->instrs == NULL) {
77 vg_assert(cb->size == 0);
78 vg_assert(cb->used == 0);
79 cb->size = 8;
njn25e49d8e72002-09-23 09:36:25 +000080 cb->instrs = VG_(arena_malloc)(VG_AR_CORE, 8 * sizeof(UInstr));
sewardjde4a1d02002-03-22 01:27:54 +000081 } else {
82 Int i;
njn25e49d8e72002-09-23 09:36:25 +000083 UInstr* instrs2 = VG_(arena_malloc)(VG_AR_CORE,
sewardjde4a1d02002-03-22 01:27:54 +000084 2 * sizeof(UInstr) * cb->size);
85 for (i = 0; i < cb->used; i++)
86 instrs2[i] = cb->instrs[i];
87 cb->size *= 2;
njn25e49d8e72002-09-23 09:36:25 +000088 VG_(arena_free)(VG_AR_CORE, cb->instrs);
sewardjde4a1d02002-03-22 01:27:54 +000089 cb->instrs = instrs2;
90 }
91 }
92
93 vg_assert(cb->used < cb->size);
94}
95
96
97__inline__
njn4ba5a792002-09-30 10:23:54 +000098void VG_(new_NOP) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +000099{
100 u->val1 = u->val2 = u->val3 = 0;
101 u->tag1 = u->tag2 = u->tag3 = NoValue;
102 u->flags_r = u->flags_w = FlagsEmpty;
sewardj2e93c502002-04-12 11:12:52 +0000103 u->jmpkind = JmpBoring;
njn25e49d8e72002-09-23 09:36:25 +0000104 u->signed_widen = u->has_ret_val = False;
105 u->regs_live_after = ALL_RREGS_LIVE;
sewardjde4a1d02002-03-22 01:27:54 +0000106 u->lit32 = 0;
njn25e49d8e72002-09-23 09:36:25 +0000107 u->opcode = NOP;
sewardjde4a1d02002-03-22 01:27:54 +0000108 u->size = 0;
109 u->cond = 0;
110 u->extra4b = 0;
njn25e49d8e72002-09-23 09:36:25 +0000111 u->argc = u->regparms_n = 0;
sewardjde4a1d02002-03-22 01:27:54 +0000112}
113
114
115/* Add an instruction to a ucode block, and return the index of the
116 instruction. */
117__inline__
njn4ba5a792002-09-30 10:23:54 +0000118void VG_(new_UInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000119 Tag tag1, UInt val1,
120 Tag tag2, UInt val2,
121 Tag tag3, UInt val3 )
122{
123 UInstr* ui;
124 ensureUInstr(cb);
125 ui = & cb->instrs[cb->used];
126 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000127 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000128 ui->val1 = val1;
129 ui->val2 = val2;
130 ui->val3 = val3;
131 ui->opcode = opcode;
132 ui->tag1 = tag1;
133 ui->tag2 = tag2;
134 ui->tag3 = tag3;
135 ui->size = sz;
136 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
137 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
138 if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
139}
140
141
142__inline__
njn4ba5a792002-09-30 10:23:54 +0000143void VG_(new_UInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000144 Tag tag1, UInt val1,
145 Tag tag2, UInt val2 )
146{
147 UInstr* ui;
148 ensureUInstr(cb);
149 ui = & cb->instrs[cb->used];
150 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000151 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000152 ui->val1 = val1;
153 ui->val2 = val2;
154 ui->opcode = opcode;
155 ui->tag1 = tag1;
156 ui->tag2 = tag2;
157 ui->size = sz;
158 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
159 if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
160}
161
162
163__inline__
njn4ba5a792002-09-30 10:23:54 +0000164void VG_(new_UInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
sewardjde4a1d02002-03-22 01:27:54 +0000165 Tag tag1, UInt val1 )
166{
167 UInstr* ui;
168 ensureUInstr(cb);
169 ui = & cb->instrs[cb->used];
170 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000171 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000172 ui->val1 = val1;
173 ui->opcode = opcode;
174 ui->tag1 = tag1;
175 ui->size = sz;
176 if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
177}
178
179
180__inline__
njn4ba5a792002-09-30 10:23:54 +0000181void VG_(new_UInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
sewardjde4a1d02002-03-22 01:27:54 +0000182{
183 UInstr* ui;
184 ensureUInstr(cb);
185 ui = & cb->instrs[cb->used];
186 cb->used++;
njn4ba5a792002-09-30 10:23:54 +0000187 VG_(new_NOP)(ui);
sewardjde4a1d02002-03-22 01:27:54 +0000188 ui->opcode = opcode;
189 ui->size = sz;
190}
191
sewardjde4a1d02002-03-22 01:27:54 +0000192/* Copy an instruction into the given codeblock. */
njn4f9c9342002-04-29 16:03:24 +0000193__inline__
njn4ba5a792002-09-30 10:23:54 +0000194void VG_(copy_UInstr) ( UCodeBlock* cb, UInstr* instr )
sewardjde4a1d02002-03-22 01:27:54 +0000195{
196 ensureUInstr(cb);
197 cb->instrs[cb->used] = *instr;
198 cb->used++;
199}
200
sewardjde4a1d02002-03-22 01:27:54 +0000201/* Copy auxiliary info from one uinstr to another. */
202static __inline__
203void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
204{
njn25e49d8e72002-09-23 09:36:25 +0000205 dst->cond = src->cond;
206 dst->extra4b = src->extra4b;
207 dst->signed_widen = src->signed_widen;
208 dst->jmpkind = src->jmpkind;
209 dst->flags_r = src->flags_r;
210 dst->flags_w = src->flags_w;
211 dst->argc = src->argc;
212 dst->regparms_n = src->regparms_n;
213 dst->has_ret_val = src->has_ret_val;
214 dst->regs_live_after = src->regs_live_after;
sewardjde4a1d02002-03-22 01:27:54 +0000215}
216
217
sewardjde4a1d02002-03-22 01:27:54 +0000218/* Set the lit32 field of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000219void VG_(set_lit_field) ( UCodeBlock* cb, UInt lit32 )
sewardjde4a1d02002-03-22 01:27:54 +0000220{
221 LAST_UINSTR(cb).lit32 = lit32;
222}
223
224
njn25e49d8e72002-09-23 09:36:25 +0000225/* Set the C call info fields of the most recent uinsn. */
njn4ba5a792002-09-30 10:23:54 +0000226void VG_(set_ccall_fields) ( UCodeBlock* cb, Addr fn, UChar argc, UChar
227 regparms_n, Bool has_ret_val )
njn25e49d8e72002-09-23 09:36:25 +0000228{
229 vg_assert(argc < 4);
230 vg_assert(regparms_n <= argc);
231 LAST_UINSTR(cb).lit32 = fn;
232 LAST_UINSTR(cb).argc = argc;
233 LAST_UINSTR(cb).regparms_n = regparms_n;
234 LAST_UINSTR(cb).has_ret_val = has_ret_val;
235}
236
njn810086f2002-11-14 12:42:47 +0000237/* For the last uinsn inserted into cb, set the read, written and
238 undefined flags. Undefined flags are counted as written, but it
239 seems worthwhile to distinguish them.
240*/
241__inline__
242void VG_(set_flag_fields) ( UCodeBlock* cb,
243 FlagSet rr, FlagSet ww, FlagSet uu )
244{
245 FlagSet uw = VG_UNION_FLAG_SETS(ww,uu);
246
247 vg_assert(rr == (rr & FlagsALL));
248 vg_assert(uw == (uw & FlagsALL));
249 LAST_UINSTR(cb).flags_r = rr;
250 LAST_UINSTR(cb).flags_w = uw;
251}
252
253
njn4ba5a792002-09-30 10:23:54 +0000254Bool VG_(any_flag_use) ( UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000255{
256 return (u->flags_r != FlagsEmpty
257 || u->flags_w != FlagsEmpty);
258}
259
njn25e49d8e72002-09-23 09:36:25 +0000260#if 1
261# define BEST_ALLOC_ORDER
262#endif
sewardjde4a1d02002-03-22 01:27:54 +0000263
264/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
265 register number. This effectively defines the order in which real
266 registers are allocated. %ebp is excluded since it is permanently
njn25e49d8e72002-09-23 09:36:25 +0000267 reserved for pointing at VG_(baseBlock).
sewardjde4a1d02002-03-22 01:27:54 +0000268
njn25e49d8e72002-09-23 09:36:25 +0000269 Important! This function must correspond with the value of
270 VG_MAX_REALREGS (actually, VG_MAX_REALREGS can be reduced without
271 a problem, except the generated code will obviously be worse).
sewardjde4a1d02002-03-22 01:27:54 +0000272*/
njn25e49d8e72002-09-23 09:36:25 +0000273__inline__
njn4ba5a792002-09-30 10:23:54 +0000274Int VG_(rank_to_realreg) ( Int rank )
sewardjde4a1d02002-03-22 01:27:54 +0000275{
276 switch (rank) {
njn25e49d8e72002-09-23 09:36:25 +0000277# ifdef BEST_ALLOC_ORDER
sewardjde4a1d02002-03-22 01:27:54 +0000278 /* Probably the best allocation ordering. */
279 case 0: return R_EAX;
280 case 1: return R_EBX;
281 case 2: return R_ECX;
282 case 3: return R_EDX;
283 case 4: return R_ESI;
njn25e49d8e72002-09-23 09:36:25 +0000284 case 5: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000285# else
286 /* Contrary; probably the worst. Helpful for debugging, tho. */
njn25e49d8e72002-09-23 09:36:25 +0000287 case 5: return R_EAX;
288 case 4: return R_EBX;
289 case 3: return R_ECX;
290 case 2: return R_EDX;
291 case 1: return R_ESI;
292 case 0: return R_EDI;
sewardjde4a1d02002-03-22 01:27:54 +0000293# endif
njne427a662002-10-02 11:08:25 +0000294 default: VG_(core_panic)("VG_(rank_to_realreg)");
njn25e49d8e72002-09-23 09:36:25 +0000295 }
296}
297
298/* Convert an Intel register number into a rank in the range 0 ..
njn4ba5a792002-09-30 10:23:54 +0000299 VG_MAX_REALREGS-1. See related comments for rank_to_realreg()
njn25e49d8e72002-09-23 09:36:25 +0000300 above. */
301__inline__
njn4ba5a792002-09-30 10:23:54 +0000302Int VG_(realreg_to_rank) ( Int realReg )
njn25e49d8e72002-09-23 09:36:25 +0000303{
304 switch (realReg) {
305# ifdef BEST_ALLOC_ORDER
306 case R_EAX: return 0;
307 case R_EBX: return 1;
308 case R_ECX: return 2;
309 case R_EDX: return 3;
310 case R_ESI: return 4;
311 case R_EDI: return 5;
312# else
313 case R_EAX: return 5;
314 case R_EBX: return 4;
315 case R_ECX: return 3;
316 case R_EDX: return 2;
317 case R_ESI: return 1;
318 case R_EDI: return 0;
319# endif
njne427a662002-10-02 11:08:25 +0000320 default: VG_(core_panic)("VG_(realreg_to_rank)");
sewardjde4a1d02002-03-22 01:27:54 +0000321 }
322}
323
324
325/*------------------------------------------------------------*/
326/*--- Sanity checking uinstrs. ---*/
327/*------------------------------------------------------------*/
328
329/* This seems as good a place as any to record some important stuff
330 about ucode semantics.
331
332 * TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
333 TempReg are defined to zero-extend the loaded value to 32 bits.
334 This is needed to make the translation of movzbl et al work
335 properly.
336
   * Similarly, GETs of 8/16 bit ArchRegs are zero-extended.
338
339 * Arithmetic on TempRegs is at the specified size. For example,
340 SUBW t1, t2 has to result in a real 16 bit x86 subtraction
341 being emitted -- not a 32 bit one.
342
343 * On some insns we allow the cc bit to be set. If so, the
344 intention is that the simulated machine's %eflags register
345 is copied into that of the real machine before the insn,
346 and copied back again afterwards. This means that the
347 code generated for that insn must be very careful only to
348 update %eflags in the intended way. This is particularly
349 important for the routines referenced by CALL insns.
350*/
351
352/* Meaning of operand kinds is as follows:
353
354 ArchReg is a register of the simulated CPU, stored in memory,
355 in vg_m_state.m_eax .. m_edi. These values are stored
356 using the Intel register encoding.
357
358 RealReg is a register of the real CPU. There are VG_MAX_REALREGS
359 available for allocation. As with ArchRegs, these values
360 are stored using the Intel register encoding.
361
362 TempReg is a temporary register used to express the results of
363 disassembly. There is an unlimited supply of them --
364 register allocation and spilling eventually assigns them
365 to RealRegs.
366
367 SpillNo is a spill slot number. The number of required spill
368 slots is VG_MAX_PSEUDOS, in general. Only allowed
369 as the ArchReg operand of GET and PUT.
370
371 Lit16 is a signed 16-bit literal value.
372
373 Literal is a 32-bit literal value. Each uinstr can only hold
374 one of these.
375
376 The disassembled code is expressed purely in terms of ArchReg,
377 TempReg and Literal operands. Eventually, register allocation
378 removes all the TempRegs, giving a result using ArchRegs, RealRegs,
379 and Literals. New x86 code can easily be synthesised from this.
380 There are carefully designed restrictions on which insns can have
381 which operands, intended to make it possible to generate x86 code
382 from the result of register allocation on the ucode efficiently and
383 without need of any further RealRegs.
384
njn25e49d8e72002-09-23 09:36:25 +0000385 Restrictions for the individual UInstrs are clear from the checks below.
386 Abbreviations: A=ArchReg S=SpillNo T=TempReg L=Literal
387 Ls=Lit16 R=RealReg N=NoValue
sewardje1042472002-09-30 12:33:11 +0000388 As=ArchRegS
sewardjde4a1d02002-03-22 01:27:54 +0000389
sewardjde4a1d02002-03-22 01:27:54 +0000390 Before register allocation, S operands should not appear anywhere.
391 After register allocation, all T operands should have been
392 converted into Rs, and S operands are allowed in GET and PUT --
393 denoting spill saves/restores.
394
   Before liveness analysis, the regs_live_after field should be
   ALL_RREGS_LIVE for every core uinstr except CCALL (for which it is
   not checked).  Afterwards, it may hold any value.
397
sewardjde4a1d02002-03-22 01:27:54 +0000398 The size field should be 0 for insns for which it is meaningless,
399 ie those which do not directly move/operate on data.
400*/
njn25e49d8e72002-09-23 09:36:25 +0000401Bool VG_(saneUInstr) ( Bool beforeRA, Bool beforeLiveness, UInstr* u )
sewardjde4a1d02002-03-22 01:27:54 +0000402{
njn25e49d8e72002-09-23 09:36:25 +0000403# define LIT0 (u->lit32 == 0)
404# define LIT1 (!(LIT0))
405# define LITm (u->tag1 == Literal ? True : LIT0 )
406# define SZ4 (u->size == 4)
407# define SZ2 (u->size == 2)
408# define SZ1 (u->size == 1)
409# define SZ0 (u->size == 0)
410# define SZ42 (u->size == 4 || u->size == 2)
411# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
412# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
413 || u->size == 10 || u->size == 28 || u->size == 108)
414# define SZ4m ((u->tag1 == TempReg || u->tag1 == RealReg) \
415 ? (u->size == 4) : True)
416
417/* For these ones, two cases:
418 *
419 * 1. They are transliterations of the corresponding x86 instruction, in
420 * which case they should have its flags (except that redundant write
421 * flags can be annulled by the optimisation pass).
422 *
423 * 2. They are being used generally for other purposes, eg. helping with a
424 * 'rep'-prefixed instruction, in which case should have empty flags .
425 */
426# define emptyR (u->flags_r == FlagsEmpty)
427# define emptyW (u->flags_w == FlagsEmpty)
428# define CC0 (emptyR && emptyW)
429# define CCr (u->flags_r == FlagsALL && emptyW)
430# define CCw (emptyR && u->flags_w == FlagsALL)
431# define CCa (emptyR && (u->flags_w == FlagsOSZACP || emptyW))
432# define CCc (emptyR && (u->flags_w == FlagsOC || emptyW))
433# define CCe (emptyR && (u->flags_w == FlagsOSZAP || emptyW))
434# define CCb ((u->flags_r==FlagC || emptyR) && \
435 (u->flags_w==FlagsOSZACP || emptyW))
436# define CCd ((u->flags_r==FlagC || emptyR) && \
437 (u->flags_w==FlagsOC || emptyW))
sewardjc1b86882002-10-06 21:43:50 +0000438# define CCf (CC0 || (emptyR && u->flags_w==FlagsZCP))
njn25e49d8e72002-09-23 09:36:25 +0000439# define CCg ((u->flags_r==FlagsOSZACP || emptyR) && emptyW)
440# define CCj (u->cond==CondAlways ? CC0 : CCg)
441
sewardjde4a1d02002-03-22 01:27:54 +0000442# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
443# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
444# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
445# define A1 (u->tag1 == ArchReg)
446# define A2 (u->tag2 == ArchReg)
447# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
448# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
449# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
450# define L1 (u->tag1 == Literal && u->val1 == 0)
451# define L2 (u->tag2 == Literal && u->val2 == 0)
452# define Ls1 (u->tag1 == Lit16)
453# define Ls3 (u->tag3 == Lit16)
njn25e49d8e72002-09-23 09:36:25 +0000454# define TRL1 (TR1 || L1)
455# define TRAL1 (TR1 || A1 || L1)
sewardjde4a1d02002-03-22 01:27:54 +0000456# define N1 (u->tag1 == NoValue)
457# define N2 (u->tag2 == NoValue)
458# define N3 (u->tag3 == NoValue)
sewardje1042472002-09-30 12:33:11 +0000459# define Se1 (u->tag1 == ArchRegS)
460# define Se2 (u->tag2 == ArchRegS)
sewardjde4a1d02002-03-22 01:27:54 +0000461
njn25e49d8e72002-09-23 09:36:25 +0000462# define COND0 (u->cond == 0)
463# define EXTRA4b0 (u->extra4b == 0)
464# define SG_WD0 (u->signed_widen == 0)
465# define JMPKIND0 (u->jmpkind == 0)
466# define CCALL0 (u->argc==0 && u->regparms_n==0 && u->has_ret_val==0 && \
467 ( beforeLiveness \
468 ? u->regs_live_after == ALL_RREGS_LIVE \
469 : True ))
470
471# define XCONDi ( EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
472# define Xextra4b (COND0 && SG_WD0 && JMPKIND0 && CCALL0)
473# define XWIDEN (COND0 && JMPKIND0 && CCALL0)
474# define XJMP ( SG_WD0 && CCALL0)
475# define XCCALL (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 )
476# define XOTHER (COND0 && EXTRA4b0 && SG_WD0 && JMPKIND0 && CCALL0)
477
478 /* 0 or 1 Literal args per UInstr */
sewardjde4a1d02002-03-22 01:27:54 +0000479 Int n_lits = 0;
480 if (u->tag1 == Literal) n_lits++;
481 if (u->tag2 == Literal) n_lits++;
482 if (u->tag3 == Literal) n_lits++;
483 if (n_lits > 1)
484 return False;
485
njn25e49d8e72002-09-23 09:36:25 +0000486 /* Fields not checked: val1, val2, val3 */
487
sewardjde4a1d02002-03-22 01:27:54 +0000488 switch (u->opcode) {
njn25e49d8e72002-09-23 09:36:25 +0000489
490 /* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
sewardje1042472002-09-30 12:33:11 +0000491 case PUTSEG: return LIT0 && SZ2 && CC0 && TR1 && Se2 && N3 && XOTHER;
492 case GETSEG: return LIT0 && SZ2 && CC0 && Se1 && TR2 && N3 && XOTHER;
493 case USESEG: return LIT0 && SZ0 && CC0 && TR1 && TR2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000494 case NOP: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
sewardj7a5ebcf2002-11-13 22:42:13 +0000495 case LOCK: return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
njn25e49d8e72002-09-23 09:36:25 +0000496 case GETF: return LIT0 && SZ42 && CCr && TR1 && N2 && N3 && XOTHER;
497 case PUTF: return LIT0 && SZ42 && CCw && TR1 && N2 && N3 && XOTHER;
498 case GET: return LIT0 && SZi && CC0 && AS1 && TR2 && N3 && XOTHER;
499 case PUT: return LIT0 && SZi && CC0 && TR1 && AS2 && N3 && XOTHER;
500 case LOAD:
501 case STORE: return LIT0 && SZi && CC0 && TR1 && TR2 && N3 && XOTHER;
502 case MOV: return LITm && SZ4m && CC0 && TRL1 && TR2 && N3 && XOTHER;
503 case CMOV: return LIT0 && SZ4 && CCg && TR1 && TR2 && N3 && XCONDi;
504 case WIDEN: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XWIDEN;
505 case JMP: return LITm && SZ0 && CCj && TRL1 && N2 && N3 && XJMP;
506 case CALLM: return LIT0 && SZ0 /*any*/ && Ls1 && N2 && N3 && XOTHER;
507 case CALLM_S:
508 case CALLM_E:return LIT0 && SZ0 && CC0 && N1 && N2 && N3 && XOTHER;
509 case PUSH:
510 case POP: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
511 case CLEAR: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
512 case AND:
513 case OR: return LIT0 && SZi && CCa && TR1 && TR2 && N3 && XOTHER;
514 case ADD:
515 case XOR:
516 case SUB: return LITm && SZi && CCa &&TRAL1 && TR2 && N3 && XOTHER;
517 case SBB:
518 case ADC: return LITm && SZi && CCb &&TRAL1 && TR2 && N3 && XOTHER;
519 case SHL:
520 case SHR:
521 case SAR: return LITm && SZi && CCa && TRL1 && TR2 && N3 && XOTHER;
522 case ROL:
523 case ROR: return LITm && SZi && CCc && TRL1 && TR2 && N3 && XOTHER;
524 case RCL:
525 case RCR: return LITm && SZi && CCd && TRL1 && TR2 && N3 && XOTHER;
526 case NOT: return LIT0 && SZi && CC0 && TR1 && N2 && N3 && XOTHER;
527 case NEG: return LIT0 && SZi && CCa && TR1 && N2 && N3 && XOTHER;
528 case INC:
529 case DEC: return LIT0 && SZi && CCe && TR1 && N2 && N3 && XOTHER;
530 case CC2VAL: return LIT0 && SZ1 && CCg && TR1 && N2 && N3 && XCONDi;
531 case BSWAP: return LIT0 && SZ4 && CC0 && TR1 && N2 && N3 && XOTHER;
532 case JIFZ: return LIT1 && SZ4 && CC0 && TR1 && L2 && N3 && XOTHER;
533 case FPU_R:
534 case FPU_W: return LIT0 && SZf && CC0 && Ls1 && TR2 && N3 && XOTHER;
535 case FPU: return LIT0 && SZ0 && CCf && Ls1 && N2 && N3 && XOTHER;
536 case LEA1: return /*any*/ SZ4 && CC0 && TR1 && TR2 && N3 && XOTHER;
537 case LEA2: return /*any*/ SZ4 && CC0 && TR1 && TR2 && TR3 && Xextra4b;
538 case INCEIP: return LIT0 && SZ0 && CC0 && Ls1 && N2 && N3 && XOTHER;
539 case CCALL: return LIT1 && SZ0 && CC0 &&
540 (u->argc > 0 ? TR1 : N1) &&
541 (u->argc > 1 ? TR2 : N2) &&
542 (u->argc > 2 || u->has_ret_val ? TR3 : N3) &&
543 u->regparms_n <= u->argc && XCCALL;
544 default:
545 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000546 return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
njn25e49d8e72002-09-23 09:36:25 +0000547 else {
548 VG_(printf)("unhandled opcode: %u. Perhaps "
549 "VG_(needs).extended_UCode should be set?",
550 u->opcode);
njne427a662002-10-02 11:08:25 +0000551 VG_(core_panic)("VG_(saneUInstr): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000552 }
sewardjde4a1d02002-03-22 01:27:54 +0000553 }
njn25e49d8e72002-09-23 09:36:25 +0000554# undef LIT0
555# undef LIT1
556# undef LITm
sewardjde4a1d02002-03-22 01:27:54 +0000557# undef SZ4
558# undef SZ2
559# undef SZ1
560# undef SZ0
njn25e49d8e72002-09-23 09:36:25 +0000561# undef SZ42
562# undef SZi
563# undef SZf
564# undef SZ4m
565# undef emptyR
566# undef emptyW
567# undef CC0
568# undef CCr
569# undef CCw
570# undef CCa
571# undef CCb
572# undef CCc
573# undef CCd
574# undef CCe
575# undef CCf
576# undef CCg
577# undef CCj
sewardjde4a1d02002-03-22 01:27:54 +0000578# undef TR1
579# undef TR2
580# undef TR3
581# undef A1
582# undef A2
583# undef AS1
584# undef AS2
585# undef AS3
586# undef L1
sewardjde4a1d02002-03-22 01:27:54 +0000587# undef L2
njn25e49d8e72002-09-23 09:36:25 +0000588# undef Ls1
sewardjde4a1d02002-03-22 01:27:54 +0000589# undef Ls3
njn25e49d8e72002-09-23 09:36:25 +0000590# undef TRL1
591# undef TRAL1
sewardjde4a1d02002-03-22 01:27:54 +0000592# undef N1
593# undef N2
594# undef N3
sewardje1042472002-09-30 12:33:11 +0000595# undef Se2
596# undef Se1
njn25e49d8e72002-09-23 09:36:25 +0000597# undef COND0
598# undef EXTRA4b0
599# undef SG_WD0
600# undef JMPKIND0
601# undef CCALL0
602# undef Xextra4b
603# undef XWIDEN
604# undef XJMP
605# undef XCCALL
606# undef XOTHER
sewardjde4a1d02002-03-22 01:27:54 +0000607}
608
njn25e49d8e72002-09-23 09:36:25 +0000609void VG_(saneUCodeBlock) ( UCodeBlock* cb )
610{
611 Int i;
612
613 for (i = 0; i < cb->used; i++) {
614 Bool sane = VG_(saneUInstr)(True, True, &cb->instrs[i]);
615 if (!sane) {
616 VG_(printf)("Instruction failed sanity check:\n");
njn4ba5a792002-09-30 10:23:54 +0000617 VG_(up_UInstr)(i, &cb->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +0000618 }
619 vg_assert(sane);
620 }
621}
sewardjde4a1d02002-03-22 01:27:54 +0000622
623/* Sanity checks to do with CALLMs in UCodeBlocks. */
njn25e49d8e72002-09-23 09:36:25 +0000624Bool VG_(saneUCodeBlockCalls) ( UCodeBlock* cb )
sewardjde4a1d02002-03-22 01:27:54 +0000625{
626 Int callm = 0;
627 Int callm_s = 0;
628 Int callm_e = 0;
629 Int callm_ptr, calls_ptr;
630 Int i, j, t;
631 Bool incall = False;
632
633 /* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
634
635 for (i = 0; i < cb->used; i++) {
636 switch (cb->instrs[i].opcode) {
637 case CALLM:
638 if (!incall) return False;
639 callm++;
640 break;
641 case CALLM_S:
642 if (incall) return False;
643 incall = True;
644 callm_s++;
645 break;
646 case CALLM_E:
647 if (!incall) return False;
648 incall = False;
649 callm_e++;
650 break;
651 case PUSH: case POP: case CLEAR:
652 if (!incall) return False;
653 break;
654 default:
655 break;
656 }
657 }
658 if (incall) return False;
659 if (callm != callm_s || callm != callm_e) return False;
660
661 /* Check the sections between CALLM_S and CALLM's. Ensure that no
662 PUSH uinsn pushes any TempReg that any other PUSH in the same
663 section pushes. Ie, check that the TempReg args to PUSHes in
664 the section are unique. If not, the instrumenter generates
665 incorrect code for CALLM insns. */
666
667 callm_ptr = 0;
668
669 find_next_CALLM:
670 /* Search for the next interval, making calls_ptr .. callm_ptr
671 bracket it. */
672 while (callm_ptr < cb->used
673 && cb->instrs[callm_ptr].opcode != CALLM)
674 callm_ptr++;
675 if (callm_ptr == cb->used)
676 return True;
677 vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
678
679 calls_ptr = callm_ptr - 1;
680 while (cb->instrs[calls_ptr].opcode != CALLM_S)
681 calls_ptr--;
682 vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
683 vg_assert(calls_ptr >= 0);
684
685 /* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
686
687 /* For each PUSH insn in the interval ... */
688 for (i = calls_ptr + 1; i < callm_ptr; i++) {
689 if (cb->instrs[i].opcode != PUSH) continue;
690 t = cb->instrs[i].val1;
691 /* Ensure no later PUSH insns up to callm_ptr push the same
692 TempReg. Return False if any such are found. */
693 for (j = i+1; j < callm_ptr; j++) {
694 if (cb->instrs[j].opcode == PUSH &&
695 cb->instrs[j].val1 == t)
696 return False;
697 }
698 }
699
700 /* This interval is clean. Keep going ... */
701 callm_ptr++;
702 goto find_next_CALLM;
703}
704
705
706/*------------------------------------------------------------*/
707/*--- Printing uinstrs. ---*/
708/*------------------------------------------------------------*/
709
njn25e49d8e72002-09-23 09:36:25 +0000710/* Global that dictates whether to print generated code at all stages */
711Bool VG_(print_codegen);
712
sewardjde4a1d02002-03-22 01:27:54 +0000713Char* VG_(nameCondcode) ( Condcode cond )
714{
715 switch (cond) {
716 case CondO: return "o";
717 case CondNO: return "no";
718 case CondB: return "b";
719 case CondNB: return "nb";
720 case CondZ: return "z";
721 case CondNZ: return "nz";
722 case CondBE: return "be";
723 case CondNBE: return "nbe";
724 case CondS: return "s";
sewardje1042472002-09-30 12:33:11 +0000725 case CondNS: return "ns";
sewardjde4a1d02002-03-22 01:27:54 +0000726 case CondP: return "p";
727 case CondNP: return "np";
728 case CondL: return "l";
729 case CondNL: return "nl";
730 case CondLE: return "le";
731 case CondNLE: return "nle";
732 case CondAlways: return "MP"; /* hack! */
njne427a662002-10-02 11:08:25 +0000733 default: VG_(core_panic)("nameCondcode");
sewardjde4a1d02002-03-22 01:27:54 +0000734 }
735}
736
737
738static void vg_ppFlagSet ( Char* prefix, FlagSet set )
739{
740 VG_(printf)("%s", prefix);
741 if (set & FlagD) VG_(printf)("D");
742 if (set & FlagO) VG_(printf)("O");
743 if (set & FlagS) VG_(printf)("S");
744 if (set & FlagZ) VG_(printf)("Z");
745 if (set & FlagA) VG_(printf)("A");
746 if (set & FlagC) VG_(printf)("C");
747 if (set & FlagP) VG_(printf)("P");
748}
749
750
751static void ppTempReg ( Int tt )
752{
753 if ((tt & 1) == 0)
754 VG_(printf)("t%d", tt);
755 else
756 VG_(printf)("q%d", tt-1);
757}
758
759
njn4ba5a792002-09-30 10:23:54 +0000760void VG_(pp_UOperand) ( UInstr* u, Int operandNo, Int sz, Bool parens )
sewardjde4a1d02002-03-22 01:27:54 +0000761{
762 UInt tag, val;
763 switch (operandNo) {
764 case 1: tag = u->tag1; val = u->val1; break;
765 case 2: tag = u->tag2; val = u->val2; break;
766 case 3: tag = u->tag3; val = u->val3; break;
njne427a662002-10-02 11:08:25 +0000767 default: VG_(core_panic)("VG_(pp_UOperand)(1)");
sewardjde4a1d02002-03-22 01:27:54 +0000768 }
769 if (tag == Literal) val = u->lit32;
770
771 if (parens) VG_(printf)("(");
772 switch (tag) {
sewardje1042472002-09-30 12:33:11 +0000773 case TempReg: ppTempReg(val); break;
774 case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
775 case Literal: VG_(printf)("$0x%x", val); break;
776 case Lit16: VG_(printf)("$0x%x", val); break;
777 case NoValue: VG_(printf)("NoValue"); break;
778 case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
779 case ArchRegS: VG_(printf)("%S",nameSReg(val)); break;
780 case SpillNo: VG_(printf)("spill%d", val); break;
njne427a662002-10-02 11:08:25 +0000781 default: VG_(core_panic)("VG_(ppUOperand)(2)");
sewardjde4a1d02002-03-22 01:27:54 +0000782 }
783 if (parens) VG_(printf)(")");
784}
785
786
njn4ba5a792002-09-30 10:23:54 +0000787Char* VG_(name_UOpcode) ( Bool upper, Opcode opc )
sewardjde4a1d02002-03-22 01:27:54 +0000788{
789 switch (opc) {
790 case ADD: return (upper ? "ADD" : "add");
791 case ADC: return (upper ? "ADC" : "adc");
792 case AND: return (upper ? "AND" : "and");
793 case OR: return (upper ? "OR" : "or");
794 case XOR: return (upper ? "XOR" : "xor");
795 case SUB: return (upper ? "SUB" : "sub");
796 case SBB: return (upper ? "SBB" : "sbb");
797 case SHL: return (upper ? "SHL" : "shl");
798 case SHR: return (upper ? "SHR" : "shr");
799 case SAR: return (upper ? "SAR" : "sar");
800 case ROL: return (upper ? "ROL" : "rol");
801 case ROR: return (upper ? "ROR" : "ror");
802 case RCL: return (upper ? "RCL" : "rcl");
803 case RCR: return (upper ? "RCR" : "rcr");
804 case NOT: return (upper ? "NOT" : "not");
805 case NEG: return (upper ? "NEG" : "neg");
806 case INC: return (upper ? "INC" : "inc");
807 case DEC: return (upper ? "DEC" : "dec");
808 case BSWAP: return (upper ? "BSWAP" : "bswap");
809 default: break;
810 }
njne427a662002-10-02 11:08:25 +0000811 if (!upper) VG_(core_panic)("vg_name_UOpcode: invalid !upper");
sewardjde4a1d02002-03-22 01:27:54 +0000812 switch (opc) {
sewardjde4a1d02002-03-22 01:27:54 +0000813 case CALLM_S: return "CALLM_S";
814 case CALLM_E: return "CALLM_E";
815 case INCEIP: return "INCEIP";
816 case LEA1: return "LEA1";
817 case LEA2: return "LEA2";
818 case NOP: return "NOP";
sewardj7a5ebcf2002-11-13 22:42:13 +0000819 case LOCK: return "LOCK";
sewardjde4a1d02002-03-22 01:27:54 +0000820 case GET: return "GET";
821 case PUT: return "PUT";
822 case GETF: return "GETF";
823 case PUTF: return "PUTF";
sewardje1042472002-09-30 12:33:11 +0000824 case GETSEG: return "GETSEG";
825 case PUTSEG: return "PUTSEG";
826 case USESEG: return "USESEG";
sewardjde4a1d02002-03-22 01:27:54 +0000827 case LOAD: return "LD" ;
828 case STORE: return "ST" ;
829 case MOV: return "MOV";
830 case CMOV: return "CMOV";
831 case WIDEN: return "WIDEN";
832 case JMP: return "J" ;
833 case JIFZ: return "JIFZ" ;
834 case CALLM: return "CALLM";
njn25e49d8e72002-09-23 09:36:25 +0000835 case CCALL: return "CCALL";
sewardjde4a1d02002-03-22 01:27:54 +0000836 case PUSH: return "PUSH" ;
837 case POP: return "POP" ;
838 case CLEAR: return "CLEAR";
839 case CC2VAL: return "CC2VAL";
840 case FPU_R: return "FPU_R";
841 case FPU_W: return "FPU_W";
842 case FPU: return "FPU" ;
njn25e49d8e72002-09-23 09:36:25 +0000843 default:
844 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +0000845 return SK_(name_XUOpcode)(opc);
njn25e49d8e72002-09-23 09:36:25 +0000846 else {
847 VG_(printf)("unhandled opcode: %u. Perhaps "
848 "VG_(needs).extended_UCode should be set?",
849 opc);
njne427a662002-10-02 11:08:25 +0000850 VG_(core_panic)("name_UOpcode: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +0000851 }
sewardjde4a1d02002-03-22 01:27:54 +0000852 }
853}
854
sewardja38e0922002-10-01 00:50:47 +0000855static
njn4ba5a792002-09-30 10:23:54 +0000856void pp_realregs_liveness ( UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000857{
858# define PRINT_RREG_LIVENESS(realReg,s) \
njn4ba5a792002-09-30 10:23:54 +0000859 VG_(printf)( IS_RREG_LIVE(VG_(realreg_to_rank)(realReg), \
njn25e49d8e72002-09-23 09:36:25 +0000860 u->regs_live_after) \
861 ? s : "-");
sewardjde4a1d02002-03-22 01:27:54 +0000862
njn25e49d8e72002-09-23 09:36:25 +0000863 VG_(printf)("[");
864 PRINT_RREG_LIVENESS(R_EAX, "a");
865 PRINT_RREG_LIVENESS(R_EBX, "b");
866 PRINT_RREG_LIVENESS(R_ECX, "c");
867 PRINT_RREG_LIVENESS(R_EDX, "d");
868 PRINT_RREG_LIVENESS(R_ESI, "S");
869 PRINT_RREG_LIVENESS(R_EDI, "D");
870 VG_(printf)("]");
871
872# undef PRINT_RREG_LIVENESS
873}
874
875/* Ugly-print UInstr :) */
njn4ba5a792002-09-30 10:23:54 +0000876void VG_(up_UInstr) ( Int i, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +0000877{
njn4ba5a792002-09-30 10:23:54 +0000878 VG_(pp_UInstr_regs)(i, u);
njn25e49d8e72002-09-23 09:36:25 +0000879
880 VG_(printf)("opcode: %d\n", u->opcode);
sewardjc1b86882002-10-06 21:43:50 +0000881 VG_(printf)("lit32: 0x%x\n", u->lit32);
njn25e49d8e72002-09-23 09:36:25 +0000882 VG_(printf)("size: %d\n", u->size);
883 VG_(printf)("val1,val2,val3: %d, %d, %d\n", u->val1, u->val2, u->val3);
884 VG_(printf)("tag1,tag2,tag3: %d, %d, %d\n", u->tag1, u->tag2, u->tag3);
sewardjc1b86882002-10-06 21:43:50 +0000885 VG_(printf)("flags_r: 0x%x\n", u->flags_r);
886 VG_(printf)("flags_w: 0x%x\n", u->flags_w);
887 VG_(printf)("extra4b: 0x%x\n", u->extra4b);
888 VG_(printf)("cond: 0x%x\n", u->cond);
njn25e49d8e72002-09-23 09:36:25 +0000889 VG_(printf)("signed_widen: %d\n", u->signed_widen);
890 VG_(printf)("jmpkind: %d\n", u->jmpkind);
891 VG_(printf)("argc,regparms_n: %d, %d\n", u->argc, u->regparms_n);
892 VG_(printf)("has_ret_val: %d\n", u->has_ret_val);
893 VG_(printf)("regs_live_after: ");
njn4ba5a792002-09-30 10:23:54 +0000894 pp_realregs_liveness(u);
njn25e49d8e72002-09-23 09:36:25 +0000895 VG_(printf)("\n");
896}
897
sewardja38e0922002-10-01 00:50:47 +0000898static
njn4ba5a792002-09-30 10:23:54 +0000899void pp_UInstrWorker ( Int instrNo, UInstr* u, Bool ppRegsLiveness )
sewardjde4a1d02002-03-22 01:27:54 +0000900{
901 VG_(printf)("\t%4d: %s", instrNo,
njn4ba5a792002-09-30 10:23:54 +0000902 VG_(name_UOpcode)(True, u->opcode));
sewardjde4a1d02002-03-22 01:27:54 +0000903 if (u->opcode == JMP || u->opcode == CC2VAL)
904 VG_(printf)("%s", VG_(nameCondcode(u->cond)));
905
906 switch (u->size) {
907 case 0: VG_(printf)("o"); break;
908 case 1: VG_(printf)("B"); break;
909 case 2: VG_(printf)("W"); break;
910 case 4: VG_(printf)("L"); break;
911 case 8: VG_(printf)("Q"); break;
912 default: VG_(printf)("%d", (Int)u->size); break;
913 }
914
915 switch (u->opcode) {
916
sewardjde4a1d02002-03-22 01:27:54 +0000917 case CALLM_S: case CALLM_E:
918 break;
919
920 case INCEIP:
921 VG_(printf)("\t$%d", u->val1);
922 break;
923
924 case LEA2:
925 VG_(printf)("\t%d(" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000926 VG_(pp_UOperand)(u, 1, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000927 VG_(printf)(",");
njn4ba5a792002-09-30 10:23:54 +0000928 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000929 VG_(printf)(",%d), ", (Int)u->extra4b);
njn4ba5a792002-09-30 10:23:54 +0000930 VG_(pp_UOperand)(u, 3, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000931 break;
932
933 case LEA1:
934 VG_(printf)("\t%d" , u->lit32);
njn4ba5a792002-09-30 10:23:54 +0000935 VG_(pp_UOperand)(u, 1, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000936 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +0000937 VG_(pp_UOperand)(u, 2, 4, False);
sewardjde4a1d02002-03-22 01:27:54 +0000938 break;
939
sewardj7a5ebcf2002-11-13 22:42:13 +0000940 case NOP: case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +0000941 break;
942
943 case FPU_W:
944 VG_(printf)("\t0x%x:0x%x, ",
945 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
njn4ba5a792002-09-30 10:23:54 +0000946 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000947 break;
948
949 case FPU_R:
950 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +0000951 VG_(pp_UOperand)(u, 2, 4, True);
sewardjde4a1d02002-03-22 01:27:54 +0000952 VG_(printf)(", 0x%x:0x%x",
953 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
954 break;
955
956 case FPU:
957 VG_(printf)("\t0x%x:0x%x",
958 (u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
959 break;
960
sewardjde4a1d02002-03-22 01:27:54 +0000961 case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
sewardje1042472002-09-30 12:33:11 +0000962 case GETSEG: case PUTSEG:
sewardjde4a1d02002-03-22 01:27:54 +0000963 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +0000964 VG_(pp_UOperand)(u, 1, u->size, u->opcode==LOAD);
sewardjde4a1d02002-03-22 01:27:54 +0000965 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +0000966 VG_(pp_UOperand)(u, 2, u->size, u->opcode==STORE);
njn25e49d8e72002-09-23 09:36:25 +0000967 break;
968
969 case JMP:
970 switch (u->jmpkind) {
971 case JmpCall: VG_(printf)("-c"); break;
972 case JmpRet: VG_(printf)("-r"); break;
973 case JmpSyscall: VG_(printf)("-sys"); break;
974 case JmpClientReq: VG_(printf)("-cli"); break;
975 default: break;
976 }
977 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +0000978 VG_(pp_UOperand)(u, 1, u->size, False);
njn25e49d8e72002-09-23 09:36:25 +0000979 if (CondAlways == u->cond) {
980 /* Print x86 instruction size if filled in */
981 if (0 != u->extra4b)
982 VG_(printf)(" ($%u)", u->extra4b);
983 }
sewardjde4a1d02002-03-22 01:27:54 +0000984 break;
985
986 case GETF: case PUTF:
njn25e49d8e72002-09-23 09:36:25 +0000987 case CC2VAL: case PUSH: case POP: case CLEAR: case CALLM:
988 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +0000989 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +0000990 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +0000991 break;
992
njn25e49d8e72002-09-23 09:36:25 +0000993 /* Print a "(s)" after args passed on stack */
994 case CCALL:
995 VG_(printf)("\t");
996 if (u->has_ret_val) {
njn4ba5a792002-09-30 10:23:54 +0000997 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +0000998 VG_(printf)(" = ");
sewardj2e93c502002-04-12 11:12:52 +0000999 }
njn25e49d8e72002-09-23 09:36:25 +00001000 VG_(printf)("%p(", u->lit32);
1001 if (u->argc > 0) {
njn4ba5a792002-09-30 10:23:54 +00001002 VG_(pp_UOperand)(u, 1, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001003 if (u->regparms_n < 1)
1004 VG_(printf)("(s)");
1005 }
1006 if (u->argc > 1) {
1007 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001008 VG_(pp_UOperand)(u, 2, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001009 if (u->regparms_n < 2)
1010 VG_(printf)("(s)");
1011 }
1012 if (u->argc > 2) {
1013 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001014 VG_(pp_UOperand)(u, 3, 0, False);
njn25e49d8e72002-09-23 09:36:25 +00001015 if (u->regparms_n < 3)
1016 VG_(printf)("(s)");
1017 }
1018 VG_(printf)(") ");
njn6431be72002-07-28 09:53:34 +00001019 break;
1020
sewardje1042472002-09-30 12:33:11 +00001021 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001022 case JIFZ:
sewardjde4a1d02002-03-22 01:27:54 +00001023 case ADD: case ADC: case AND: case OR:
1024 case XOR: case SUB: case SBB:
1025 case SHL: case SHR: case SAR:
1026 case ROL: case ROR: case RCL: case RCR:
1027 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001028 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001029 VG_(printf)(", ");
njn4ba5a792002-09-30 10:23:54 +00001030 VG_(pp_UOperand)(u, 2, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001031 break;
1032
1033 case WIDEN:
1034 VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
1035 u->signed_widen?'s':'z');
1036 VG_(printf)("\t");
njn4ba5a792002-09-30 10:23:54 +00001037 VG_(pp_UOperand)(u, 1, u->size, False);
sewardjde4a1d02002-03-22 01:27:54 +00001038 break;
1039
njn25e49d8e72002-09-23 09:36:25 +00001040 default:
1041 if (VG_(needs).extended_UCode)
njn4ba5a792002-09-30 10:23:54 +00001042 SK_(pp_XUInstr)(u);
njn25e49d8e72002-09-23 09:36:25 +00001043 else {
1044 VG_(printf)("unhandled opcode: %u. Perhaps "
1045 "VG_(needs).extended_UCode should be set?",
1046 u->opcode);
njne427a662002-10-02 11:08:25 +00001047 VG_(core_panic)("pp_UInstr: unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001048 }
sewardjde4a1d02002-03-22 01:27:54 +00001049 }
sewardjde4a1d02002-03-22 01:27:54 +00001050 if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
1051 VG_(printf)(" (");
1052 if (u->flags_r != FlagsEmpty)
1053 vg_ppFlagSet("-r", u->flags_r);
1054 if (u->flags_w != FlagsEmpty)
1055 vg_ppFlagSet("-w", u->flags_w);
1056 VG_(printf)(")");
1057 }
njn25e49d8e72002-09-23 09:36:25 +00001058
1059 if (ppRegsLiveness) {
1060 VG_(printf)("\t\t");
njn4ba5a792002-09-30 10:23:54 +00001061 pp_realregs_liveness ( u );
njn25e49d8e72002-09-23 09:36:25 +00001062 }
1063
sewardjde4a1d02002-03-22 01:27:54 +00001064 VG_(printf)("\n");
1065}
1066
njn4ba5a792002-09-30 10:23:54 +00001067void VG_(pp_UInstr) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001068{
njn4ba5a792002-09-30 10:23:54 +00001069 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/False );
njn25e49d8e72002-09-23 09:36:25 +00001070}
1071
njn4ba5a792002-09-30 10:23:54 +00001072void VG_(pp_UInstr_regs) ( Int instrNo, UInstr* u )
njn25e49d8e72002-09-23 09:36:25 +00001073{
njn4ba5a792002-09-30 10:23:54 +00001074 pp_UInstrWorker ( instrNo, u, /*ppRegsLiveness*/True );
njn25e49d8e72002-09-23 09:36:25 +00001075}
sewardjde4a1d02002-03-22 01:27:54 +00001076
njn4ba5a792002-09-30 10:23:54 +00001077void VG_(pp_UCodeBlock) ( UCodeBlock* cb, Char* title )
sewardjde4a1d02002-03-22 01:27:54 +00001078{
1079 Int i;
njn25e49d8e72002-09-23 09:36:25 +00001080 VG_(printf)("%s\n", title);
sewardjde4a1d02002-03-22 01:27:54 +00001081 for (i = 0; i < cb->used; i++)
njn25e49d8e72002-09-23 09:36:25 +00001082 if (cb->instrs[i].opcode != NOP)
njn4ba5a792002-09-30 10:23:54 +00001083 VG_(pp_UInstr) ( i, &cb->instrs[i] );
sewardjde4a1d02002-03-22 01:27:54 +00001084 VG_(printf)("\n");
1085}
1086
1087
1088/*------------------------------------------------------------*/
1089/*--- uinstr helpers for register allocation ---*/
1090/*--- and code improvement. ---*/
1091/*------------------------------------------------------------*/
1092
njn25e49d8e72002-09-23 09:36:25 +00001093/* Get the temp/reg use of a uinstr, parking them in an array supplied by
njn810086f2002-11-14 12:42:47 +00001094 the caller (regs), which is assumed to be big enough. Return the number
1095 of entries. Written regs are indicated in parallel array isWrites.
1096 Insns which read _and_ write a register wind up mentioning it twice.
1097 Entries are placed in the array in program order, so that if a reg is
1098 read-modified-written, it appears first as a read and then as a write.
1099 'tag' indicates whether we are looking at TempRegs or RealRegs.
sewardjde4a1d02002-03-22 01:27:54 +00001100*/
njn25e49d8e72002-09-23 09:36:25 +00001101__inline__
njn810086f2002-11-14 12:42:47 +00001102Int VG_(get_reg_usage) ( UInstr* u, Tag tag, Int* regs, Bool* isWrites )
sewardjde4a1d02002-03-22 01:27:54 +00001103{
njn810086f2002-11-14 12:42:47 +00001104# define RD(ono) VG_UINSTR_READS_REG(ono, regs, isWrites)
1105# define WR(ono) VG_UINSTR_WRITES_REG(ono, regs, isWrites)
sewardjde4a1d02002-03-22 01:27:54 +00001106
1107 Int n = 0;
1108 switch (u->opcode) {
1109 case LEA1: RD(1); WR(2); break;
1110 case LEA2: RD(1); RD(2); WR(3); break;
1111
njn25e49d8e72002-09-23 09:36:25 +00001112 case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E:
sewardj7a5ebcf2002-11-13 22:42:13 +00001113 case CLEAR: case CALLM: case LOCK: break;
njn25e49d8e72002-09-23 09:36:25 +00001114
1115 case CCALL:
1116 if (u->argc > 0) RD(1);
1117 if (u->argc > 1) RD(2);
1118 if (u->argc > 2) RD(3);
1119 if (u->has_ret_val) WR(3);
1120 break;
1121
sewardjde4a1d02002-03-22 01:27:54 +00001122 case FPU_R: case FPU_W: RD(2); break;
1123
sewardje1042472002-09-30 12:33:11 +00001124 case GETSEG: WR(2); break;
1125 case PUTSEG: RD(1); break;
1126
sewardjde4a1d02002-03-22 01:27:54 +00001127 case GETF: WR(1); break;
1128 case PUTF: RD(1); break;
1129
1130 case GET: WR(2); break;
1131 case PUT: RD(1); break;
1132 case LOAD: RD(1); WR(2); break;
njn25e49d8e72002-09-23 09:36:25 +00001133 case STORE: RD(1); RD(2); break;
sewardjde4a1d02002-03-22 01:27:54 +00001134 case MOV: RD(1); WR(2); break;
1135
1136 case JMP: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001137
njn25e49d8e72002-09-23 09:36:25 +00001138 case PUSH: RD(1); break;
sewardjde4a1d02002-03-22 01:27:54 +00001139 case POP: WR(1); break;
1140
sewardje1042472002-09-30 12:33:11 +00001141 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001142 case CMOV:
1143 case ADD: case ADC: case AND: case OR:
1144 case XOR: case SUB: case SBB:
1145 RD(1); RD(2); WR(2); break;
1146
1147 case SHL: case SHR: case SAR:
1148 case ROL: case ROR: case RCL: case RCR:
1149 RD(1); RD(2); WR(2); break;
1150
njn25e49d8e72002-09-23 09:36:25 +00001151 case NOT: case NEG: case INC: case DEC: case BSWAP:
sewardjde4a1d02002-03-22 01:27:54 +00001152 RD(1); WR(1); break;
1153
1154 case WIDEN: RD(1); WR(1); break;
1155
1156 case CC2VAL: WR(1); break;
1157 case JIFZ: RD(1); break;
1158
njn25e49d8e72002-09-23 09:36:25 +00001159 default:
1160 if (VG_(needs).extended_UCode)
njn810086f2002-11-14 12:42:47 +00001161 return SK_(get_Xreg_usage)(u, tag, regs, isWrites);
njn25e49d8e72002-09-23 09:36:25 +00001162 else {
1163 VG_(printf)("unhandled opcode: %u. Perhaps "
1164 "VG_(needs).extended_UCode should be set?",
1165 u->opcode);
njne427a662002-10-02 11:08:25 +00001166 VG_(core_panic)("VG_(get_reg_usage): unhandled opcode");
njn25e49d8e72002-09-23 09:36:25 +00001167 }
sewardjde4a1d02002-03-22 01:27:54 +00001168 }
1169 return n;
1170
1171# undef RD
1172# undef WR
1173}
1174
1175
njn25e49d8e72002-09-23 09:36:25 +00001176/* Change temp regs in u into real regs, as directed by the
1177 * temps[i]-->reals[i] mapping. */
1178static __inline__
njn810086f2002-11-14 12:42:47 +00001179void patchUInstr ( UInstr* u, Int temps[], UInt reals[], Int n_tmap )
sewardjde4a1d02002-03-22 01:27:54 +00001180{
1181 Int i;
1182 if (u->tag1 == TempReg) {
1183 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001184 if (temps[i] == u->val1) break;
njne427a662002-10-02 11:08:25 +00001185 if (i == n_tmap) VG_(core_panic)("patchUInstr(1)");
sewardjde4a1d02002-03-22 01:27:54 +00001186 u->tag1 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001187 u->val1 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001188 }
1189 if (u->tag2 == TempReg) {
1190 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001191 if (temps[i] == u->val2) break;
njne427a662002-10-02 11:08:25 +00001192 if (i == n_tmap) VG_(core_panic)("patchUInstr(2)");
sewardjde4a1d02002-03-22 01:27:54 +00001193 u->tag2 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001194 u->val2 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001195 }
1196 if (u->tag3 == TempReg) {
1197 for (i = 0; i < n_tmap; i++)
njn810086f2002-11-14 12:42:47 +00001198 if (temps[i] == u->val3) break;
njne427a662002-10-02 11:08:25 +00001199 if (i == n_tmap) VG_(core_panic)("patchUInstr(3)");
sewardjde4a1d02002-03-22 01:27:54 +00001200 u->tag3 = RealReg;
njn25e49d8e72002-09-23 09:36:25 +00001201 u->val3 = reals[i];
sewardjde4a1d02002-03-22 01:27:54 +00001202 }
1203}
1204
1205
1206/* Tedious x86-specific hack which compensates for the fact that the
1207 register numbers for %ah .. %dh do not correspond to those for %eax
1208 .. %edx. It maps a (reg size, reg no) pair to the number of the
1209 containing 32-bit reg. */
1210static __inline__
1211Int containingArchRegOf ( Int sz, Int aregno )
1212{
1213 switch (sz) {
1214 case 4: return aregno;
1215 case 2: return aregno;
1216 case 1: return aregno >= 4 ? aregno-4 : aregno;
njne427a662002-10-02 11:08:25 +00001217 default: VG_(core_panic)("containingArchRegOf");
sewardjde4a1d02002-03-22 01:27:54 +00001218 }
1219}
1220
1221
1222/* If u reads an ArchReg, return the number of the containing arch
njn25e49d8e72002-09-23 09:36:25 +00001223 reg. Otherwise return -1. Used in redundant-PUT elimination.
1224 Note that this is not required for skins extending UCode because
1225 this happens before instrumentation. */
sewardjde4a1d02002-03-22 01:27:54 +00001226static __inline__
1227Int maybe_uinstrReadsArchReg ( UInstr* u )
1228{
1229 switch (u->opcode) {
1230 case GET:
1231 case ADD: case ADC: case AND: case OR:
1232 case XOR: case SUB: case SBB:
1233 case SHL: case SHR: case SAR: case ROL:
1234 case ROR: case RCL: case RCR:
1235 if (u->tag1 == ArchReg)
1236 return containingArchRegOf ( u->size, u->val1 );
1237 else
1238 return -1;
1239
1240 case GETF: case PUTF:
1241 case CALLM_S: case CALLM_E:
1242 case INCEIP:
1243 case LEA1:
1244 case LEA2:
1245 case NOP:
sewardj7a5ebcf2002-11-13 22:42:13 +00001246 case LOCK:
sewardjde4a1d02002-03-22 01:27:54 +00001247 case PUT:
1248 case LOAD:
1249 case STORE:
1250 case MOV:
1251 case CMOV:
1252 case JMP:
1253 case CALLM: case CLEAR: case PUSH: case POP:
1254 case NOT: case NEG: case INC: case DEC: case BSWAP:
1255 case CC2VAL:
1256 case JIFZ:
1257 case FPU: case FPU_R: case FPU_W:
1258 case WIDEN:
sewardje1042472002-09-30 12:33:11 +00001259 /* GETSEG and USESEG are to do with ArchRegS, not ArchReg */
1260 case GETSEG: case PUTSEG:
1261 case USESEG:
sewardjde4a1d02002-03-22 01:27:54 +00001262 return -1;
1263
1264 default:
njn4ba5a792002-09-30 10:23:54 +00001265 VG_(pp_UInstr)(0,u);
njne427a662002-10-02 11:08:25 +00001266 VG_(core_panic)("maybe_uinstrReadsArchReg: unhandled opcode");
sewardjde4a1d02002-03-22 01:27:54 +00001267 }
1268}
1269
1270static __inline__
1271Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
1272{
1273 Int i, k;
njn810086f2002-11-14 12:42:47 +00001274 Int tempUse[3];
1275 Bool notUsed[3];
1276
1277 k = VG_(get_reg_usage) ( u, TempReg, &tempUse[0], &notUsed[0] );
sewardjde4a1d02002-03-22 01:27:54 +00001278 for (i = 0; i < k; i++)
njn810086f2002-11-14 12:42:47 +00001279 if (tempUse[i] == tempreg)
sewardjde4a1d02002-03-22 01:27:54 +00001280 return True;
1281 return False;
1282}
1283
1284
1285/*------------------------------------------------------------*/
1286/*--- ucode improvement. ---*/
1287/*------------------------------------------------------------*/
1288
1289/* Improve the code in cb by doing
1290 -- Redundant ArchReg-fetch elimination
1291 -- Redundant PUT elimination
1292 -- Redundant cond-code restore/save elimination
1293 The overall effect of these is to allow target registers to be
1294 cached in host registers over multiple target insns.
1295*/
1296static void vg_improve ( UCodeBlock* cb )
1297{
1298 Int i, j, k, m, n, ar, tr, told, actual_areg;
1299 Int areg_map[8];
1300 Bool annul_put[8];
njn810086f2002-11-14 12:42:47 +00001301 Int tempUse[3];
1302 Bool isWrites[3];
sewardjde4a1d02002-03-22 01:27:54 +00001303 UInstr* u;
1304 Bool wr;
1305 Int* last_live_before;
1306 FlagSet future_dead_flags;
1307
njn25e49d8e72002-09-23 09:36:25 +00001308 if (dis)
1309 VG_(printf) ("Improvements:\n");
1310
sewardjde4a1d02002-03-22 01:27:54 +00001311 if (cb->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001312 last_live_before = VG_(arena_malloc) ( VG_AR_JITTER,
1313 cb->nextTemp * sizeof(Int) );
sewardjde4a1d02002-03-22 01:27:54 +00001314 else
1315 last_live_before = NULL;
1316
1317
1318 /* PASS 1: redundant GET elimination. (Actually, more general than
1319 that -- eliminates redundant fetches of ArchRegs). */
1320
1321 /* Find the live-range-ends for all temporaries. Duplicates code
1322 in the register allocator :-( */
1323
1324 for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
1325
1326 for (i = cb->used-1; i >= 0; i--) {
1327 u = &cb->instrs[i];
1328
njn810086f2002-11-14 12:42:47 +00001329 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001330
1331 /* For each temp usage ... bwds in program order. */
1332 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001333 tr = tempUse[j];
1334 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001335 if (last_live_before[tr] == -1) {
1336 vg_assert(tr >= 0 && tr < cb->nextTemp);
1337 last_live_before[tr] = wr ? (i+1) : i;
1338 }
1339 }
1340
1341 }
1342
1343# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
1344 { Int q; \
1345 /* Invalidate any old binding(s) to tempreg. */ \
1346 for (q = 0; q < 8; q++) \
1347 if (areg_map[q] == tempreg) areg_map[q] = -1; \
1348 /* Add the new binding. */ \
1349 areg_map[archreg] = (tempreg); \
1350 }
1351
1352 /* Set up the A-reg map. */
1353 for (i = 0; i < 8; i++) areg_map[i] = -1;
1354
1355 /* Scan insns. */
1356 for (i = 0; i < cb->used; i++) {
1357 u = &cb->instrs[i];
1358 if (u->opcode == GET && u->size == 4) {
1359 /* GET; see if it can be annulled. */
1360 vg_assert(u->tag1 == ArchReg);
1361 vg_assert(u->tag2 == TempReg);
1362 ar = u->val1;
1363 tr = u->val2;
1364 told = areg_map[ar];
1365 if (told != -1 && last_live_before[told] <= i) {
1366 /* ar already has an old mapping to told, but that runs
1367 out here. Annul this GET, rename tr to told for the
1368 rest of the block, and extend told's live range to that
1369 of tr. */
njn4ba5a792002-09-30 10:23:54 +00001370 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001371 n = last_live_before[tr] + 1;
1372 if (n > cb->used) n = cb->used;
1373 last_live_before[told] = last_live_before[tr];
1374 last_live_before[tr] = i-1;
njn25e49d8e72002-09-23 09:36:25 +00001375 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001376 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001377 " at %2d: delete GET, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001378 i, tr, told,i+1, n-1);
1379 for (m = i+1; m < n; m++) {
1380 if (cb->instrs[m].tag1 == TempReg
1381 && cb->instrs[m].val1 == tr)
1382 cb->instrs[m].val1 = told;
1383 if (cb->instrs[m].tag2 == TempReg
1384 && cb->instrs[m].val2 == tr)
1385 cb->instrs[m].val2 = told;
1386 }
1387 BIND_ARCH_TO_TEMP(ar,told);
1388 }
1389 else
1390 BIND_ARCH_TO_TEMP(ar,tr);
1391 }
1392 else if (u->opcode == GET && u->size != 4) {
1393 /* Invalidate any mapping for this archreg. */
1394 actual_areg = containingArchRegOf ( u->size, u->val1 );
1395 areg_map[actual_areg] = -1;
1396 }
1397 else if (u->opcode == PUT && u->size == 4) {
1398 /* PUT; re-establish t -> a binding */
1399 vg_assert(u->tag1 == TempReg);
1400 vg_assert(u->tag2 == ArchReg);
1401 BIND_ARCH_TO_TEMP(u->val2, u->val1);
1402 }
1403 else if (u->opcode == PUT && u->size != 4) {
1404 /* Invalidate any mapping for this archreg. */
1405 actual_areg = containingArchRegOf ( u->size, u->val2 );
1406 areg_map[actual_areg] = -1;
1407 } else {
1408
1409 /* see if insn has an archreg as a read operand; if so try to
1410 map it. */
1411 if (u->tag1 == ArchReg && u->size == 4
1412 && areg_map[u->val1] != -1) {
1413 switch (u->opcode) {
1414 case ADD: case SUB: case AND: case OR: case XOR:
1415 case ADC: case SBB:
1416 case SHL: case SHR: case SAR: case ROL: case ROR:
1417 case RCL: case RCR:
njn25e49d8e72002-09-23 09:36:25 +00001418 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001419 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001420 " at %2d: change ArchReg %S to TempReg t%d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001421 i, nameIReg(4,u->val1), areg_map[u->val1]);
1422 u->tag1 = TempReg;
1423 u->val1 = areg_map[u->val1];
1424 /* Remember to extend the live range of the TempReg,
1425 if necessary. */
1426 if (last_live_before[u->val1] < i)
1427 last_live_before[u->val1] = i;
1428 break;
1429 default:
1430 break;
1431 }
1432 }
1433
1434 /* boring insn; invalidate any mappings to temps it writes */
njn810086f2002-11-14 12:42:47 +00001435 k = VG_(get_reg_usage)(u, TempReg, &tempUse[0], &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001436
1437 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001438 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001439 if (!wr) continue;
njn810086f2002-11-14 12:42:47 +00001440 tr = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001441 for (m = 0; m < 8; m++)
1442 if (areg_map[m] == tr) areg_map[m] = -1;
1443 }
1444 }
1445
1446 }
1447
1448# undef BIND_ARCH_TO_TEMP
1449
sewardj05f1aa12002-04-30 00:29:36 +00001450 /* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
1451 %ESP, since the memory check machinery always requires the
1452 in-memory value of %ESP to be up to date. Although this isn't
1453 actually required by other analyses (cache simulation), it's
1454 simplest to be consistent for all end-uses. */
sewardjde4a1d02002-03-22 01:27:54 +00001455 for (j = 0; j < 8; j++)
1456 annul_put[j] = False;
1457
1458 for (i = cb->used-1; i >= 0; i--) {
1459 u = &cb->instrs[i];
1460 if (u->opcode == NOP) continue;
1461
1462 if (u->opcode == PUT && u->size == 4) {
1463 vg_assert(u->tag2 == ArchReg);
1464 actual_areg = containingArchRegOf ( 4, u->val2 );
1465 if (annul_put[actual_areg]) {
sewardj05f1aa12002-04-30 00:29:36 +00001466 vg_assert(actual_areg != R_ESP);
njn4ba5a792002-09-30 10:23:54 +00001467 VG_(new_NOP)(u);
njn25e49d8e72002-09-23 09:36:25 +00001468 if (dis)
1469 VG_(printf)(" at %2d: delete PUT\n", i );
sewardjde4a1d02002-03-22 01:27:54 +00001470 } else {
sewardj05f1aa12002-04-30 00:29:36 +00001471 if (actual_areg != R_ESP)
sewardjde4a1d02002-03-22 01:27:54 +00001472 annul_put[actual_areg] = True;
1473 }
1474 }
1475 else if (u->opcode == PUT && u->size != 4) {
1476 actual_areg = containingArchRegOf ( u->size, u->val2 );
1477 annul_put[actual_areg] = False;
1478 }
1479 else if (u->opcode == JMP || u->opcode == JIFZ
1480 || u->opcode == CALLM) {
1481 for (j = 0; j < 8; j++)
1482 annul_put[j] = False;
1483 }
1484 else {
1485 /* If an instruction reads an ArchReg, the immediately
1486 preceding PUT cannot be annulled. */
1487 actual_areg = maybe_uinstrReadsArchReg ( u );
1488 if (actual_areg != -1)
1489 annul_put[actual_areg] = False;
1490 }
1491 }
1492
1493 /* PASS 2a: redundant-move elimination. Given MOV t1, t2 and t1 is
1494 dead after this point, annul the MOV insn and rename t2 to t1.
1495 Further modifies the last_live_before map. */
1496
1497# if 0
njn4ba5a792002-09-30 10:23:54 +00001498 VG_(pp_UCodeBlock)(cb, "Before MOV elimination" );
sewardjde4a1d02002-03-22 01:27:54 +00001499 for (i = 0; i < cb->nextTemp; i++)
1500 VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
1501 VG_(printf)("\n");
1502# endif
1503
1504 for (i = 0; i < cb->used-1; i++) {
1505 u = &cb->instrs[i];
1506 if (u->opcode != MOV) continue;
1507 if (u->tag1 == Literal) continue;
1508 vg_assert(u->tag1 == TempReg);
1509 vg_assert(u->tag2 == TempReg);
1510 if (last_live_before[u->val1] == i) {
njn25e49d8e72002-09-23 09:36:25 +00001511 if (dis)
sewardjde4a1d02002-03-22 01:27:54 +00001512 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001513 " at %2d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
sewardjde4a1d02002-03-22 01:27:54 +00001514 i, u->val2, u->val1, i+1, last_live_before[u->val2] );
1515 for (j = i+1; j <= last_live_before[u->val2]; j++) {
1516 if (cb->instrs[j].tag1 == TempReg
1517 && cb->instrs[j].val1 == u->val2)
1518 cb->instrs[j].val1 = u->val1;
1519 if (cb->instrs[j].tag2 == TempReg
1520 && cb->instrs[j].val2 == u->val2)
1521 cb->instrs[j].val2 = u->val1;
1522 }
1523 last_live_before[u->val1] = last_live_before[u->val2];
1524 last_live_before[u->val2] = i-1;
njn4ba5a792002-09-30 10:23:54 +00001525 VG_(new_NOP)(u);
sewardjde4a1d02002-03-22 01:27:54 +00001526 }
1527 }
1528
1529 /* PASS 3: redundant condition-code restore/save elimination.
1530 Scan backwards from the end. future_dead_flags records the set
1531 of flags which are dead at this point, that is, will be written
1532 before they are next read. Earlier uinsns which write flags
1533 already in future_dead_flags can have their writes annulled.
1534 */
1535 future_dead_flags = FlagsEmpty;
1536
1537 for (i = cb->used-1; i >= 0; i--) {
1538 u = &cb->instrs[i];
1539
1540 /* We might never make it to insns beyond this one, so be
1541 conservative. */
1542 if (u->opcode == JIFZ || u->opcode == JMP) {
1543 future_dead_flags = FlagsEmpty;
1544 continue;
1545 }
1546
sewardjfbb6cda2002-07-24 09:33:52 +00001547 /* PUTF modifies the %EFLAGS in essentially unpredictable ways.
1548 For example people try to mess with bit 21 to see if CPUID
1549 works. The setting may or may not actually take hold. So we
1550 play safe here. */
1551 if (u->opcode == PUTF) {
1552 future_dead_flags = FlagsEmpty;
1553 continue;
1554 }
1555
sewardjde4a1d02002-03-22 01:27:54 +00001556 /* We can annul the flags written by this insn if it writes a
1557 subset (or eq) of the set of flags known to be dead after
1558 this insn. If not, just record the flags also written by
1559 this insn.*/
1560 if (u->flags_w != FlagsEmpty
1561 && VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
njn25e49d8e72002-09-23 09:36:25 +00001562 if (dis) {
1563 VG_(printf)(" at %2d: annul flag write ", i);
sewardjde4a1d02002-03-22 01:27:54 +00001564 vg_ppFlagSet("", u->flags_w);
1565 VG_(printf)(" due to later ");
1566 vg_ppFlagSet("", future_dead_flags);
1567 VG_(printf)("\n");
1568 }
1569 u->flags_w = FlagsEmpty;
1570 } else {
1571 future_dead_flags
1572 = VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
1573 }
1574
1575 /* If this insn also reads flags, empty out future_dead_flags so
1576 as to force preceding writes not to be annulled. */
1577 if (u->flags_r != FlagsEmpty)
1578 future_dead_flags = FlagsEmpty;
1579 }
1580
1581 if (last_live_before)
njn25e49d8e72002-09-23 09:36:25 +00001582 VG_(arena_free) ( VG_AR_JITTER, last_live_before );
1583
1584 if (dis) {
1585 VG_(printf)("\n");
njn4ba5a792002-09-30 10:23:54 +00001586 VG_(pp_UCodeBlock) ( cb, "Improved UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00001587 }
sewardjde4a1d02002-03-22 01:27:54 +00001588}
1589
1590
1591/*------------------------------------------------------------*/
1592/*--- The new register allocator. ---*/
1593/*------------------------------------------------------------*/
1594
1595typedef
1596 struct {
1597 /* Becomes live for the first time after this insn ... */
1598 Int live_after;
1599 /* Becomes dead for the last time after this insn ... */
1600 Int dead_before;
1601 /* The "home" spill slot, if needed. Never changes. */
1602 Int spill_no;
1603 /* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
1604 Int real_no;
1605 }
1606 TempInfo;
1607
1608
1609/* Take a ucode block and allocate its TempRegs to RealRegs, or put
1610 them in spill locations, and add spill code, if there are not
1611 enough real regs. The usual register allocation deal, in short.
1612
1613 Important redundancy of representation:
1614
1615 real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
1616 to VG_NOVALUE if the real reg has no currently assigned TempReg.
1617
1618 The .real_no field of a TempInfo gives the current RRR for
1619 this TempReg, or VG_NOVALUE if the TempReg is currently
1620 in memory, in which case it is in the SpillNo denoted by
1621 spillno.
1622
1623 These pieces of information (a fwds-bwds mapping, really) must
1624 be kept consistent!
1625
1626 This allocator uses the so-called Second Chance Bin Packing
1627 algorithm, as described in "Quality and Speed in Linear-scan
1628 Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
1629 pp142-151). It is simple and fast and remarkably good at
1630 minimising the amount of spill code introduced.
1631*/
1632
1633static
1634UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
1635{
1636 TempInfo* temp_info;
1637 Int real_to_temp[VG_MAX_REALREGS];
1638 Bool is_spill_cand[VG_MAX_REALREGS];
1639 Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
1640 Int i, j, k, m, r, tno, max_ss_no;
1641 Bool wr, defer, isRead, spill_reqd;
njn25e49d8e72002-09-23 09:36:25 +00001642 UInt realUse[3];
njn810086f2002-11-14 12:42:47 +00001643 Int tempUse[3];
1644 Bool isWrites[3];
sewardjde4a1d02002-03-22 01:27:54 +00001645 UCodeBlock* c2;
1646
1647 /* Used to denote ... well, "no value" in this fn. */
1648# define VG_NOTHING (-2)
1649
1650 /* Initialise the TempReg info. */
1651 if (c1->nextTemp > 0)
njn25e49d8e72002-09-23 09:36:25 +00001652 temp_info = VG_(arena_malloc)(VG_AR_JITTER,
1653 c1->nextTemp * sizeof(TempInfo) );
sewardjde4a1d02002-03-22 01:27:54 +00001654 else
1655 temp_info = NULL;
1656
1657 for (i = 0; i < c1->nextTemp; i++) {
1658 temp_info[i].live_after = VG_NOTHING;
1659 temp_info[i].dead_before = VG_NOTHING;
1660 temp_info[i].spill_no = VG_NOTHING;
1661 /* temp_info[i].real_no is not yet relevant. */
1662 }
1663
1664 spill_reqd = False;
1665
1666 /* Scan fwds to establish live ranges. */
1667
1668 for (i = 0; i < c1->used; i++) {
njn810086f2002-11-14 12:42:47 +00001669 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1670 &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001671 vg_assert(k >= 0 && k <= 3);
1672
1673 /* For each temp usage ... fwds in program order */
1674 for (j = 0; j < k; j++) {
njn810086f2002-11-14 12:42:47 +00001675 tno = tempUse[j];
1676 wr = isWrites[j];
sewardjde4a1d02002-03-22 01:27:54 +00001677 if (wr) {
1678 /* Writes hold a reg live until after this insn. */
1679 if (temp_info[tno].live_after == VG_NOTHING)
1680 temp_info[tno].live_after = i;
1681 if (temp_info[tno].dead_before < i + 1)
1682 temp_info[tno].dead_before = i + 1;
1683 } else {
1684 /* First use of a tmp should be a write. */
1685 vg_assert(temp_info[tno].live_after != VG_NOTHING);
1686 /* Reads only hold it live until before this insn. */
1687 if (temp_info[tno].dead_before < i)
1688 temp_info[tno].dead_before = i;
1689 }
1690 }
1691 }
1692
1693# if 0
1694 /* Sanity check on live ranges. Expensive but correct. */
1695 for (i = 0; i < c1->nextTemp; i++) {
1696 vg_assert( (temp_info[i].live_after == VG_NOTHING
1697 && temp_info[i].dead_before == VG_NOTHING)
1698 || (temp_info[i].live_after != VG_NOTHING
1699 && temp_info[i].dead_before != VG_NOTHING) );
1700 }
1701# endif
1702
1703 /* Do a rank-based allocation of TempRegs to spill slot numbers.
1704 We put as few as possible values in spill slots, but
1705 nevertheless need to have an assignment to them just in case. */
1706
1707 max_ss_no = -1;
1708
1709 for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
1710 ss_busy_until_before[i] = 0;
1711
1712 for (i = 0; i < c1->nextTemp; i++) {
1713
1714 /* True iff this temp is unused. */
1715 if (temp_info[i].live_after == VG_NOTHING)
1716 continue;
1717
1718 /* Find the lowest-numbered spill slot which is available at the
1719 start point of this interval, and assign the interval to
1720 it. */
1721 for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
1722 if (ss_busy_until_before[j] <= temp_info[i].live_after)
1723 break;
1724 if (j == VG_MAX_SPILLSLOTS) {
1725 VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
njne427a662002-10-02 11:08:25 +00001726 VG_(core_panic)("register allocation failed -- out of spill slots");
sewardjde4a1d02002-03-22 01:27:54 +00001727 }
1728 ss_busy_until_before[j] = temp_info[i].dead_before;
1729 temp_info[i].spill_no = j;
1730 if (j > max_ss_no)
1731 max_ss_no = j;
1732 }
1733
1734 VG_(total_reg_rank) += (max_ss_no+1);
1735
1736 /* Show live ranges and assigned spill slot nos. */
1737
njn25e49d8e72002-09-23 09:36:25 +00001738 if (dis) {
1739 VG_(printf)("Live range assignments:\n");
sewardjde4a1d02002-03-22 01:27:54 +00001740
1741 for (i = 0; i < c1->nextTemp; i++) {
1742 if (temp_info[i].live_after == VG_NOTHING)
1743 continue;
1744 VG_(printf)(
njn25e49d8e72002-09-23 09:36:25 +00001745 " LR %d is after %d to before %d\tspillno %d\n",
sewardjde4a1d02002-03-22 01:27:54 +00001746 i,
1747 temp_info[i].live_after,
1748 temp_info[i].dead_before,
1749 temp_info[i].spill_no
1750 );
1751 }
njn25e49d8e72002-09-23 09:36:25 +00001752 VG_(printf)("\n");
sewardjde4a1d02002-03-22 01:27:54 +00001753 }
1754
1755 /* Now that we've established a spill slot number for each used
1756 temporary, we can go ahead and do the core of the "Second-chance
1757 binpacking" allocation algorithm. */
1758
njn25e49d8e72002-09-23 09:36:25 +00001759 if (dis) VG_(printf)("Register allocated UCode:\n");
1760
1761
sewardjde4a1d02002-03-22 01:27:54 +00001762 /* Resulting code goes here. We generate it all in a forwards
1763 pass. */
njn4ba5a792002-09-30 10:23:54 +00001764 c2 = VG_(alloc_UCodeBlock)();
sewardjde4a1d02002-03-22 01:27:54 +00001765
1766 /* At the start, no TempRegs are assigned to any real register.
1767 Correspondingly, all temps claim to be currently resident in
1768 their spill slots, as computed by the previous two passes. */
1769 for (i = 0; i < VG_MAX_REALREGS; i++)
1770 real_to_temp[i] = VG_NOTHING;
1771 for (i = 0; i < c1->nextTemp; i++)
1772 temp_info[i].real_no = VG_NOTHING;
1773
sewardjde4a1d02002-03-22 01:27:54 +00001774 /* Process each insn in turn. */
1775 for (i = 0; i < c1->used; i++) {
1776
1777 if (c1->instrs[i].opcode == NOP) continue;
1778 VG_(uinstrs_prealloc)++;
1779
1780# if 0
1781 /* Check map consistency. Expensive but correct. */
1782 for (r = 0; r < VG_MAX_REALREGS; r++) {
1783 if (real_to_temp[r] != VG_NOTHING) {
1784 tno = real_to_temp[r];
1785 vg_assert(tno >= 0 && tno < c1->nextTemp);
1786 vg_assert(temp_info[tno].real_no == r);
1787 }
1788 }
1789 for (tno = 0; tno < c1->nextTemp; tno++) {
1790 if (temp_info[tno].real_no != VG_NOTHING) {
1791 r = temp_info[tno].real_no;
1792 vg_assert(r >= 0 && r < VG_MAX_REALREGS);
1793 vg_assert(real_to_temp[r] == tno);
1794 }
1795 }
1796# endif
1797
njn25e49d8e72002-09-23 09:36:25 +00001798 if (dis)
njn4ba5a792002-09-30 10:23:54 +00001799 VG_(pp_UInstr)(i, &c1->instrs[i]);
sewardjde4a1d02002-03-22 01:27:54 +00001800
1801 /* First, free up enough real regs for this insn. This may
1802 generate spill stores since we may have to evict some TempRegs
1803 currently in real regs. Also generates spill loads. */
1804
njn810086f2002-11-14 12:42:47 +00001805 k = VG_(get_reg_usage)(&c1->instrs[i], TempReg, &tempUse[0],
1806 &isWrites[0]);
sewardjde4a1d02002-03-22 01:27:54 +00001807 vg_assert(k >= 0 && k <= 3);
1808
1809 /* For each ***different*** temp mentioned in the insn .... */
1810 for (j = 0; j < k; j++) {
1811
1812 /* First check if the temp is mentioned again later; if so,
1813 ignore this mention. We only want to process each temp
1814 used by the insn once, even if it is mentioned more than
1815 once. */
1816 defer = False;
njn810086f2002-11-14 12:42:47 +00001817 tno = tempUse[j];
sewardjde4a1d02002-03-22 01:27:54 +00001818 for (m = j+1; m < k; m++)
njn810086f2002-11-14 12:42:47 +00001819 if (tempUse[m] == tno)
sewardjde4a1d02002-03-22 01:27:54 +00001820 defer = True;
1821 if (defer)
1822 continue;
1823
njn810086f2002-11-14 12:42:47 +00001824 /* Now we're trying to find a register for tempUse[j].
sewardjde4a1d02002-03-22 01:27:54 +00001825 First of all, if it already has a register assigned, we
1826 don't need to do anything more. */
1827 if (temp_info[tno].real_no != VG_NOTHING)
1828 continue;
1829
1830 /* No luck. The next thing to do is see if there is a
1831 currently unassigned register available. If so, bag it. */
1832 for (r = 0; r < VG_MAX_REALREGS; r++) {
1833 if (real_to_temp[r] == VG_NOTHING)
1834 break;
1835 }
1836 if (r < VG_MAX_REALREGS) {
1837 real_to_temp[r] = tno;
1838 temp_info[tno].real_no = r;
1839 continue;
1840 }
1841
1842 /* Unfortunately, that didn't pan out either. So we'll have
1843 to eject some other unfortunate TempReg into a spill slot
1844 in order to free up a register. Of course, we need to be
1845 careful not to eject some other TempReg needed by this
1846 insn.
1847
1848 Select r in 0 .. VG_MAX_REALREGS-1 such that
1849 real_to_temp[r] is not mentioned in
njn810086f2002-11-14 12:42:47 +00001850 tempUse[0 .. k-1], since it would be just plain
sewardjde4a1d02002-03-22 01:27:54 +00001851 wrong to eject some other TempReg which we need to use in
1852 this insn.
1853
1854 It is here that it is important to make a good choice of
1855 register to spill. */
1856
1857 /* First, mark those regs which are not spill candidates. */
1858 for (r = 0; r < VG_MAX_REALREGS; r++) {
1859 is_spill_cand[r] = True;
1860 for (m = 0; m < k; m++) {
njn810086f2002-11-14 12:42:47 +00001861 if (real_to_temp[r] == tempUse[m]) {
sewardjde4a1d02002-03-22 01:27:54 +00001862 is_spill_cand[r] = False;
1863 break;
1864 }
1865 }
1866 }
1867
1868 /* We can choose any r satisfying is_spill_cand[r]. However,
1869 try to make a good choice. First, try and find r such
1870 that the associated TempReg is already dead. */
1871 for (r = 0; r < VG_MAX_REALREGS; r++) {
1872 if (is_spill_cand[r] &&
1873 temp_info[real_to_temp[r]].dead_before <= i)
1874 goto have_spill_cand;
1875 }
1876
1877 /* No spill cand is mapped to a dead TempReg. Now we really
1878 _do_ have to generate spill code. Choose r so that the
1879 next use of its associated TempReg is as far ahead as
1880 possible, in the hope that this will minimise the number of
1881 consequent reloads required. This is a bit expensive, but
1882 we don't have to do it very often. */
1883 {
1884 Int furthest_r = VG_MAX_REALREGS;
1885 Int furthest = 0;
1886 for (r = 0; r < VG_MAX_REALREGS; r++) {
1887 if (!is_spill_cand[r]) continue;
1888 for (m = i+1; m < c1->used; m++)
1889 if (uInstrMentionsTempReg(&c1->instrs[m],
1890 real_to_temp[r]))
1891 break;
1892 if (m > furthest) {
1893 furthest = m;
1894 furthest_r = r;
1895 }
1896 }
1897 r = furthest_r;
1898 goto have_spill_cand;
1899 }
1900
1901 have_spill_cand:
1902 if (r == VG_MAX_REALREGS)
njne427a662002-10-02 11:08:25 +00001903 VG_(core_panic)("new reg alloc: out of registers ?!");
sewardjde4a1d02002-03-22 01:27:54 +00001904
1905 /* Eject r. Important refinement: don't bother if the
1906 associated TempReg is now dead. */
1907 vg_assert(real_to_temp[r] != VG_NOTHING);
1908 vg_assert(real_to_temp[r] != tno);
1909 temp_info[real_to_temp[r]].real_no = VG_NOTHING;
1910 if (temp_info[real_to_temp[r]].dead_before > i) {
1911 uInstr2(c2, PUT, 4,
njn4ba5a792002-09-30 10:23:54 +00001912 RealReg, VG_(rank_to_realreg)(r),
sewardjde4a1d02002-03-22 01:27:54 +00001913 SpillNo, temp_info[real_to_temp[r]].spill_no);
1914 VG_(uinstrs_spill)++;
1915 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00001916 if (dis)
njn4ba5a792002-09-30 10:23:54 +00001917 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00001918 }
1919
1920 /* Decide if tno is read. */
1921 isRead = False;
1922 for (m = 0; m < k; m++)
njn810086f2002-11-14 12:42:47 +00001923 if (tempUse[m] == tno && !isWrites[m])
sewardjde4a1d02002-03-22 01:27:54 +00001924 isRead = True;
1925
1926 /* If so, generate a spill load. */
1927 if (isRead) {
1928 uInstr2(c2, GET, 4,
1929 SpillNo, temp_info[tno].spill_no,
njn4ba5a792002-09-30 10:23:54 +00001930 RealReg, VG_(rank_to_realreg)(r) );
sewardjde4a1d02002-03-22 01:27:54 +00001931 VG_(uinstrs_spill)++;
1932 spill_reqd = True;
njn25e49d8e72002-09-23 09:36:25 +00001933 if (dis)
njn4ba5a792002-09-30 10:23:54 +00001934 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00001935 }
1936
1937 /* Update the forwards and backwards maps. */
1938 real_to_temp[r] = tno;
1939 temp_info[tno].real_no = r;
1940 }
1941
1942 /* By this point, all TempRegs mentioned by the insn have been
1943 bought into real regs. We now copy the insn to the output
1944 and use patchUInstr to convert its rTempRegs into
1945 realregs. */
1946 for (j = 0; j < k; j++)
njn810086f2002-11-14 12:42:47 +00001947 realUse[j] = VG_(rank_to_realreg)(temp_info[tempUse[j]].real_no);
njn4ba5a792002-09-30 10:23:54 +00001948 VG_(copy_UInstr)(c2, &c1->instrs[i]);
njn25e49d8e72002-09-23 09:36:25 +00001949 patchUInstr(&LAST_UINSTR(c2), &tempUse[0], &realUse[0], k);
sewardjde4a1d02002-03-22 01:27:54 +00001950
njn25e49d8e72002-09-23 09:36:25 +00001951 if (dis) {
njn4ba5a792002-09-30 10:23:54 +00001952 VG_(pp_UInstr)(c2->used-1, &LAST_UINSTR(c2));
sewardjde4a1d02002-03-22 01:27:54 +00001953 VG_(printf)("\n");
1954 }
1955 }
1956
1957 if (temp_info != NULL)
njn25e49d8e72002-09-23 09:36:25 +00001958 VG_(arena_free)(VG_AR_JITTER, temp_info);
sewardjde4a1d02002-03-22 01:27:54 +00001959
njn4ba5a792002-09-30 10:23:54 +00001960 VG_(free_UCodeBlock)(c1);
sewardjde4a1d02002-03-22 01:27:54 +00001961
1962 if (spill_reqd)
1963 VG_(translations_needing_spill)++;
1964
1965 return c2;
1966
1967# undef VG_NOTHING
1968
1969}
njn25e49d8e72002-09-23 09:36:25 +00001970extern void fooble(int);
1971/* Analysis records liveness of all general-use RealRegs in the UCode. */
1972static void vg_realreg_liveness_analysis ( UCodeBlock* cb )
1973{
1974 Int i, j, k;
1975 RRegSet rregs_live;
njn810086f2002-11-14 12:42:47 +00001976 Int regUse[3];
1977 Bool isWrites[3];
njn25e49d8e72002-09-23 09:36:25 +00001978 UInstr* u;
sewardjde4a1d02002-03-22 01:27:54 +00001979
njn25e49d8e72002-09-23 09:36:25 +00001980 /* All regs are dead at the end of the block */
1981 rregs_live = ALL_RREGS_DEAD;
sewardjde4a1d02002-03-22 01:27:54 +00001982
sewardjde4a1d02002-03-22 01:27:54 +00001983 for (i = cb->used-1; i >= 0; i--) {
1984 u = &cb->instrs[i];
1985
njn25e49d8e72002-09-23 09:36:25 +00001986 u->regs_live_after = rregs_live;
sewardj97ced732002-03-25 00:07:36 +00001987
njn810086f2002-11-14 12:42:47 +00001988 k = VG_(get_reg_usage)(u, RealReg, &regUse[0], &isWrites[0]);
sewardj97ced732002-03-25 00:07:36 +00001989
njn25e49d8e72002-09-23 09:36:25 +00001990 /* For each reg usage ... bwds in program order. Variable is live
1991 before this UInstr if it is read by this UInstr.
njn810086f2002-11-14 12:42:47 +00001992 Note that regUse[j] holds the Intel reg number, so we must
njn25e49d8e72002-09-23 09:36:25 +00001993 convert it to our rank number. */
1994 for (j = k-1; j >= 0; j--) {
njn810086f2002-11-14 12:42:47 +00001995 SET_RREG_LIVENESS ( VG_(realreg_to_rank)(regUse[j]),
njn25e49d8e72002-09-23 09:36:25 +00001996 rregs_live,
njn810086f2002-11-14 12:42:47 +00001997 !isWrites[j] );
sewardjde4a1d02002-03-22 01:27:54 +00001998 }
1999 }
sewardjde4a1d02002-03-22 01:27:54 +00002000}
2001
sewardjde4a1d02002-03-22 01:27:54 +00002002/*------------------------------------------------------------*/
2003/*--- Main entry point for the JITter. ---*/
2004/*------------------------------------------------------------*/
2005
2006/* Translate the basic block beginning at orig_addr, placing the
2007 translation in a vg_malloc'd block, the address and size of which
2008 are returned in trans_addr and trans_size. Length of the original
2009 block is also returned in orig_size. If the latter three are NULL,
2010 this call is being done for debugging purposes, in which case (a)
2011 throw away the translation once it is made, and (b) produce a load
2012 of debugging output.
njn25e49d8e72002-09-23 09:36:25 +00002013
2014 'tst' is the identity of the thread needing this block.
sewardjde4a1d02002-03-22 01:27:54 +00002015*/
njn25e49d8e72002-09-23 09:36:25 +00002016void VG_(translate) ( /*IN*/ ThreadState* tst,
2017 /*IN*/ Addr orig_addr,
2018 /*OUT*/ UInt* orig_size,
2019 /*OUT*/ Addr* trans_addr,
2020 /*OUT*/ UInt* trans_size )
sewardjde4a1d02002-03-22 01:27:54 +00002021{
2022 Int n_disassembled_bytes, final_code_size;
2023 Bool debugging_translation;
2024 UChar* final_code;
2025 UCodeBlock* cb;
2026
2027 VGP_PUSHCC(VgpTranslate);
2028 debugging_translation
2029 = orig_size == NULL || trans_addr == NULL || trans_size == NULL;
2030
njn25e49d8e72002-09-23 09:36:25 +00002031 if (!debugging_translation)
2032 VG_TRACK( pre_mem_read, Vg_CoreTranslate, tst, "", orig_addr, 1 );
sewardjde4a1d02002-03-22 01:27:54 +00002033
njn4ba5a792002-09-30 10:23:54 +00002034 cb = VG_(alloc_UCodeBlock)();
sewardjde4a1d02002-03-22 01:27:54 +00002035
njn25e49d8e72002-09-23 09:36:25 +00002036 /* If doing any code printing, print a basic block start marker */
2037 if (VG_(clo_trace_codegen)) {
2038 Char fnname[64] = "";
2039 VG_(get_fnname_if_entry)(orig_addr, fnname, 64);
2040 VG_(printf)(
2041 "==== BB %d %s(%p) in %dB, out %dB, BBs exec'd %lu ====\n\n",
2042 VG_(overall_in_count), fnname, orig_addr,
2043 VG_(overall_in_osize), VG_(overall_in_tsize),
2044 VG_(bbs_done));
2045 }
2046
2047 /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
2048# define DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(n) \
2049 ( debugging_translation || (VG_(clo_trace_codegen) & (1 << (n-1))) )
2050
sewardjde4a1d02002-03-22 01:27:54 +00002051 /* Disassemble this basic block into cb. */
njn25e49d8e72002-09-23 09:36:25 +00002052 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(1);
2053 VGP_PUSHCC(VgpToUCode);
sewardjde4a1d02002-03-22 01:27:54 +00002054 n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
njn25e49d8e72002-09-23 09:36:25 +00002055 VGP_POPCC(VgpToUCode);
2056
sewardjde4a1d02002-03-22 01:27:54 +00002057 /* Try and improve the code a bit. */
2058 if (VG_(clo_optimise)) {
njn25e49d8e72002-09-23 09:36:25 +00002059 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(2);
2060 VGP_PUSHCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002061 vg_improve ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002062 VGP_POPCC(VgpImprove);
sewardjde4a1d02002-03-22 01:27:54 +00002063 }
2064
njn25e49d8e72002-09-23 09:36:25 +00002065 /* Skin's instrumentation (Nb: must set VG_(print_codegen) in case
2066 SK_(instrument) looks at it. */
2067 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(3);
2068 VGP_PUSHCC(VgpInstrument);
2069 cb = SK_(instrument) ( cb, orig_addr );
2070 if (VG_(print_codegen))
njn4ba5a792002-09-30 10:23:54 +00002071 VG_(pp_UCodeBlock) ( cb, "Instrumented UCode:" );
njn25e49d8e72002-09-23 09:36:25 +00002072 VG_(saneUCodeBlock)( cb );
2073 VGP_POPCC(VgpInstrument);
njn4f9c9342002-04-29 16:03:24 +00002074
sewardjde4a1d02002-03-22 01:27:54 +00002075 /* Allocate registers. */
njn25e49d8e72002-09-23 09:36:25 +00002076 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(4);
2077 VGP_PUSHCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002078 cb = vg_do_register_allocation ( cb );
njn25e49d8e72002-09-23 09:36:25 +00002079 VGP_POPCC(VgpRegAlloc);
sewardjde4a1d02002-03-22 01:27:54 +00002080
njn25e49d8e72002-09-23 09:36:25 +00002081 /* Do post reg-alloc %e[acd]x liveness analysis (too boring to print
2082 * anything; results can be seen when emitting final code). */
2083 VGP_PUSHCC(VgpLiveness);
2084 vg_realreg_liveness_analysis ( cb );
2085 VGP_POPCC(VgpLiveness);
2086
2087 /* Emit final code */
2088 VG_(print_codegen) = DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE(5);
2089
2090 VGP_PUSHCC(VgpFromUcode);
sewardjde4a1d02002-03-22 01:27:54 +00002091 final_code = VG_(emit_code)(cb, &final_code_size );
njn25e49d8e72002-09-23 09:36:25 +00002092 VGP_POPCC(VgpFromUcode);
njn4ba5a792002-09-30 10:23:54 +00002093 VG_(free_UCodeBlock)(cb);
sewardjde4a1d02002-03-22 01:27:54 +00002094
njn25e49d8e72002-09-23 09:36:25 +00002095#undef DECIDE_IF_PRINTING_CODEGEN_FOR_PHASE
2096
sewardjde4a1d02002-03-22 01:27:54 +00002097 if (debugging_translation) {
2098 /* Only done for debugging -- throw away final result. */
njn25e49d8e72002-09-23 09:36:25 +00002099 VG_(arena_free)(VG_AR_JITTER, final_code);
sewardjde4a1d02002-03-22 01:27:54 +00002100 } else {
2101 /* Doing it for real -- return values to caller. */
sewardjde4a1d02002-03-22 01:27:54 +00002102 *orig_size = n_disassembled_bytes;
2103 *trans_addr = (Addr)final_code;
2104 *trans_size = final_code_size;
2105 }
njn25e49d8e72002-09-23 09:36:25 +00002106 VGP_POPCC(VgpTranslate);
sewardjde4a1d02002-03-22 01:27:54 +00002107}
2108
2109/*--------------------------------------------------------------------*/
2110/*--- end vg_translate.c ---*/
2111/*--------------------------------------------------------------------*/