/*--------------------------------------------------------------------*/
/*--- The JITter proper: register allocation & code improvement ---*/
/*--- vg_translate.c ---*/
/*--------------------------------------------------------------------*/
/*
This file is part of Valgrind, an x86 protected-mode emulator
designed for debugging and profiling binaries on x86-Unixes.
Copyright (C) 2000-2002 Julian Seward
jseward@acm.org
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file LICENSE.
*/
#include "vg_include.h"
/*------------------------------------------------------------*/
/*--- Renamings of frequently-used global functions. ---*/
/*------------------------------------------------------------*/
#define uInstr1 VG_(newUInstr1)
#define uInstr2 VG_(newUInstr2)
#define uInstr3 VG_(newUInstr3)
#define dis VG_(disassemble)
#define nameIReg VG_(nameOfIntReg)
#define nameISize VG_(nameOfIntSize)
#define uLiteral VG_(setLiteralField)
#define newTemp VG_(getNewTemp)
#define newShadow VG_(getNewShadow)
/*------------------------------------------------------------*/
/*--- Memory management for the translator. ---*/
/*------------------------------------------------------------*/
#define N_JITBLOCKS 4
#define N_JITBLOCK_SZ 5000
static UChar jitstorage[N_JITBLOCKS][N_JITBLOCK_SZ];
static Bool jitstorage_inuse[N_JITBLOCKS];
static Bool jitstorage_initdone = False;
static __inline__ void jitstorage_initialise ( void )
{
Int i;
if (jitstorage_initdone) return;
jitstorage_initdone = True;
for (i = 0; i < N_JITBLOCKS; i++)
jitstorage_inuse[i] = False;
}
void* VG_(jitmalloc) ( Int nbytes )
{
Int i;
jitstorage_initialise();
if (nbytes > N_JITBLOCK_SZ) {
/* VG_(printf)("too large: %d\n", nbytes); */
return VG_(malloc)(VG_AR_PRIVATE, nbytes);
}
for (i = 0; i < N_JITBLOCKS; i++) {
if (!jitstorage_inuse[i]) {
jitstorage_inuse[i] = True;
/* VG_(printf)("alloc %d -> %d\n", nbytes, i ); */
return & jitstorage[i][0];
}
}
VG_(panic)("out of slots in vg_jitmalloc\n");
return VG_(malloc)(VG_AR_PRIVATE, nbytes);
}
void VG_(jitfree) ( void* ptr )
{
Int i;
jitstorage_initialise();
for (i = 0; i < N_JITBLOCKS; i++) {
if (ptr == & jitstorage[i][0]) {
vg_assert(jitstorage_inuse[i]);
jitstorage_inuse[i] = False;
return;
}
}
VG_(free)(VG_AR_PRIVATE, ptr);
}
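/* Illustrative usage sketch of the pool above (kept out of the
   build with #if 0; names invented for the example). Requests of
   at most N_JITBLOCK_SZ bytes are served from the fixed slots;
   larger ones silently fall back to VG_(malloc), and VG_(jitfree)
   copes with either case. */
#if 0
static void example_jit_pool_usage ( void )
{
   void* small = VG_(jitmalloc)( 100 );                /* pool slot */
   void* big   = VG_(jitmalloc)( 2 * N_JITBLOCK_SZ );  /* heap-backed */
   /* ... use the buffers while translating ... */
   VG_(jitfree)(small);
   VG_(jitfree)(big);
}
#endif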
/*------------------------------------------------------------*/
/*--- Basics ---*/
/*------------------------------------------------------------*/
UCodeBlock* VG_(allocCodeBlock) ( void )
{
UCodeBlock* cb = VG_(malloc)(VG_AR_PRIVATE, sizeof(UCodeBlock));
cb->used = cb->size = cb->nextTemp = 0;
cb->instrs = NULL;
return cb;
}
void VG_(freeCodeBlock) ( UCodeBlock* cb )
{
if (cb->instrs) VG_(free)(VG_AR_PRIVATE, cb->instrs);
VG_(free)(VG_AR_PRIVATE, cb);
}
/* Ensure there's enough space in a block to add one uinstr. */
static __inline__
void ensureUInstr ( UCodeBlock* cb )
{
if (cb->used == cb->size) {
if (cb->instrs == NULL) {
vg_assert(cb->size == 0);
vg_assert(cb->used == 0);
cb->size = 8;
cb->instrs = VG_(malloc)(VG_AR_PRIVATE, 8 * sizeof(UInstr));
} else {
Int i;
UInstr* instrs2 = VG_(malloc)(VG_AR_PRIVATE,
2 * sizeof(UInstr) * cb->size);
for (i = 0; i < cb->used; i++)
instrs2[i] = cb->instrs[i];
cb->size *= 2;
VG_(free)(VG_AR_PRIVATE, cb->instrs);
cb->instrs = instrs2;
}
}
vg_assert(cb->used < cb->size);
}
__inline__
void VG_(emptyUInstr) ( UInstr* u )
{
u->val1 = u->val2 = u->val3 = 0;
u->tag1 = u->tag2 = u->tag3 = NoValue;
u->flags_r = u->flags_w = FlagsEmpty;
u->jmpkind = JmpBoring;
u->smc_check = u->signed_widen = False;
u->lit32 = 0;
u->opcode = 0;
u->size = 0;
u->cond = 0;
u->extra4b = 0;
}
/* Add an instruction to a ucode block. */
__inline__
void VG_(newUInstr3) ( UCodeBlock* cb, Opcode opcode, Int sz,
Tag tag1, UInt val1,
Tag tag2, UInt val2,
Tag tag3, UInt val3 )
{
UInstr* ui;
ensureUInstr(cb);
ui = & cb->instrs[cb->used];
cb->used++;
VG_(emptyUInstr)(ui);
ui->val1 = val1;
ui->val2 = val2;
ui->val3 = val3;
ui->opcode = opcode;
ui->tag1 = tag1;
ui->tag2 = tag2;
ui->tag3 = tag3;
ui->size = sz;
if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
if (tag3 == TempReg) vg_assert(val3 != INVALID_TEMPREG);
}
__inline__
void VG_(newUInstr2) ( UCodeBlock* cb, Opcode opcode, Int sz,
Tag tag1, UInt val1,
Tag tag2, UInt val2 )
{
UInstr* ui;
ensureUInstr(cb);
ui = & cb->instrs[cb->used];
cb->used++;
VG_(emptyUInstr)(ui);
ui->val1 = val1;
ui->val2 = val2;
ui->opcode = opcode;
ui->tag1 = tag1;
ui->tag2 = tag2;
ui->size = sz;
if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
if (tag2 == TempReg) vg_assert(val2 != INVALID_TEMPREG);
}
__inline__
void VG_(newUInstr1) ( UCodeBlock* cb, Opcode opcode, Int sz,
Tag tag1, UInt val1 )
{
UInstr* ui;
ensureUInstr(cb);
ui = & cb->instrs[cb->used];
cb->used++;
VG_(emptyUInstr)(ui);
ui->val1 = val1;
ui->opcode = opcode;
ui->tag1 = tag1;
ui->size = sz;
if (tag1 == TempReg) vg_assert(val1 != INVALID_TEMPREG);
}
__inline__
void VG_(newUInstr0) ( UCodeBlock* cb, Opcode opcode, Int sz )
{
UInstr* ui;
ensureUInstr(cb);
ui = & cb->instrs[cb->used];
cb->used++;
VG_(emptyUInstr)(ui);
ui->opcode = opcode;
ui->size = sz;
}
/* Copy an instruction into the given codeblock. */
__inline__
void VG_(copyUInstr) ( UCodeBlock* cb, UInstr* instr )
{
ensureUInstr(cb);
cb->instrs[cb->used] = *instr;
cb->used++;
}
/* Copy auxiliary info from one uinstr to another. */
static __inline__
void copyAuxInfoFromTo ( UInstr* src, UInstr* dst )
{
dst->cond = src->cond;
dst->extra4b = src->extra4b;
dst->smc_check = src->smc_check;
dst->signed_widen = src->signed_widen;
dst->jmpkind = src->jmpkind;
dst->flags_r = src->flags_r;
dst->flags_w = src->flags_w;
}
/* Set the flag R/W sets on a uinstr. */
void VG_(setFlagRW) ( UInstr* u, FlagSet fr, FlagSet fw )
{
/* VG_(ppUInstr)(-1,u); */
vg_assert(fr == (fr & FlagsALL));
vg_assert(fw == (fw & FlagsALL));
u->flags_r = fr;
u->flags_w = fw;
}
/* Set the lit32 field of the most recent uinsn. */
void VG_(setLiteralField) ( UCodeBlock* cb, UInt lit32 )
{
LAST_UINSTR(cb).lit32 = lit32;
}
Bool VG_(anyFlagUse) ( UInstr* u )
{
return (u->flags_r != FlagsEmpty
|| u->flags_w != FlagsEmpty);
}
/* Convert a rank in the range 0 .. VG_MAX_REALREGS-1 into an Intel
register number. This effectively defines the order in which real
registers are allocated. %ebp is excluded since it is permanently
reserved for pointing at VG_(baseBlock). %edi is a general spare
temp used for Left4 and various misc tag ops.
Important! If you change the set of allocatable registers from
%eax, %ebx, %ecx, %edx, %esi you must change the
save/restore sequences in vg_helper_smc_check4 to match!
*/
__inline__ Int VG_(rankToRealRegNo) ( Int rank )
{
switch (rank) {
# if 1
/* Probably the best allocation ordering. */
case 0: return R_EAX;
case 1: return R_EBX;
case 2: return R_ECX;
case 3: return R_EDX;
case 4: return R_ESI;
# else
/* The contrary; probably the worst. Helpful for debugging, though. */
case 4: return R_EAX;
case 3: return R_EBX;
case 2: return R_ECX;
case 1: return R_EDX;
case 0: return R_ESI;
# endif
default: VG_(panic)("rankToRealRegNo");
}
}
/*------------------------------------------------------------*/
/*--- Sanity checking uinstrs. ---*/
/*------------------------------------------------------------*/
/* This seems as good a place as any to record some important stuff
about ucode semantics.
* TempRegs are 32 bits wide. LOADs of 8/16 bit values into a
TempReg are defined to zero-extend the loaded value to 32 bits.
This is needed to make the translation of movzbl et al work
properly.
* Similarly, GETs of 8/16 bit ArchRegs are zero-extended.
* Arithmetic on TempRegs is at the specified size. For example,
SUBW t1, t2 has to result in a real 16 bit x86 subtraction
being emitted -- not a 32 bit one.
* On some insns we allow the cc bit to be set. If so, the
intention is that the simulated machine's %eflags register
is copied into that of the real machine before the insn,
and copied back again afterwards. This means that the
code generated for that insn must be very careful only to
update %eflags in the intended way. This is particularly
important for the routines referenced by CALL insns.
*/
/* Meaning of operand kinds is as follows:
ArchReg is a register of the simulated CPU, stored in memory,
in vg_m_state.m_eax .. m_edi. These values are stored
using the Intel register encoding.
RealReg is a register of the real CPU. There are VG_MAX_REALREGS
available for allocation. As with ArchRegs, these values
are stored using the Intel register encoding.
TempReg is a temporary register used to express the results of
disassembly. There is an unlimited supply of them --
register allocation and spilling eventually assigns them
to RealRegs.
SpillNo is a spill slot number. The number of required spill
slots is VG_MAX_PSEUDOS, in general. Only allowed
as the ArchReg operand of GET and PUT.
Lit16 is a signed 16-bit literal value.
Literal is a 32-bit literal value. Each uinstr can only hold
one of these.
The disassembled code is expressed purely in terms of ArchReg,
TempReg and Literal operands. Eventually, register allocation
removes all the TempRegs, giving a result using ArchRegs, RealRegs,
and Literals. New x86 code can easily be synthesised from this.
There are carefully designed restrictions on which insns can have
which operands, intended to make it possible to generate x86 code
from the result of register allocation on the ucode efficiently and
without need of any further RealRegs.
Restrictions on insns (as generated by the disassembler) are as
follows:
A=ArchReg S=SpillNo T=TempReg L=Literal R=RealReg
N=NoValue
GETF      T      N      N
PUTF      T      N      N
GET       A,S    T      N
PUT       T      A,S    N
LOAD      T      T      N
STORE     T      T      N
MOV       T,L    T      N
CMOV      T      T      N
WIDEN     T      N      N
JMP       T,L    N      N
CALLM     L      N      N
CALLM_S   N      N      N
CALLM_E   N      N      N
PUSH,POP  T      N      N
CLEAR     L      N      N
AND, OR
          T      T      N
ADD, ADC, XOR, SUB, SBB
          A,L,T  T      N
SHL, SHR, SAR, ROL, ROR, RCL, RCR
          L,T    T      N
NOT, NEG, INC, DEC, CC2VAL, BSWAP
          T      N      N
JIFZ      T      L      N
FPU_R     L      T      N
FPU_W     L      T      N
FPU       L      T      N
LEA1      T      T      (const in a separate field)
LEA2      T      T      T   (const & shift ditto)
INCEIP    L      N      N
and for instrumentation insns:
LOADV     T      T      N
STOREV    T,L    T      N
GETV      A      T      N
PUTV      T,L    A      N
GETVF     T      N      N
PUTVF     T      N      N
WIDENV    T      N      N
TESTV     A,T    N      N
SETV      A,T    N      N
TAG1      T      N      L   (tag op as Lit16)
TAG2      T      T      L   (tag op as Lit16)
Before register allocation, S operands should not appear anywhere.
After register allocation, all T operands should have been
converted into Rs, and S operands are allowed in GET and PUT --
denoting spill saves/restores.
The size field should be 0 for insns for which it is meaningless,
ie those which do not directly move/operate on data.
*/
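/* Worked example (illustrative only; temp numbers invented): the
   two-byte x86 insn "addl %eax, %ebx" would be expressed in ucode
   roughly as below, via the helpers renamed at the top of this
   file. FlagsOSZACP is assumed to be the usual O|S|Z|A|C|P flag
   set from vg_include.h. */
#if 0
static void example_disassemble_addl ( UCodeBlock* cb )
{
   Int t0 = newTemp(cb);
   Int t2 = newTemp(cb);
   uInstr2(cb, GET, 4, ArchReg, R_EAX, TempReg, t0);   /* GETL %EAX, t0 */
   uInstr2(cb, GET, 4, ArchReg, R_EBX, TempReg, t2);   /* GETL %EBX, t2 */
   uInstr2(cb, ADD, 4, TempReg, t0, TempReg, t2);      /* ADDL t0, t2 */
   VG_(setFlagRW)(&LAST_UINSTR(cb), FlagsEmpty, FlagsOSZACP);
   uInstr2(cb, PUT, 4, TempReg, t2, ArchReg, R_EBX);   /* PUTL t2, %EBX */
   uInstr1(cb, INCEIP, 0, Lit16, 2);                   /* eip advances 2 */
}
#endif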
Bool VG_(saneUInstr) ( Bool beforeRA, UInstr* u )
{
# define TR1 (beforeRA ? (u->tag1 == TempReg) : (u->tag1 == RealReg))
# define TR2 (beforeRA ? (u->tag2 == TempReg) : (u->tag2 == RealReg))
# define TR3 (beforeRA ? (u->tag3 == TempReg) : (u->tag3 == RealReg))
# define A1 (u->tag1 == ArchReg)
# define A2 (u->tag2 == ArchReg)
# define AS1 ((u->tag1 == ArchReg) || ((!beforeRA && (u->tag1 == SpillNo))))
# define AS2 ((u->tag2 == ArchReg) || ((!beforeRA && (u->tag2 == SpillNo))))
# define AS3 ((u->tag3 == ArchReg) || ((!beforeRA && (u->tag3 == SpillNo))))
# define L1 (u->tag1 == Literal && u->val1 == 0)
# define L2 (u->tag2 == Literal && u->val2 == 0)
# define Ls1 (u->tag1 == Lit16)
# define Ls3 (u->tag3 == Lit16)
# define N1 (u->tag1 == NoValue)
# define N2 (u->tag2 == NoValue)
# define N3 (u->tag3 == NoValue)
# define SZ4 (u->size == 4)
# define SZ2 (u->size == 2)
# define SZ1 (u->size == 1)
# define SZ0 (u->size == 0)
# define CC0 (u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty)
# define FLG_RD (u->flags_r == FlagsALL && u->flags_w == FlagsEmpty)
# define FLG_WR (u->flags_r == FlagsEmpty && u->flags_w == FlagsALL)
# define FLG_RD_WR_MAYBE \
((u->flags_r == FlagsEmpty && u->flags_w == FlagsEmpty) \
|| (u->flags_r == FlagsEmpty && u->flags_w == FlagsZCP) \
|| (u->flags_r == FlagsZCP && u->flags_w == FlagsEmpty))
# define CC1 (!(CC0))
# define SZ4_IF_TR1 ((u->tag1 == TempReg || u->tag1 == RealReg) \
? (u->size == 4) : True)
Int n_lits = 0;
if (u->tag1 == Literal) n_lits++;
if (u->tag2 == Literal) n_lits++;
if (u->tag3 == Literal) n_lits++;
if (n_lits > 1)
return False;
switch (u->opcode) {
case GETF:
return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_RD;
case PUTF:
return (SZ2 || SZ4) && TR1 && N2 && N3 && FLG_WR;
case CALLM_S: case CALLM_E:
return SZ0 && N1 && N2 && N3;
case INCEIP:
return SZ0 && CC0 && Ls1 && N2 && N3;
case LEA1:
return CC0 && TR1 && TR2 && N3 && SZ4;
case LEA2:
return CC0 && TR1 && TR2 && TR3 && SZ4;
case NOP:
return SZ0 && CC0 && N1 && N2 && N3;
case GET:
return CC0 && AS1 && TR2 && N3;
case PUT:
return CC0 && TR1 && AS2 && N3;
case LOAD: case STORE:
return CC0 && TR1 && TR2 && N3;
case MOV:
return CC0 && (TR1 || L1) && TR2 && N3 && SZ4_IF_TR1;
case CMOV:
return CC1 && TR1 && TR2 && N3 && SZ4;
case JMP:
return (u->cond==CondAlways ? CC0 : CC1)
&& (TR1 || L1) && N2 && SZ0 && N3;
case CLEAR:
return CC0 && Ls1 && N2 && SZ0 && N3;
case CALLM:
return SZ0 && Ls1 && N2 && N3;
case PUSH: case POP:
return CC0 && TR1 && N2 && N3;
case AND: case OR:
return TR1 && TR2 && N3;
case ADD: case ADC: case XOR: case SUB: case SBB:
return (A1 || TR1 || L1) && TR2 && N3;
case SHL: case SHR: case SAR: case ROL: case ROR: case RCL: case RCR:
return (TR1 || L1) && TR2 && N3;
case NOT: case NEG: case INC: case DEC:
return TR1 && N2 && N3;
case BSWAP:
return TR1 && N2 && N3 && CC0 && SZ4;
case CC2VAL:
return CC1 && SZ1 && TR1 && N2 && N3;
case JIFZ:
return CC0 && SZ4 && TR1 && L2 && N3;
case FPU_R: case FPU_W:
return CC0 && Ls1 && TR2 && N3;
case FPU:
return SZ0 && FLG_RD_WR_MAYBE && Ls1 && N2 && N3;
case LOADV:
return CC0 && TR1 && TR2 && N3;
case STOREV:
return CC0 && (TR1 || L1) && TR2 && N3;
case GETV:
return CC0 && A1 && TR2 && N3;
case PUTV:
return CC0 && (TR1 || L1) && A2 && N3;
case GETVF:
return CC0 && TR1 && N2 && N3 && SZ0;
case PUTVF:
return CC0 && TR1 && N2 && N3 && SZ0;
case WIDEN:
return CC0 && TR1 && N2 && N3;
case TESTV:
return CC0 && (A1 || TR1) && N2 && N3;
case SETV:
return CC0 && (A1 || TR1) && N2 && N3;
case TAG1:
return CC0 && TR1 && N2 && Ls3 && SZ0;
case TAG2:
return CC0 && TR1 && TR2 && Ls3 && SZ0;
default:
VG_(panic)("vg_saneUInstr: unhandled opcode");
}
# undef SZ4_IF_TR1
# undef CC0
# undef CC1
# undef SZ4
# undef SZ2
# undef SZ1
# undef SZ0
# undef TR1
# undef TR2
# undef TR3
# undef A1
# undef A2
# undef AS1
# undef AS2
# undef AS3
# undef L1
# undef Ls1
# undef L2
# undef Ls3
# undef N1
# undef N2
# undef N3
# undef FLG_RD
# undef FLG_WR
# undef FLG_RD_WR_MAYBE
}
/* Sanity checks to do with CALLMs in UCodeBlocks. */
Bool VG_(saneUCodeBlock) ( UCodeBlock* cb )
{
Int callm = 0;
Int callm_s = 0;
Int callm_e = 0;
Int callm_ptr, calls_ptr;
Int i, j, t;
Bool incall = False;
/* Ensure the number of CALLM, CALLM_S and CALLM_E are the same. */
for (i = 0; i < cb->used; i++) {
switch (cb->instrs[i].opcode) {
case CALLM:
if (!incall) return False;
callm++;
break;
case CALLM_S:
if (incall) return False;
incall = True;
callm_s++;
break;
case CALLM_E:
if (!incall) return False;
incall = False;
callm_e++;
break;
case PUSH: case POP: case CLEAR:
if (!incall) return False;
break;
default:
break;
}
}
if (incall) return False;
if (callm != callm_s || callm != callm_e) return False;
/* Check the sections between each CALLM_S and its CALLM. Ensure
that no PUSH uinsn pushes any TempReg that any other PUSH in the
same section pushes. I.e., check that the TempReg args to PUSHes in
the section are unique. If not, the instrumenter generates
incorrect code for CALLM insns. */
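/* E.g. (illustrative; temp numbers invented) the section
   "CALLM_S; PUSH t8; PUSH t9; CALLM ...; CALLM_E" passes this
   check, whereas one containing "PUSH t8; ...; PUSH t8" fails. */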
callm_ptr = 0;
find_next_CALLM:
/* Search for the next interval, making calls_ptr .. callm_ptr
bracket it. */
while (callm_ptr < cb->used
&& cb->instrs[callm_ptr].opcode != CALLM)
callm_ptr++;
if (callm_ptr == cb->used)
return True;
vg_assert(cb->instrs[callm_ptr].opcode == CALLM);
calls_ptr = callm_ptr - 1;
while (cb->instrs[calls_ptr].opcode != CALLM_S)
calls_ptr--;
vg_assert(cb->instrs[calls_ptr].opcode == CALLM_S);
vg_assert(calls_ptr >= 0);
/* VG_(printf)("interval from %d to %d\n", calls_ptr, callm_ptr ); */
/* For each PUSH insn in the interval ... */
for (i = calls_ptr + 1; i < callm_ptr; i++) {
if (cb->instrs[i].opcode != PUSH) continue;
t = cb->instrs[i].val1;
/* Ensure no later PUSH insns up to callm_ptr push the same
TempReg. Return False if any such are found. */
for (j = i+1; j < callm_ptr; j++) {
if (cb->instrs[j].opcode == PUSH &&
cb->instrs[j].val1 == t)
return False;
}
}
/* This interval is clean. Keep going ... */
callm_ptr++;
goto find_next_CALLM;
}
/*------------------------------------------------------------*/
/*--- Printing uinstrs. ---*/
/*------------------------------------------------------------*/
Char* VG_(nameCondcode) ( Condcode cond )
{
switch (cond) {
case CondO: return "o";
case CondNO: return "no";
case CondB: return "b";
case CondNB: return "nb";
case CondZ: return "z";
case CondNZ: return "nz";
case CondBE: return "be";
case CondNBE: return "nbe";
case CondS: return "s";
case ConsNS: return "ns";
case CondP: return "p";
case CondNP: return "np";
case CondL: return "l";
case CondNL: return "nl";
case CondLE: return "le";
case CondNLE: return "nle";
case CondAlways: return "MP"; /* hack! */
default: VG_(panic)("nameCondcode");
}
}
static void vg_ppFlagSet ( Char* prefix, FlagSet set )
{
VG_(printf)("%s", prefix);
if (set & FlagD) VG_(printf)("D");
if (set & FlagO) VG_(printf)("O");
if (set & FlagS) VG_(printf)("S");
if (set & FlagZ) VG_(printf)("Z");
if (set & FlagA) VG_(printf)("A");
if (set & FlagC) VG_(printf)("C");
if (set & FlagP) VG_(printf)("P");
}
static void ppTempReg ( Int tt )
{
if ((tt & 1) == 0)
VG_(printf)("t%d", tt);
else
VG_(printf)("q%d", tt-1);
}
static void ppUOperand ( UInstr* u, Int operandNo, Int sz, Bool parens )
{
UInt tag, val;
switch (operandNo) {
case 1: tag = u->tag1; val = u->val1; break;
case 2: tag = u->tag2; val = u->val2; break;
case 3: tag = u->tag3; val = u->val3; break;
default: VG_(panic)("ppUOperand(1)");
}
if (tag == Literal) val = u->lit32;
if (parens) VG_(printf)("(");
switch (tag) {
case TempReg: ppTempReg(val); break;
case RealReg: VG_(printf)("%s",nameIReg(sz==0 ? 4 : sz,val)); break;
case Literal: VG_(printf)("$0x%x", val); break;
case Lit16: VG_(printf)("$0x%x", val); break;
case NoValue: VG_(printf)("NoValue"); break;
case ArchReg: VG_(printf)("%S",nameIReg(sz,val)); break;
case SpillNo: VG_(printf)("spill%d", val); break;
default: VG_(panic)("ppUOperand(2)");
}
if (parens) VG_(printf)(")");
}
Char* VG_(nameUOpcode) ( Bool upper, Opcode opc )
{
switch (opc) {
case ADD: return (upper ? "ADD" : "add");
case ADC: return (upper ? "ADC" : "adc");
case AND: return (upper ? "AND" : "and");
case OR: return (upper ? "OR" : "or");
case XOR: return (upper ? "XOR" : "xor");
case SUB: return (upper ? "SUB" : "sub");
case SBB: return (upper ? "SBB" : "sbb");
case SHL: return (upper ? "SHL" : "shl");
case SHR: return (upper ? "SHR" : "shr");
case SAR: return (upper ? "SAR" : "sar");
case ROL: return (upper ? "ROL" : "rol");
case ROR: return (upper ? "ROR" : "ror");
case RCL: return (upper ? "RCL" : "rcl");
case RCR: return (upper ? "RCR" : "rcr");
case NOT: return (upper ? "NOT" : "not");
case NEG: return (upper ? "NEG" : "neg");
case INC: return (upper ? "INC" : "inc");
case DEC: return (upper ? "DEC" : "dec");
case BSWAP: return (upper ? "BSWAP" : "bswap");
default: break;
}
if (!upper) VG_(panic)("vg_nameUOpcode: invalid !upper");
switch (opc) {
case GETVF: return "GETVF";
case PUTVF: return "PUTVF";
case TAG1: return "TAG1";
case TAG2: return "TAG2";
case CALLM_S: return "CALLM_S";
case CALLM_E: return "CALLM_E";
case INCEIP: return "INCEIP";
case LEA1: return "LEA1";
case LEA2: return "LEA2";
case NOP: return "NOP";
case GET: return "GET";
case PUT: return "PUT";
case GETF: return "GETF";
case PUTF: return "PUTF";
case LOAD: return "LD" ;
case STORE: return "ST" ;
case MOV: return "MOV";
case CMOV: return "CMOV";
case WIDEN: return "WIDEN";
case JMP: return "J" ;
case JIFZ: return "JIFZ" ;
case CALLM: return "CALLM";
case PUSH: return "PUSH" ;
case POP: return "POP" ;
case CLEAR: return "CLEAR";
case CC2VAL: return "CC2VAL";
case FPU_R: return "FPU_R";
case FPU_W: return "FPU_W";
case FPU: return "FPU" ;
case LOADV: return "LOADV";
case STOREV: return "STOREV";
case GETV: return "GETV";
case PUTV: return "PUTV";
case TESTV: return "TESTV";
case SETV: return "SETV";
default: VG_(panic)("nameUOpcode: unhandled case");
}
}
void VG_(ppUInstr) ( Int instrNo, UInstr* u )
{
VG_(printf)("\t%4d: %s", instrNo,
VG_(nameUOpcode)(True, u->opcode));
if (u->opcode == JMP || u->opcode == CC2VAL)
VG_(printf)("%s", VG_(nameCondcode(u->cond)));
switch (u->size) {
case 0: VG_(printf)("o"); break;
case 1: VG_(printf)("B"); break;
case 2: VG_(printf)("W"); break;
case 4: VG_(printf)("L"); break;
case 8: VG_(printf)("Q"); break;
default: VG_(printf)("%d", (Int)u->size); break;
}
switch (u->opcode) {
case TAG1:
VG_(printf)("\t");
ppUOperand(u, 1, 4, False);
VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
ppUOperand(u, 1, 4, False);
VG_(printf)(" )");
break;
case TAG2:
VG_(printf)("\t");
ppUOperand(u, 2, 4, False);
VG_(printf)(" = %s ( ", VG_(nameOfTagOp)( u->val3 ));
ppUOperand(u, 1, 4, False);
VG_(printf)(", ");
ppUOperand(u, 2, 4, False);
VG_(printf)(" )");
break;
case CALLM_S: case CALLM_E:
break;
case INCEIP:
VG_(printf)("\t$%d", u->val1);
break;
case LEA2:
VG_(printf)("\t%d(" , u->lit32);
ppUOperand(u, 1, 4, False);
VG_(printf)(",");
ppUOperand(u, 2, 4, False);
VG_(printf)(",%d), ", (Int)u->extra4b);
ppUOperand(u, 3, 4, False);
break;
case LEA1:
VG_(printf)("\t%d" , u->lit32);
ppUOperand(u, 1, 4, True);
VG_(printf)(", ");
ppUOperand(u, 2, 4, False);
break;
case NOP:
break;
case FPU_W:
VG_(printf)("\t0x%x:0x%x, ",
(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
ppUOperand(u, 2, 4, True);
break;
case FPU_R:
VG_(printf)("\t");
ppUOperand(u, 2, 4, True);
VG_(printf)(", 0x%x:0x%x",
(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
break;
case FPU:
VG_(printf)("\t0x%x:0x%x",
(u->val1 >> 8) & 0xFF, u->val1 & 0xFF );
break;
case STOREV: case LOADV:
case GET: case PUT: case MOV: case LOAD: case STORE: case CMOV:
VG_(printf)("\t");
ppUOperand(u, 1, u->size, u->opcode==LOAD || u->opcode==LOADV);
VG_(printf)(", ");
ppUOperand(u, 2, u->size, u->opcode==STORE || u->opcode==STOREV);
break;
case GETF: case PUTF:
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
break;
case JMP: case CC2VAL:
case PUSH: case POP: case CLEAR: case CALLM:
if (u->opcode == JMP) {
switch (u->jmpkind) {
case JmpCall: VG_(printf)("-c"); break;
case JmpRet: VG_(printf)("-r"); break;
case JmpSyscall: VG_(printf)("-sys"); break;
case JmpClientReq: VG_(printf)("-cli"); break;
default: break;
}
}
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
break;
case JIFZ:
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
VG_(printf)(", ");
ppUOperand(u, 2, u->size, False);
break;
case PUTVF: case GETVF:
VG_(printf)("\t");
ppUOperand(u, 1, 0, False);
break;
case NOT: case NEG: case INC: case DEC: case BSWAP:
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
break;
case ADD: case ADC: case AND: case OR:
case XOR: case SUB: case SBB:
case SHL: case SHR: case SAR:
case ROL: case ROR: case RCL: case RCR:
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
VG_(printf)(", ");
ppUOperand(u, 2, u->size, False);
break;
case GETV: case PUTV:
VG_(printf)("\t");
ppUOperand(u, 1, u->opcode==PUTV ? 4 : u->size, False);
VG_(printf)(", ");
ppUOperand(u, 2, u->opcode==GETV ? 4 : u->size, False);
break;
case WIDEN:
VG_(printf)("_%c%c", VG_(toupper)(nameISize(u->extra4b)),
u->signed_widen?'s':'z');
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
break;
case TESTV: case SETV:
VG_(printf)("\t");
ppUOperand(u, 1, u->size, False);
break;
default: VG_(panic)("ppUInstr: unhandled opcode");
}
if (u->flags_r != FlagsEmpty || u->flags_w != FlagsEmpty) {
VG_(printf)(" (");
if (u->flags_r != FlagsEmpty)
vg_ppFlagSet("-r", u->flags_r);
if (u->flags_w != FlagsEmpty)
vg_ppFlagSet("-w", u->flags_w);
VG_(printf)(")");
}
VG_(printf)("\n");
}
void VG_(ppUCodeBlock) ( UCodeBlock* cb, Char* title )
{
Int i;
VG_(printf)("\n%s\n", title);
for (i = 0; i < cb->used; i++)
if (0 || cb->instrs[i].opcode != NOP)
VG_(ppUInstr) ( i, &cb->instrs[i] );
VG_(printf)("\n");
}
/*------------------------------------------------------------*/
/*--- uinstr helpers for register allocation ---*/
/*--- and code improvement. ---*/
/*------------------------------------------------------------*/
/* A structure for communicating temp uses, and for indicating
temp->real register mappings for patchUInstr. */
typedef
struct {
Int realNo;
Int tempNo;
Bool isWrite;
}
TempUse;
/* Get the temp uses of a uinstr, parking them in an array supplied by
the caller, which is assumed to be big enough. Return the number
of entries. Insns which read _and_ write a register wind up
mentioning it twice. Entries are placed in the array in program
order, so that if a reg is read-modified-written, it appears first
as a read and then as a write.
*/
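/* For example (illustrative), "ADD t1, t2" -- RD(1); RD(2); WR(2)
   below -- fills the array as
      arr[0] = { tempNo=1, isWrite=False }
      arr[1] = { tempNo=2, isWrite=False }
      arr[2] = { tempNo=2, isWrite=True }
   with t2 appearing twice since it is read and then written. */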
static __inline__
Int getTempUsage ( UInstr* u, TempUse* arr )
{
# define RD(ono) \
if (mycat(u->tag,ono) == TempReg) \
{ arr[n].tempNo = mycat(u->val,ono); \
arr[n].isWrite = False; n++; }
# define WR(ono) \
if (mycat(u->tag,ono) == TempReg) \
{ arr[n].tempNo = mycat(u->val,ono); \
arr[n].isWrite = True; n++; }
Int n = 0;
switch (u->opcode) {
case LEA1: RD(1); WR(2); break;
case LEA2: RD(1); RD(2); WR(3); break;
case NOP: case FPU: case INCEIP: case CALLM_S: case CALLM_E: break;
case FPU_R: case FPU_W: RD(2); break;
case GETF: WR(1); break;
case PUTF: RD(1); break;
case GET: WR(2); break;
case PUT: RD(1); break;
case LOAD: RD(1); WR(2); break;
case STORE: RD(1); RD(2); break;
case MOV: RD(1); WR(2); break;
case JMP: RD(1); break;
case CLEAR: case CALLM: break;
case PUSH: RD(1); break;
case POP: WR(1); break;
case TAG2:
case CMOV:
case ADD: case ADC: case AND: case OR:
case XOR: case SUB: case SBB:
RD(1); RD(2); WR(2); break;
case SHL: case SHR: case SAR:
case ROL: case ROR: case RCL: case RCR:
RD(1); RD(2); WR(2); break;
case NOT: case NEG: case INC: case DEC: case TAG1: case BSWAP:
RD(1); WR(1); break;
case WIDEN: RD(1); WR(1); break;
case CC2VAL: WR(1); break;
case JIFZ: RD(1); break;
/* These sizes are only ever consulted when the instrumentation
code is being added, so the following can return
manifestly-bogus sizes. */
case LOADV: RD(1); WR(2); break;
case STOREV: RD(1); RD(2); break;
case GETV: WR(2); break;
case PUTV: RD(1); break;
case TESTV: RD(1); break;
case SETV: WR(1); break;
case PUTVF: RD(1); break;
case GETVF: WR(1); break;
default: VG_(panic)("getTempUsage: unhandled opcode");
}
return n;
# undef RD
# undef WR
}
/* Change temp regs in u into real regs, as directed by tmap. */
static __inline__
void patchUInstr ( UInstr* u, TempUse* tmap, Int n_tmap )
{
Int i;
if (u->tag1 == TempReg) {
for (i = 0; i < n_tmap; i++)
if (tmap[i].tempNo == u->val1) break;
if (i == n_tmap) VG_(panic)("patchUInstr(1)");
u->tag1 = RealReg;
u->val1 = tmap[i].realNo;
}
if (u->tag2 == TempReg) {
for (i = 0; i < n_tmap; i++)
if (tmap[i].tempNo == u->val2) break;
if (i == n_tmap) VG_(panic)("patchUInstr(2)");
u->tag2 = RealReg;
u->val2 = tmap[i].realNo;
}
if (u->tag3 == TempReg) {
for (i = 0; i < n_tmap; i++)
if (tmap[i].tempNo == u->val3) break;
if (i == n_tmap) VG_(panic)("patchUInstr(3)");
u->tag3 = RealReg;
u->val3 = tmap[i].realNo;
}
}
/* Tedious x86-specific hack which compensates for the fact that the
register numbers for %ah .. %dh do not correspond to those for %eax
.. %edx. It maps a (reg size, reg no) pair to the number of the
containing 32-bit reg. */
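/* E.g. (sz=1, aregno=4) denotes %ah, and 4-4 == 0 is %eax's
   number; (sz=1, aregno=0) is %al and already matches %eax. The
   16-bit and 32-bit numberings coincide, so those sizes map to
   themselves. */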
static __inline__
Int containingArchRegOf ( Int sz, Int aregno )
{
switch (sz) {
case 4: return aregno;
case 2: return aregno;
case 1: return aregno >= 4 ? aregno-4 : aregno;
default: VG_(panic)("containingArchRegOf");
}
}
/* If u reads an ArchReg, return the number of the containing arch
reg. Otherwise return -1. Used in redundant-PUT elimination. */
static __inline__
Int maybe_uinstrReadsArchReg ( UInstr* u )
{
switch (u->opcode) {
case GET:
case ADD: case ADC: case AND: case OR:
case XOR: case SUB: case SBB:
case SHL: case SHR: case SAR: case ROL:
case ROR: case RCL: case RCR:
if (u->tag1 == ArchReg)
return containingArchRegOf ( u->size, u->val1 );
else
return -1;
case GETF: case PUTF:
case CALLM_S: case CALLM_E:
case INCEIP:
case LEA1:
case LEA2:
case NOP:
case PUT:
case LOAD:
case STORE:
case MOV:
case CMOV:
case JMP:
case CALLM: case CLEAR: case PUSH: case POP:
case NOT: case NEG: case INC: case DEC: case BSWAP:
case CC2VAL:
case JIFZ:
case FPU: case FPU_R: case FPU_W:
case WIDEN:
return -1;
default:
VG_(ppUInstr)(0,u);
VG_(panic)("maybe_uinstrReadsArchReg: unhandled opcode");
}
}
static __inline__
Bool uInstrMentionsTempReg ( UInstr* u, Int tempreg )
{
Int i, k;
TempUse tempUse[3];
k = getTempUsage ( u, &tempUse[0] );
for (i = 0; i < k; i++)
if (tempUse[i].tempNo == tempreg)
return True;
return False;
}
/*------------------------------------------------------------*/
/*--- ucode improvement. ---*/
/*------------------------------------------------------------*/
/* Improve the code in cb by doing
-- Redundant ArchReg-fetch elimination
-- Redundant PUT elimination
-- Redundant cond-code restore/save elimination
The overall effect of these is to allow target registers to be
cached in host registers over multiple target insns.
*/
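/* Example (illustrative) of the pass-1 GET elimination: in
      0: GETL %EAX, t0
      1: ADDL t0, t1
      2: GETL %EAX, t2
      3: SUBL t2, t3
   the GET at 2 refetches a value still cached in t0, whose live
   range has just ended; it is turned into a NOP and t2 is renamed
   to t0 for the rest of the block. */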
static void vg_improve ( UCodeBlock* cb )
{
Int i, j, k, m, n, ar, tr, told, actual_areg;
Int areg_map[8];
Bool annul_put[8];
TempUse tempUse[3];
UInstr* u;
Bool wr;
Int* last_live_before;
FlagSet future_dead_flags;
if (cb->nextTemp > 0)
last_live_before = VG_(jitmalloc) ( cb->nextTemp * sizeof(Int) );
else
last_live_before = NULL;
/* PASS 1: redundant GET elimination. (Actually, more general than
that -- eliminates redundant fetches of ArchRegs). */
/* Find the live-range-ends for all temporaries. Duplicates code
in the register allocator :-( */
for (i = 0; i < cb->nextTemp; i++) last_live_before[i] = -1;
for (i = cb->used-1; i >= 0; i--) {
u = &cb->instrs[i];
k = getTempUsage(u, &tempUse[0]);
/* For each temp usage ... bwds in program order. */
for (j = k-1; j >= 0; j--) {
tr = tempUse[j].tempNo;
wr = tempUse[j].isWrite;
if (last_live_before[tr] == -1) {
vg_assert(tr >= 0 && tr < cb->nextTemp);
last_live_before[tr] = wr ? (i+1) : i;
}
}
}
# define BIND_ARCH_TO_TEMP(archreg,tempreg)\
{ Int q; \
/* Invalidate any old binding(s) to tempreg. */ \
for (q = 0; q < 8; q++) \
if (areg_map[q] == tempreg) areg_map[q] = -1; \
/* Add the new binding. */ \
areg_map[archreg] = (tempreg); \
}
/* Set up the A-reg map. */
for (i = 0; i < 8; i++) areg_map[i] = -1;
/* Scan insns. */
for (i = 0; i < cb->used; i++) {
u = &cb->instrs[i];
if (u->opcode == GET && u->size == 4) {
/* GET; see if it can be annulled. */
vg_assert(u->tag1 == ArchReg);
vg_assert(u->tag2 == TempReg);
ar = u->val1;
tr = u->val2;
told = areg_map[ar];
if (told != -1 && last_live_before[told] <= i) {
/* ar already has an old mapping to told, but that runs
out here. Annul this GET, rename tr to told for the
rest of the block, and extend told's live range to that
of tr. */
u->opcode = NOP;
u->tag1 = u->tag2 = NoValue;
n = last_live_before[tr] + 1;
if (n > cb->used) n = cb->used;
last_live_before[told] = last_live_before[tr];
last_live_before[tr] = i-1;
if (VG_(disassemble))
VG_(printf)(
"at %d: delete GET, rename t%d to t%d in (%d .. %d)\n",
i, tr, told,i+1, n-1);
for (m = i+1; m < n; m++) {
if (cb->instrs[m].tag1 == TempReg
&& cb->instrs[m].val1 == tr)
cb->instrs[m].val1 = told;
if (cb->instrs[m].tag2 == TempReg
&& cb->instrs[m].val2 == tr)
cb->instrs[m].val2 = told;
}
BIND_ARCH_TO_TEMP(ar,told);
}
else
BIND_ARCH_TO_TEMP(ar,tr);
}
else if (u->opcode == GET && u->size != 4) {
/* Invalidate any mapping for this archreg. */
actual_areg = containingArchRegOf ( u->size, u->val1 );
areg_map[actual_areg] = -1;
}
else if (u->opcode == PUT && u->size == 4) {
/* PUT; re-establish t -> a binding */
vg_assert(u->tag1 == TempReg);
vg_assert(u->tag2 == ArchReg);
BIND_ARCH_TO_TEMP(u->val2, u->val1);
}
else if (u->opcode == PUT && u->size != 4) {
/* Invalidate any mapping for this archreg. */
actual_areg = containingArchRegOf ( u->size, u->val2 );
areg_map[actual_areg] = -1;
} else {
/* see if insn has an archreg as a read operand; if so try to
map it. */
if (u->tag1 == ArchReg && u->size == 4
&& areg_map[u->val1] != -1) {
switch (u->opcode) {
case ADD: case SUB: case AND: case OR: case XOR:
case ADC: case SBB:
case SHL: case SHR: case SAR: case ROL: case ROR:
case RCL: case RCR:
if (VG_(disassemble))
VG_(printf)(
"at %d: change ArchReg %S to TempReg t%d\n",
i, nameIReg(4,u->val1), areg_map[u->val1]);
u->tag1 = TempReg;
u->val1 = areg_map[u->val1];
/* Remember to extend the live range of the TempReg,
if necessary. */
if (last_live_before[u->val1] < i)
last_live_before[u->val1] = i;
break;
default:
break;
}
}
/* boring insn; invalidate any mappings to temps it writes */
k = getTempUsage(u, &tempUse[0]);
for (j = 0; j < k; j++) {
wr = tempUse[j].isWrite;
if (!wr) continue;
tr = tempUse[j].tempNo;
for (m = 0; m < 8; m++)
if (areg_map[m] == tr) areg_map[m] = -1;
}
}
}
# undef BIND_ARCH_TO_TEMP
/* PASS 2: redundant PUT elimination. Don't annul (delay) puts of
%ESP, since the memory check machinery always requires the
in-memory value of %ESP to be up to date. Although this isn't
actually required by other analyses (cache simulation), it's
simplest to be consistent for all end-uses. */
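/* Example (illustrative): scanning backwards, a later
   "PUTL t4, %ECX" shows that %ecx's in-memory value is about to be
   overwritten, so an earlier "PUTL t0, %ECX" with no intervening
   read of %ecx, jump or call gets annulled. %esp is never treated
   this way. */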
for (j = 0; j < 8; j++)
annul_put[j] = False;
for (i = cb->used-1; i >= 0; i--) {
u = &cb->instrs[i];
if (u->opcode == NOP) continue;
if (u->opcode == PUT && u->size == 4) {
vg_assert(u->tag2 == ArchReg);
actual_areg = containingArchRegOf ( 4, u->val2 );
if (annul_put[actual_areg]) {
vg_assert(actual_areg != R_ESP);
u->opcode = NOP;
u->tag1 = u->tag2 = NoValue;
if (VG_(disassemble))
VG_(printf)("at %d: delete PUT\n", i );
} else {
if (actual_areg != R_ESP)
annul_put[actual_areg] = True;
}
}
else if (u->opcode == PUT && u->size != 4) {
actual_areg = containingArchRegOf ( u->size, u->val2 );
annul_put[actual_areg] = False;
}
else if (u->opcode == JMP || u->opcode == JIFZ
|| u->opcode == CALLM) {
for (j = 0; j < 8; j++)
annul_put[j] = False;
}
else {
/* If an instruction reads an ArchReg, the immediately
preceding PUT cannot be annulled. */
actual_areg = maybe_uinstrReadsArchReg ( u );
if (actual_areg != -1)
annul_put[actual_areg] = False;
}
}
/* PASS 2a: redundant-move elimination. Given MOV t1, t2, where t1
is dead after this point, annul the MOV insn and rename t2 to t1
in the rest of the block. Further modifies the last_live_before map. */
# if 0
VG_(ppUCodeBlock)(cb, "Before MOV elimination" );
for (i = 0; i < cb->nextTemp; i++)
VG_(printf)("llb[t%d]=%d ", i, last_live_before[i]);
VG_(printf)("\n");
# endif
for (i = 0; i < cb->used-1; i++) {
u = &cb->instrs[i];
if (u->opcode != MOV) continue;
if (u->tag1 == Literal) continue;
vg_assert(u->tag1 == TempReg);
vg_assert(u->tag2 == TempReg);
if (last_live_before[u->val1] == i) {
if (VG_(disassemble))
VG_(printf)(
"at %d: delete MOV, rename t%d to t%d in (%d .. %d)\n",
i, u->val2, u->val1, i+1, last_live_before[u->val2] );
for (j = i+1; j <= last_live_before[u->val2]; j++) {
if (cb->instrs[j].tag1 == TempReg
&& cb->instrs[j].val1 == u->val2)
cb->instrs[j].val1 = u->val1;
if (cb->instrs[j].tag2 == TempReg
&& cb->instrs[j].val2 == u->val2)
cb->instrs[j].val2 = u->val1;
}
last_live_before[u->val1] = last_live_before[u->val2];
last_live_before[u->val2] = i-1;
u->opcode = NOP;
u->tag1 = u->tag2 = NoValue;
}
}
/* PASS 3: redundant condition-code restore/save elimination.
Scan backwards from the end. future_dead_flags records the set
of flags which are dead at this point, that is, will be written
before they are next read. Earlier uinsns which write flags
already in future_dead_flags can have their writes annulled.
*/
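/* Example (illustrative): in "ADDL ..; CMPL ..; Jcond ..", the CMP
   rewrites all of OSZACP before the Jcond reads them, so the ADD's
   flag write is dead and its flags_w is cleared -- the code
   generator then need not copy the simulated %eflags back after
   the ADD. */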
future_dead_flags = FlagsEmpty;
for (i = cb->used-1; i >= 0; i--) {
u = &cb->instrs[i];
/* We might never make it to insns beyond this one, so be
conservative. */
if (u->opcode == JIFZ || u->opcode == JMP) {
future_dead_flags = FlagsEmpty;
continue;
}
/* We can annul the flags written by this insn if it writes a
subset (or eq) of the set of flags known to be dead after
this insn. If not, just record the flags also written by
this insn.*/
if (u->flags_w != FlagsEmpty
&& VG_IS_FLAG_SUBSET(u->flags_w, future_dead_flags)) {
if (VG_(disassemble)) {
VG_(printf)("at %d: annul flag write ", i);
vg_ppFlagSet("", u->flags_w);
VG_(printf)(" due to later ");
vg_ppFlagSet("", future_dead_flags);
VG_(printf)("\n");
}
u->flags_w = FlagsEmpty;
} else {
future_dead_flags
= VG_UNION_FLAG_SETS ( u->flags_w, future_dead_flags );
}
/* If this insn also reads flags, empty out future_dead_flags so
as to force preceding writes not to be annulled. */
if (u->flags_r != FlagsEmpty)
future_dead_flags = FlagsEmpty;
}
if (last_live_before)
VG_(jitfree) ( last_live_before );
}
/*------------------------------------------------------------*/
/*--- The new register allocator. ---*/
/*------------------------------------------------------------*/
typedef
struct {
/* Becomes live for the first time after this insn ... */
Int live_after;
/* Becomes dead for the last time after this insn ... */
Int dead_before;
/* The "home" spill slot, if needed. Never changes. */
Int spill_no;
/* Where is it? VG_NOVALUE==in a spill slot; else in reg. */
Int real_no;
}
TempInfo;
/* Take a ucode block and allocate its TempRegs to RealRegs, or put
them in spill locations, and add spill code, if there are not
enough real regs. The usual register allocation deal, in short.
Important redundancy of representation:
real_to_temp maps real reg ranks (RRRs) to TempReg nos, or
to VG_NOVALUE if the real reg has no currently assigned TempReg.
The .real_no field of a TempInfo gives the current RRR for
this TempReg, or VG_NOVALUE if the TempReg is currently
in memory, in which case it is in the SpillNo denoted by
spill_no.
These pieces of information (a fwds-bwds mapping, really) must
be kept consistent!
This allocator uses the so-called Second Chance Bin Packing
algorithm, as described in "Quality and Speed in Linear-scan
Register Allocation" (Traub, Holloway and Smith, ACM PLDI98,
pp142-151). It is simple and fast and remarkably good at
minimising the amount of spill code introduced.
*/
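/* Concretely (illustrative): when an insn needs a TempReg and all
   real regs are taken, pick a victim r not used by this insn,
   preferring one whose TempReg is already dead; failing that, the
   one whose TempReg's next use lies furthest ahead. A still-live
   victim is saved below with a PUT to its spill slot and reloaded
   later with a GET -- its "second chance". */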
static
UCodeBlock* vg_do_register_allocation ( UCodeBlock* c1 )
{
TempInfo* temp_info;
Int real_to_temp[VG_MAX_REALREGS];
Bool is_spill_cand[VG_MAX_REALREGS];
Int ss_busy_until_before[VG_MAX_SPILLSLOTS];
Int i, j, k, m, r, tno, max_ss_no;
Bool wr, defer, isRead, spill_reqd;
TempUse tempUse[3];
UCodeBlock* c2;
/* Used to denote ... well, "no value" in this fn. */
# define VG_NOTHING (-2)
/* Initialise the TempReg info. */
if (c1->nextTemp > 0)
temp_info = VG_(jitmalloc)(c1->nextTemp * sizeof(TempInfo) );
else
temp_info = NULL;
for (i = 0; i < c1->nextTemp; i++) {
temp_info[i].live_after = VG_NOTHING;
temp_info[i].dead_before = VG_NOTHING;
temp_info[i].spill_no = VG_NOTHING;
/* temp_info[i].real_no is not yet relevant. */
}
spill_reqd = False;
/* Scan fwds to establish live ranges. */
for (i = 0; i < c1->used; i++) {
k = getTempUsage(&c1->instrs[i], &tempUse[0]);
vg_assert(k >= 0 && k <= 3);
/* For each temp usage ... fwds in program order */
for (j = 0; j < k; j++) {
tno = tempUse[j].tempNo;
wr = tempUse[j].isWrite;
if (wr) {
/* Writes hold a reg live until after this insn. */
if (temp_info[tno].live_after == VG_NOTHING)
temp_info[tno].live_after = i;
if (temp_info[tno].dead_before < i + 1)
temp_info[tno].dead_before = i + 1;
} else {
/* First use of a tmp should be a write. */
vg_assert(temp_info[tno].live_after != VG_NOTHING);
/* Reads only hold it live until before this insn. */
if (temp_info[tno].dead_before < i)
temp_info[tno].dead_before = i;
}
}
}
# if 0
/* Sanity check on live ranges. Expensive but correct. */
for (i = 0; i < c1->nextTemp; i++) {
vg_assert( (temp_info[i].live_after == VG_NOTHING
&& temp_info[i].dead_before == VG_NOTHING)
|| (temp_info[i].live_after != VG_NOTHING
&& temp_info[i].dead_before != VG_NOTHING) );
}
# endif
/* Do a rank-based allocation of TempRegs to spill slot numbers.
We put as few values as possible in spill slots, but every used
TempReg nevertheless needs an assignment, just in case. */
max_ss_no = -1;
for (i = 0; i < VG_MAX_SPILLSLOTS; i++)
ss_busy_until_before[i] = 0;
for (i = 0; i < c1->nextTemp; i++) {
/* True iff this temp is unused. */
if (temp_info[i].live_after == VG_NOTHING)
continue;
/* Find the lowest-numbered spill slot which is available at the
start point of this interval, and assign the interval to
it. */
for (j = 0; j < VG_MAX_SPILLSLOTS; j++)
if (ss_busy_until_before[j] <= temp_info[i].live_after)
break;
if (j == VG_MAX_SPILLSLOTS) {
VG_(printf)("VG_MAX_SPILLSLOTS is too low; increase and recompile.\n");
VG_(panic)("register allocation failed -- out of spill slots");
}
ss_busy_until_before[j] = temp_info[i].dead_before;
temp_info[i].spill_no = j;
if (j > max_ss_no)
max_ss_no = j;
}
VG_(total_reg_rank) += (max_ss_no+1);
/* Show live ranges and assigned spill slot nos. */
if (VG_(disassemble)) {
VG_(printf)("Live Range Assignments\n");
for (i = 0; i < c1->nextTemp; i++) {
if (temp_info[i].live_after == VG_NOTHING)
continue;
VG_(printf)(
" LR %d is after %d to before %d spillno %d\n",
i,
temp_info[i].live_after,
temp_info[i].dead_before,
temp_info[i].spill_no
);
}
}
/* Now that we've established a spill slot number for each used
temporary, we can go ahead and do the core of the "Second-chance
binpacking" allocation algorithm. */
/* Resulting code goes here. We generate it all in a forwards
pass. */
c2 = VG_(allocCodeBlock)();
/* At the start, no TempRegs are assigned to any real register.
Correspondingly, all temps claim to be currently resident in
their spill slots, as computed by the previous two passes. */
for (i = 0; i < VG_MAX_REALREGS; i++)
real_to_temp[i] = VG_NOTHING;
for (i = 0; i < c1->nextTemp; i++)
temp_info[i].real_no = VG_NOTHING;
if (VG_(disassemble))
VG_(printf)("\n");
/* Process each insn in turn. */
for (i = 0; i < c1->used; i++) {
if (c1->instrs[i].opcode == NOP) continue;
VG_(uinstrs_prealloc)++;
# if 0
/* Check map consistency. Expensive but correct. */
for (r = 0; r < VG_MAX_REALREGS; r++) {
if (real_to_temp[r] != VG_NOTHING) {
tno = real_to_temp[r];
vg_assert(tno >= 0 && tno < c1->nextTemp);
vg_assert(temp_info[tno].real_no == r);
}
}
for (tno = 0; tno < c1->nextTemp; tno++) {
if (temp_info[tno].real_no != VG_NOTHING) {
r = temp_info[tno].real_no;
vg_assert(r >= 0 && r < VG_MAX_REALREGS);
vg_assert(real_to_temp[r] == tno);
}
}
# endif
if (VG_(disassemble))
VG_(ppUInstr)(i, &c1->instrs[i]);
/* First, free up enough real regs for this insn. This may
generate spill stores since we may have to evict some TempRegs
currently in real regs. Also generates spill loads. */
k = getTempUsage(&c1->instrs[i], &tempUse[0]);
vg_assert(k >= 0 && k <= 3);
/* For each ***different*** temp mentioned in the insn .... */
for (j = 0; j < k; j++) {
/* First check if the temp is mentioned again later; if so,
ignore this mention. We only want to process each temp
used by the insn once, even if it is mentioned more than
once. */
defer = False;
tno = tempUse[j].tempNo;
for (m = j+1; m < k; m++)
if (tempUse[m].tempNo == tno)
defer = True;
if (defer)
continue;
/* Now we're trying to find a register for tempUse[j].tempNo.
First of all, if it already has a register assigned, we
don't need to do anything more. */
if (temp_info[tno].real_no != VG_NOTHING)
continue;
/* No luck. The next thing to do is see if there is a
currently unassigned register available. If so, bag it. */
for (r = 0; r < VG_MAX_REALREGS; r++) {
if (real_to_temp[r] == VG_NOTHING)
break;
}
if (r < VG_MAX_REALREGS) {
real_to_temp[r] = tno;
temp_info[tno].real_no = r;
continue;
}
/* Unfortunately, that didn't pan out either. So we'll have
to eject some other unfortunate TempReg into a spill slot
in order to free up a register. Of course, we need to be
careful not to eject some other TempReg needed by this
insn.
Select r in 0 .. VG_MAX_REALREGS-1 such that
real_to_temp[r] is not mentioned in
tempUse[0 .. k-1].tempNo, since it would be just plain
wrong to eject some other TempReg which we need to use in
this insn.
It is here that it is important to make a good choice of
register to spill. */
/* First, mark those regs which are not spill candidates. */
for (r = 0; r < VG_MAX_REALREGS; r++) {
is_spill_cand[r] = True;
for (m = 0; m < k; m++) {
if (real_to_temp[r] == tempUse[m].tempNo) {
is_spill_cand[r] = False;
break;
}
}
}
/* We can choose any r satisfying is_spill_cand[r]. However,
try to make a good choice. First, try and find r such
that the associated TempReg is already dead. */
for (r = 0; r < VG_MAX_REALREGS; r++) {
if (is_spill_cand[r] &&
temp_info[real_to_temp[r]].dead_before <= i)
goto have_spill_cand;
}
/* No spill cand is mapped to a dead TempReg. Now we really
_do_ have to generate spill code. Choose r so that the
next use of its associated TempReg is as far ahead as
possible, in the hope that this will minimise the number of
consequent reloads required. This is a bit expensive, but
we don't have to do it very often. */
{
Int furthest_r = VG_MAX_REALREGS;
Int furthest = 0;
for (r = 0; r < VG_MAX_REALREGS; r++) {
if (!is_spill_cand[r]) continue;
for (m = i+1; m < c1->used; m++)
if (uInstrMentionsTempReg(&c1->instrs[m],
real_to_temp[r]))
break;
if (m > furthest) {
furthest = m;
furthest_r = r;
}
}
r = furthest_r;
goto have_spill_cand;
}
have_spill_cand:
if (r == VG_MAX_REALREGS)
VG_(panic)("new reg alloc: out of registers ?!");
/* Eject r. Important refinement: don't bother if the
associated TempReg is now dead. */
vg_assert(real_to_temp[r] != VG_NOTHING);
vg_assert(real_to_temp[r] != tno);
temp_info[real_to_temp[r]].real_no = VG_NOTHING;
if (temp_info[real_to_temp[r]].dead_before > i) {
uInstr2(c2, PUT, 4,
RealReg, VG_(rankToRealRegNo)(r),
SpillNo, temp_info[real_to_temp[r]].spill_no);
VG_(uinstrs_spill)++;
spill_reqd = True;
if (VG_(disassemble))
VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
}
/* Decide if tno is read. */
isRead = False;
for (m = 0; m < k; m++)
if (tempUse[m].tempNo == tno && !tempUse[m].isWrite)
isRead = True;
/* If so, generate a spill load. */
if (isRead) {
uInstr2(c2, GET, 4,
SpillNo, temp_info[tno].spill_no,
RealReg, VG_(rankToRealRegNo)(r) );
VG_(uinstrs_spill)++;
spill_reqd = True;
if (VG_(disassemble))
VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
}
/* Update the forwards and backwards maps. */
real_to_temp[r] = tno;
temp_info[tno].real_no = r;
}
/* By this point, all TempRegs mentioned by the insn have been
brought into real regs. We now copy the insn to the output
and use patchUInstr to convert its TempRegs into
RealRegs. */
for (j = 0; j < k; j++)
tempUse[j].realNo
= VG_(rankToRealRegNo)(temp_info[tempUse[j].tempNo].real_no);
VG_(copyUInstr)(c2, &c1->instrs[i]);
patchUInstr(&LAST_UINSTR(c2), &tempUse[0], k);
if (VG_(disassemble)) {
VG_(ppUInstr)(c2->used-1, &LAST_UINSTR(c2));
VG_(printf)("\n");
}
}
if (temp_info != NULL)
VG_(jitfree)(temp_info);
VG_(freeCodeBlock)(c1);
if (spill_reqd)
VG_(translations_needing_spill)++;
return c2;
# undef VG_NOTHING
}
/*------------------------------------------------------------*/
/*--- New instrumentation machinery. ---*/
/*------------------------------------------------------------*/
static
VgTagOp get_VgT_ImproveOR_TQ ( Int sz )
{
switch (sz) {
case 4: return VgT_ImproveOR4_TQ;
case 2: return VgT_ImproveOR2_TQ;
case 1: return VgT_ImproveOR1_TQ;
default: VG_(panic)("get_VgT_ImproveOR_TQ");
}
}
static
VgTagOp get_VgT_ImproveAND_TQ ( Int sz )
{
switch (sz) {
case 4: return VgT_ImproveAND4_TQ;
case 2: return VgT_ImproveAND2_TQ;
case 1: return VgT_ImproveAND1_TQ;
default: VG_(panic)("get_VgT_ImproveAND_TQ");
}
}
static
VgTagOp get_VgT_Left ( Int sz )
{
switch (sz) {
case 4: return VgT_Left4;
case 2: return VgT_Left2;
case 1: return VgT_Left1;
default: VG_(panic)("get_VgT_Left");
}
}
static
VgTagOp get_VgT_UifU ( Int sz )
{
switch (sz) {
case 4: return VgT_UifU4;
case 2: return VgT_UifU2;
case 1: return VgT_UifU1;
case 0: return VgT_UifU0;
default: VG_(panic)("get_VgT_UifU");
}
}
static
VgTagOp get_VgT_DifD ( Int sz )
{
switch (sz) {
case 4: return VgT_DifD4;
case 2: return VgT_DifD2;
case 1: return VgT_DifD1;
default: VG_(panic)("get_VgT_DifD");
}
}
static
VgTagOp get_VgT_PCast ( Int szs, Int szd )
{
if (szs == 4 && szd == 0) return VgT_PCast40;
if (szs == 2 && szd == 0) return VgT_PCast20;
if (szs == 1 && szd == 0) return VgT_PCast10;
if (szs == 0 && szd == 1) return VgT_PCast01;
if (szs == 0 && szd == 2) return VgT_PCast02;
if (szs == 0 && szd == 4) return VgT_PCast04;
if (szs == 1 && szd == 4) return VgT_PCast14;
if (szs == 1 && szd == 2) return VgT_PCast12;
if (szs == 1 && szd == 1) return VgT_PCast11;
VG_(printf)("get_VgT_PCast(%d,%d)\n", szs, szd);
VG_(panic)("get_VgT_PCast");
}
static
VgTagOp get_VgT_Widen ( Bool syned, Int szs, Int szd )
{
if (szs == 1 && szd == 2 && syned) return VgT_SWiden12;
if (szs == 1 && szd == 2 && !syned) return VgT_ZWiden12;
if (szs == 1 && szd == 4 && syned) return VgT_SWiden14;
if (szs == 1 && szd == 4 && !syned) return VgT_ZWiden14;
if (szs == 2 && szd == 4 && syned) return VgT_SWiden24;
if (szs == 2 && szd == 4 && !syned) return VgT_ZWiden24;
VG_(printf)("get_VgT_Widen(%d,%d,%d)\n", (Int)syned, szs, szd);
VG_(panic)("get_VgT_Widen");
}
/* Pessimally cast the spec'd shadow from one size to another. */
static
void create_PCast ( UCodeBlock* cb, Int szs, Int szd, Int tempreg )
{
if (szs == 0 && szd == 0)
return;
uInstr3(cb, TAG1, 0, TempReg, tempreg,
NoValue, 0,
Lit16, get_VgT_PCast(szs,szd));
}
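/* E.g. create_PCast(cb, 4, 0, t) collapses t's 32 tag bits to a
   single definedness bit, and create_PCast(cb, 0, 4, t) smears
   that bit back over 32 bits -- the down-then-up trick used for
   RCL/RCR below. */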
/* Create a signed or unsigned widen of the spec'd shadow from one
size to another. The only allowed size transitions are 1->2, 1->4
and 2->4. */
static
void create_Widen ( UCodeBlock* cb, Bool signed_widen,
Int szs, Int szd, Int tempreg )
{
if (szs == szd) return;
uInstr3(cb, TAG1, 0, TempReg, tempreg,
NoValue, 0,
Lit16, get_VgT_Widen(signed_widen,szs,szd));
}
/* Get the condition codes into a new shadow, at the given size. */
static
Int create_GETVF ( UCodeBlock* cb, Int sz )
{
Int tt = newShadow(cb);
uInstr1(cb, GETVF, 0, TempReg, tt);
create_PCast(cb, 0, sz, tt);
return tt;
}
/* Save the condition codes from the spec'd shadow. */
static
void create_PUTVF ( UCodeBlock* cb, Int sz, Int tempreg )
{
if (sz == 0) {
uInstr1(cb, PUTVF, 0, TempReg, tempreg);
} else {
Int tt = newShadow(cb);
uInstr2(cb, MOV, 4, TempReg, tempreg, TempReg, tt);
create_PCast(cb, sz, 0, tt);
uInstr1(cb, PUTVF, 0, TempReg, tt);
}
}
/* Do Left on the spec'd shadow. */
static
void create_Left ( UCodeBlock* cb, Int sz, Int tempreg )
{
uInstr3(cb, TAG1, 0,
TempReg, tempreg,
NoValue, 0,
Lit16, get_VgT_Left(sz));
}
/* Do UifU on ts and td, putting the result in td. */
static
void create_UifU ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
Lit16, get_VgT_UifU(sz));
}
/* Do DifD on ts and td, putting the result in td. */
static
void create_DifD ( UCodeBlock* cb, Int sz, Int ts, Int td )
{
uInstr3(cb, TAG2, 0, TempReg, ts, TempReg, td,
Lit16, get_VgT_DifD(sz));
}
/* Do ImproveAND on value tval and tag tqqq, putting the result in
tqqq. */
static
void create_ImproveAND_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
Lit16, get_VgT_ImproveAND_TQ(sz));
}
/* Do ImproveOR on value tval and tag tqqq, putting the result in
tqqq. */
static
void create_ImproveOR_TQ ( UCodeBlock* cb, Int sz, Int tval, Int tqqq )
{
uInstr3(cb, TAG2, 0, TempReg, tval, TempReg, tqqq,
Lit16, get_VgT_ImproveOR_TQ(sz));
}
/* Get the shadow for an operand described by (tag, val). Emit code
to do this and return the identity of the shadow holding the
result. The result tag is always copied into a new shadow, so it
can be modified without trashing the original.*/
static
Int /* TempReg */ getOperandShadow ( UCodeBlock* cb,
Int sz, Int tag, Int val )
{
Int sh;
sh = newShadow(cb);
if (tag == TempReg) {
uInstr2(cb, MOV, 4, TempReg, SHADOW(val), TempReg, sh);
return sh;
}
if (tag == Literal) {
uInstr1(cb, SETV, sz, TempReg, sh);
return sh;
}
if (tag == ArchReg) {
uInstr2(cb, GETV, sz, ArchReg, val, TempReg, sh);
return sh;
}
VG_(panic)("getOperandShadow");
}
/* Create and return an instrumented version of cb_in. Free cb_in
before returning. */
static UCodeBlock* vg_instrument ( UCodeBlock* cb_in )
{
UCodeBlock* cb;
Int i, j;
UInstr* u_in;
Int qs, qd, qt, qtt;
cb = VG_(allocCodeBlock)();
cb->nextTemp = cb_in->nextTemp;
for (i = 0; i < cb_in->used; i++) {
qs = qd = qt = qtt = INVALID_TEMPREG;
u_in = &cb_in->instrs[i];
/* if (i > 0) uInstr1(cb, NOP, 0, NoValue, 0); */
/* VG_(ppUInstr)(0, u_in); */
switch (u_in->opcode) {
case NOP:
break;
case INCEIP:
VG_(copyUInstr)(cb, u_in);
break;
/* Loads and stores. Test the V bits for the address. 24
Mar 02: since the address is A-checked anyway, there's not
really much point in doing the V-check too, unless you
think that you might use addresses which are undefined but
still addressable. Hence the optionalisation of the V
check.
The LOADV/STOREV does an addressability check for the
address. */
case LOAD:
if (VG_(clo_check_addrVs)) {
uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
}
uInstr2(cb, LOADV, u_in->size,
TempReg, u_in->val1,
TempReg, SHADOW(u_in->val2));
VG_(copyUInstr)(cb, u_in);
break;
case STORE:
if (VG_(clo_check_addrVs)) {
uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
}
uInstr2(cb, STOREV, u_in->size,
TempReg, SHADOW(u_in->val1),
TempReg, u_in->val2);
VG_(copyUInstr)(cb, u_in);
break;
/* Moving stuff around. Make the V bits follow accordingly,
but don't do anything else. */
case GET:
uInstr2(cb, GETV, u_in->size,
ArchReg, u_in->val1,
TempReg, SHADOW(u_in->val2));
VG_(copyUInstr)(cb, u_in);
break;
case PUT:
uInstr2(cb, PUTV, u_in->size,
TempReg, SHADOW(u_in->val1),
ArchReg, u_in->val2);
VG_(copyUInstr)(cb, u_in);
break;
case GETF:
/* This is not the smartest way to do it, but should work. */
qd = create_GETVF(cb, u_in->size);
uInstr2(cb, MOV, 4, TempReg, qd, TempReg, SHADOW(u_in->val1));
VG_(copyUInstr)(cb, u_in);
break;
case PUTF:
create_PUTVF(cb, u_in->size, SHADOW(u_in->val1));
VG_(copyUInstr)(cb, u_in);
break;
case MOV:
switch (u_in->tag1) {
case TempReg:
uInstr2(cb, MOV, 4,
TempReg, SHADOW(u_in->val1),
TempReg, SHADOW(u_in->val2));
break;
case Literal:
uInstr1(cb, SETV, u_in->size,
TempReg, SHADOW(u_in->val2));
break;
default:
VG_(panic)("vg_instrument: MOV");
}
VG_(copyUInstr)(cb, u_in);
break;
/* Special case of add, where one of the operands is a literal.
lea1(t) = t + some literal.
Therefore: lea1#(qa) = left(qa)
*/
case LEA1:
vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
qs = SHADOW(u_in->val1);
qd = SHADOW(u_in->val2);
uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qd);
create_Left(cb, u_in->size, qd);
VG_(copyUInstr)(cb, u_in);
break;
/* Another form of add.
lea2(ts,tt,shift) = ts + (tt << shift); shift is a literal
and is 0,1,2 or 3.
lea2#(qs,qt) = left(qs `UifU` (qt << shift)).
Note, subtly, that the shift puts zeroes at the bottom of qt,
meaning Valid, since the corresponding shift of tt puts
zeroes at the bottom of the shifted tt.
*/
case LEA2: {
Int shift;
vg_assert(u_in->size == 4 && !VG_(anyFlagUse)(u_in));
switch (u_in->extra4b) {
case 1: shift = 0; break;
case 2: shift = 1; break;
case 4: shift = 2; break;
case 8: shift = 3; break;
default: VG_(panic)( "vg_instrument(LEA2)" );
}
qs = SHADOW(u_in->val1);
qt = SHADOW(u_in->val2);
qd = SHADOW(u_in->val3);
uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qd);
if (shift > 0) {
uInstr2(cb, SHL, 4, Literal, 0, TempReg, qd);
uLiteral(cb, shift);
}
create_UifU(cb, 4, qs, qd);
create_Left(cb, u_in->size, qd);
VG_(copyUInstr)(cb, u_in);
break;
}
/* inc#/dec#(qd) = q `UifU` left(qd) = left(qd) */
case INC: case DEC:
qd = SHADOW(u_in->val1);
create_Left(cb, u_in->size, qd);
if (u_in->flags_w != FlagsEmpty)
create_PUTVF(cb, u_in->size, qd);
VG_(copyUInstr)(cb, u_in);
break;
/* This is a HACK (approximation :-) */
/* rcl#/rcr#(qs,qd)
= let q0 = pcast-sz-0(qd) `UifU` pcast-sz-0(qs) `UifU` eflags#
eflags# = q0
qd = pcast-0-sz(q0)
Ie, cast everything down to a single bit, then back up.
This assumes that any bad bits infect the whole word and
the eflags.
*/
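/* Scale of the approximation, by example (editor's note): if just
one bit of the rotated value or of the count is undefined, the
scheme above marks the entire result and all of eflags# as
undefined, even though a 1-bit rotate moves each bit only one
position. Sound, but deliberately coarse. */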
case RCL: case RCR:
vg_assert(u_in->flags_r != FlagsEmpty);
/* The following assertion looks like it makes sense, but is
actually wrong. Consider this:
rcll %eax
imull %eax, %eax
The rcll writes O and C but so does the imull, so the O and C
write of the rcll is annulled by the prior improvement pass.
Noticed by Kevin Ryde <user42@zip.com.au>
*/
/* vg_assert(u_in->flags_w != FlagsEmpty); */
qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
/* We can safely modify qs; cast it to 0-size. */
create_PCast(cb, u_in->size, 0, qs);
qd = SHADOW(u_in->val2);
create_PCast(cb, u_in->size, 0, qd);
/* qs is cast-to-0(shift count#), and qd is cast-to-0(value#). */
create_UifU(cb, 0, qs, qd);
/* qs is now free; reuse it for the flag definedness. */
qs = create_GETVF(cb, 0);
create_UifU(cb, 0, qs, qd);
create_PUTVF(cb, 0, qd);
create_PCast(cb, 0, u_in->size, qd);
VG_(copyUInstr)(cb, u_in);
break;
/* for OP in shl shr sar rol ror
(qs is shift count#, qd is value to be OP#d)
OP(ts,td)
OP#(qs,qd)
= pcast-1-sz(qs) `UifU` OP(ts,qd)
So we apply OP to the tag bits too, and then UifU with
the shift count# to take account of the possibility of it
being undefined.
A bit subtle:
ROL/ROR rearrange the tag bits as per the value bits.
SHL/SHR shifts zeroes into the value, and corresponding
zeroes indicating Definedness into the tag.
SAR copies the top bit of the value downwards, and therefore
SAR also copies the definedness of the top bit too.
So in all five cases, we just apply the same op to the tag
bits as is applied to the value bits. Neat!
*/
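/* Worked example (editor's addition): SHR of size 4 with
qd = 0x80000000 (only the value's top bit undefined) and a
defined count of 8 gives qd = 0x00800000: the undefinedness
travels with the value bit it belongs to, and the zeroes
shifted in at the top are correctly marked Defined. Only an
undefined count, via the pcast-1-sz(qs) term, smears the
whole result. */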
case SHL:
case SHR: case SAR:
case ROL: case ROR: {
Int t_amount = INVALID_TEMPREG;
vg_assert(u_in->tag1 == TempReg || u_in->tag1 == Literal);
vg_assert(u_in->tag2 == TempReg);
qd = SHADOW(u_in->val2);
/* Make qs hold shift-count# and make
t_amount be a TempReg holding the shift count. */
if (u_in->tag1 == Literal) {
t_amount = newTemp(cb);
uInstr2(cb, MOV, 4, Literal, 0, TempReg, t_amount);
uLiteral(cb, u_in->lit32);
qs = SHADOW(t_amount);
uInstr1(cb, SETV, 1, TempReg, qs);
} else {
t_amount = u_in->val1;
qs = SHADOW(u_in->val1);
}
uInstr2(cb, u_in->opcode,
u_in->size,
TempReg, t_amount,
TempReg, qd);
qt = newShadow(cb);
uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
create_PCast(cb, 1, u_in->size, qt);
create_UifU(cb, u_in->size, qt, qd);
VG_(copyUInstr)(cb, u_in);
break;
}
/* One simple tag operation. */
case WIDEN:
vg_assert(u_in->tag1 == TempReg);
create_Widen(cb, u_in->signed_widen, u_in->extra4b, u_in->size,
SHADOW(u_in->val1));
VG_(copyUInstr)(cb, u_in);
break;
/* not#(x) = x (since bitwise independent) */
case NOT:
vg_assert(u_in->tag1 == TempReg);
VG_(copyUInstr)(cb, u_in);
break;
/* neg#(x) = left(x) (derivable from case for SUB) */
case NEG:
vg_assert(u_in->tag1 == TempReg);
create_Left(cb, u_in->size, SHADOW(u_in->val1));
VG_(copyUInstr)(cb, u_in);
break;
/* bswap#(x) = bswap(x) */
case BSWAP:
vg_assert(u_in->tag1 == TempReg);
vg_assert(u_in->size == 4);
qd = SHADOW(u_in->val1);
uInstr1(cb, BSWAP, 4, TempReg, qd);
VG_(copyUInstr)(cb, u_in);
break;
/* cc2val#(qd) = pcast-0-to-size(eflags#) */
case CC2VAL:
vg_assert(u_in->tag1 == TempReg);
vg_assert(u_in->flags_r != FlagsEmpty);
qt = create_GETVF(cb, u_in->size);
uInstr2(cb, MOV, 4, TempReg, qt, TempReg, SHADOW(u_in->val1));
VG_(copyUInstr)(cb, u_in);
break;
/* cmov#(qs,qd) = cmov(qs,qd)
That is, do the cmov of tags using the same flags as for
the data (obviously). However, first do a test on the
validity of the flags.
*/
case CMOV:
vg_assert(u_in->size == 4);
vg_assert(u_in->tag1 == TempReg);
vg_assert(u_in->tag2 == TempReg);
vg_assert(u_in->flags_r != FlagsEmpty);
vg_assert(u_in->flags_w == FlagsEmpty);
qs = SHADOW(u_in->val1);
qd = SHADOW(u_in->val2);
qt = create_GETVF(cb, 0);
uInstr1(cb, TESTV, 0, TempReg, qt);
/* qt should never be referred to again. Nevertheless
... */
uInstr1(cb, SETV, 0, TempReg, qt);
uInstr2(cb, CMOV, 4, TempReg, qs, TempReg, qd);
LAST_UINSTR(cb).cond = u_in->cond;
LAST_UINSTR(cb).flags_r = u_in->flags_r;
VG_(copyUInstr)(cb, u_in);
break;
/* add#/sub#(qs,qd)
= qs `UifU` qd `UifU` left(qs) `UifU` left(qd)
= left(qs) `UifU` left(qd)
= left(qs `UifU` qd)
adc#/sbb#(qs,qd)
= left(qs `UifU` qd) `UifU` pcast(eflags#)
Second arg (dest) is TempReg.
First arg (src) is Literal or TempReg or ArchReg.
*/
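/* Worked example (editor's addition): UifU is bitwise OR of
undefinedness and left() smears the lowest Undefined bit
upwards. With qs = 0x00000001 and qd = 0x00000000,
qs `UifU` qd = 0x00000001 and left() of that is 0xFFFFFFFF:
an undefined bottom bit of an addend can corrupt every higher
bit through carries, but never a lower one. */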
case ADD: case SUB:
case ADC: case SBB:
qd = SHADOW(u_in->val2);
qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
create_UifU(cb, u_in->size, qs, qd);
create_Left(cb, u_in->size, qd);
if (u_in->opcode == ADC || u_in->opcode == SBB) {
vg_assert(u_in->flags_r != FlagsEmpty);
qt = create_GETVF(cb, u_in->size);
create_UifU(cb, u_in->size, qt, qd);
}
if (u_in->flags_w != FlagsEmpty) {
create_PUTVF(cb, u_in->size, qd);
}
VG_(copyUInstr)(cb, u_in);
break;
/* xor#(qs,qd) = qs `UifU` qd */
case XOR:
qd = SHADOW(u_in->val2);
qs = getOperandShadow(cb, u_in->size, u_in->tag1, u_in->val1);
create_UifU(cb, u_in->size, qs, qd);
if (u_in->flags_w != FlagsEmpty) {
create_PUTVF(cb, u_in->size, qd);
}
VG_(copyUInstr)(cb, u_in);
break;
/* and#/or#(qs,qd)
= (qs `UifU` qd) `DifD` improve(vs,qs)
`DifD` improve(vd,qd)
where improve is the relevant one of
Improve{AND,OR}_TQ
Use the following steps, with qt as a temp:
qt = improve(vd,qd)
qd = qs `UifU` qd
qd = qt `DifD` qd
qt = improve(vs,qs)
qd = qt `DifD` qd
*/
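/* Rationale by example (editor's addition): for AND, a value bit
which is 0 and Defined forces the corresponding result bit to 0
whatever the other operand holds, so improve(vd,qd) may mark
that result bit Defined even where qs says Undefined. DifD
("Defined if either Defined") is bitwise AND of undefinedness,
so a result bit stays Undefined only if neither improvement
rescues it. For OR, the same argument applies to value bits
which are 1 and Defined. */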
case AND: case OR:
vg_assert(u_in->tag1 == TempReg);
vg_assert(u_in->tag2 == TempReg);
qd = SHADOW(u_in->val2);
qs = SHADOW(u_in->val1);
qt = newShadow(cb);
/* qt = improve(vd,qd) */
uInstr2(cb, MOV, 4, TempReg, qd, TempReg, qt);
if (u_in->opcode == AND)
create_ImproveAND_TQ(cb, u_in->size, u_in->val2, qt);
else
create_ImproveOR_TQ(cb, u_in->size, u_in->val2, qt);
/* qd = qs `UifU` qd */
create_UifU(cb, u_in->size, qs, qd);
/* qd = qt `DifD` qd */
create_DifD(cb, u_in->size, qt, qd);
/* qt = improve(vs,qs) */
uInstr2(cb, MOV, 4, TempReg, qs, TempReg, qt);
if (u_in->opcode == AND)
create_ImproveAND_TQ(cb, u_in->size, u_in->val1, qt);
else
create_ImproveOR_TQ(cb, u_in->size, u_in->val1, qt);
/* qd = qt `DifD` qd */
create_DifD(cb, u_in->size, qt, qd);
/* So, finally qd is the result tag. */
if (u_in->flags_w != FlagsEmpty) {
create_PUTVF(cb, u_in->size, qd);
}
VG_(copyUInstr)(cb, u_in);
break;
/* Machinery to do with supporting CALLM. Copy the start and
end markers only to make the result easier to read
(debug); they generate no code and have no effect.
*/
case CALLM_S: case CALLM_E:
VG_(copyUInstr)(cb, u_in);
break;
/* Copy PUSH and POP verbatim. Arg/result absval
calculations are done when the associated CALL is
processed. CLEAR has no effect on absval calculations but
needs to be copied.
*/
case PUSH: case POP: case CLEAR:
VG_(copyUInstr)(cb, u_in);
break;
/* In short:
callm#(a1# ... an#) = (a1# `UifU` ... `UifU` an#)
We have to decide on a size to do the computation at,
although the choice doesn't affect correctness. We will
do a pcast to the final size anyway, so the only important
factor is to choose a size which minimises the total
number of casts needed. Here we just use size 0,
regardless. It may not be very good for performance
but does simplify matters, mainly by reducing the number
of different pessimising casts which have to be implemented.
*/
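/* Shape of the generated code (editor's sketch): for a CALLM
preceded by PUSHes of t1 and t2, the loop below emits
SETV 0, qt                                 -- qt := Defined
MOV q(t1), qtt ; PCast sz->0 ; UifU0 qtt,qt
MOV q(t2), qtt ; PCast sz->0 ; UifU0 qtt,qt
so qt ends up Undefined iff some pushed argument, or (below) a
flag read, carries at least one undefined bit. */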
case CALLM: {
UInstr* uu;
Bool res_used;
/* Now generate the code. Get the final result absval
into qt. */
qt = newShadow(cb);
qtt = newShadow(cb);
uInstr1(cb, SETV, 0, TempReg, qt);
for (j = i-1; cb_in->instrs[j].opcode != CALLM_S; j--) {
uu = & cb_in->instrs[j];
if (uu->opcode != PUSH) continue;
/* cast via a temporary */
uInstr2(cb, MOV, 4, TempReg, SHADOW(uu->val1),
TempReg, qtt);
create_PCast(cb, uu->size, 0, qtt);
create_UifU(cb, 0, qtt, qt);
}
/* Remembering also that flags read count as inputs. */
if (u_in->flags_r != FlagsEmpty) {
qtt = create_GETVF(cb, 0);
create_UifU(cb, 0, qtt, qt);
}
/* qt now holds the result tag. If any results from the
call are used, either by fetching with POP or
implicitly by writing the flags, we copy the result
absval to the relevant location. If not used, the call
must have been for its side effects, so we test qt here
and now. Note that this assumes that all values
removed by POP continue to be live. So dead args
*must* be removed with CLEAR, not by POPping them into
a dummy tempreg.
*/
res_used = False;
for (j = i+1; cb_in->instrs[j].opcode != CALLM_E; j++) {
uu = & cb_in->instrs[j];
if (uu->opcode != POP) continue;
/* Cast via a temp. */
uInstr2(cb, MOV, 4, TempReg, qt, TempReg, qtt);
create_PCast(cb, 0, uu->size, qtt);
uInstr2(cb, MOV, 4, TempReg, qtt,
TempReg, SHADOW(uu->val1));
res_used = True;
}
if (u_in->flags_w != FlagsEmpty) {
create_PUTVF(cb, 0, qt);
res_used = True;
}
if (!res_used) {
uInstr1(cb, TESTV, 0, TempReg, qt);
/* qt should never be referred to again. Nevertheless
... */
uInstr1(cb, SETV, 0, TempReg, qt);
}
VG_(copyUInstr)(cb, u_in);
break;
}
/* Whew ... */
case JMP:
if (u_in->tag1 == TempReg) {
uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
} else {
vg_assert(u_in->tag1 == Literal);
}
if (u_in->cond != CondAlways) {
vg_assert(u_in->flags_r != FlagsEmpty);
qt = create_GETVF(cb, 0);
uInstr1(cb, TESTV, 0, TempReg, qt);
/* qt should never be referred to again. Nevertheless
... */
uInstr1(cb, SETV, 0, TempReg, qt);
}
VG_(copyUInstr)(cb, u_in);
break;
case JIFZ:
uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val1));
uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val1));
VG_(copyUInstr)(cb, u_in);
break;
/* Emit a check on the address used. For FPU_R, the value
loaded into the FPU is checked at the time it is read from
memory (see synth_fpu_mem_check_actions). */
case FPU_R: case FPU_W:
vg_assert(u_in->tag2 == TempReg);
uInstr1(cb, TESTV, 4, TempReg, SHADOW(u_in->val2));
uInstr1(cb, SETV, 4, TempReg, SHADOW(u_in->val2));
VG_(copyUInstr)(cb, u_in);
break;
/* For FPU insns not referencing memory, just copy thru. */
case FPU:
VG_(copyUInstr)(cb, u_in);
break;
default:
VG_(ppUInstr)(0, u_in);
VG_(panic)( "vg_instrument: unhandled case");
} /* end of switch (u_in->opcode) */
} /* end of for loop */
VG_(freeCodeBlock)(cb_in);
return cb;
}
/*------------------------------------------------------------*/
/*--- Clean up mem check instrumentation. ---*/
/*------------------------------------------------------------*/
#define VGC_IS_SHADOW(tempreg) (((tempreg) % 2) == 1)
#define VGC_UNDEF ((UChar)100)
#define VGC_VALUE ((UChar)101)
/* NB: the next two macros assume an Int 'i', the index of the
current instruction, is in scope at the point of use. */
#define NOP_no_msg(uu) \
do { (uu)->opcode = NOP; } while (False)
#define NOP_tag1_op(uu) \
do { (uu)->opcode = NOP; \
if (VG_(disassemble)) \
VG_(printf)("at %d: delete %s due to defd arg\n", \
i, VG_(nameOfTagOp)((uu)->val3)); \
} while (False)
#define SETV_tag1_op(uu,newsz) \
do { (uu)->opcode = SETV; \
(uu)->size = (newsz); \
(uu)->tag2 = (uu)->tag3 = NoValue; \
if (VG_(disassemble)) \
VG_(printf)("at %d: convert %s to SETV%d " \
"due to defd arg\n", \
i, VG_(nameOfTagOp)((uu)->val3), (newsz)); \
} while (False)
/* Run backwards and delete SETVs (and other pure tag writes) on
shadow temps for which the next action is a write. Needs an env
recording, for each temp, whether the next action on it is a
write. The supplied UCodeBlock is destructively modified.
*/
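/* Example (editor's addition): scanning this sequence backwards
SETV   4, q33      <- deleted; q33 is rewritten before any read
GETV   %eax, q33
STOREV q33, (t8)
the STOREV reads q33 (next_is_write[33] := False), the GETV then
writes it (next_is_write[33] := True), so when the scan reaches
the SETV its target is dead and the SETV is NOPped. */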
static void vg_delete_redundant_SETVs ( UCodeBlock* cb )
{
Bool* next_is_write;
Int i, j, k, n_temps;
UInstr* u;
TempUse tempUse[3];
n_temps = cb->nextTemp;
if (n_temps == 0) return;
next_is_write = VG_(jitmalloc)(n_temps * sizeof(Bool));
for (i = 0; i < n_temps; i++) next_is_write[i] = True;
for (i = cb->used-1; i >= 0; i--) {
u = &cb->instrs[i];
/* If we're not checking address V bits, there will be a lot of
GETVs, TAG1s and TAG2s calculating values which are never
used. These first three cases get rid of them. */
if (u->opcode == GETV && VGC_IS_SHADOW(u->val2)
&& next_is_write[u->val2]
&& !VG_(clo_check_addrVs)) {
u->opcode = NOP;
u->size = 0;
if (VG_(disassemble))
VG_(printf)("at %d: delete GETV\n", i);
} else
if (u->opcode == TAG1 && VGC_IS_SHADOW(u->val1)
&& next_is_write[u->val1]
&& !VG_(clo_check_addrVs)) {
u->opcode = NOP;
u->size = 0;
if (VG_(disassemble))
VG_(printf)("at %d: delete TAG1\n", i);
} else
if (u->opcode == TAG2 && VGC_IS_SHADOW(u->val2)
&& next_is_write[u->val2]
&& !VG_(clo_check_addrVs)) {
u->opcode = NOP;
u->size = 0;
if (VG_(disassemble))
VG_(printf)("at %d: delete TAG2\n", i);
} else
/* We do the rest of these regardless of whether or not
addresses are V-checked. */
if (u->opcode == MOV && VGC_IS_SHADOW(u->val2)
&& next_is_write[u->val2]) {
/* This MOV is pointless because the target is dead at this
point. Delete it. */
u->opcode = NOP;
u->size = 0;
if (VG_(disassemble))
VG_(printf)("at %d: delete MOV\n", i);
} else
if (u->opcode == SETV) {
if (u->tag1 == TempReg) {
vg_assert(VGC_IS_SHADOW(u->val1));
if (next_is_write[u->val1]) {
/* This write is pointless, so annul it. */
u->opcode = NOP;
u->size = 0;
if (VG_(disassemble))
VG_(printf)("at %d: delete SETV\n", i);
} else {
/* This write has a purpose; don't annul it, but do
notice that we did it. */
next_is_write[u->val1] = True;
}
}
} else {
/* Find out what this insn does to the temps. */
k = getTempUsage(u, &tempUse[0]);
vg_assert(k <= 3);
for (j = k-1; j >= 0; j--) {
next_is_write[ tempUse[j].tempNo ]
= tempUse[j].isWrite;
}
}
}
VG_(jitfree)(next_is_write);
}
/* Run forwards, propagating and using the is-completely-defined
property. This removes a lot of redundant tag-munging code.
Unfortunately it requires intimate knowledge of how each uinstr and
tagop modifies its arguments. This duplicates knowledge of uinstr
tempreg uses embodied in getTempUsage(), which is unfortunate.
The supplied UCodeBlock* is modified in-place.
For each value temp, def[] should hold VGC_VALUE.
For each shadow temp, def[] may hold 4, 2, 1 or 0 iff that shadow is
definitely known to be fully defined at that size. In all other
circumstances a shadow's def[] entry is VGC_UNDEF, meaning possibly
undefined. In cases of doubt, VGC_UNDEF is always safe.
*/
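/* Example (editor's addition): after "SETV 4, q" we record
def[q] == 4, so a later "TESTV 4, q" can never complain and is
deleted, and a later PUTV or STOREV of q can take the
all-defined literal pattern for its size instead of reading q. */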
static void vg_propagate_definedness ( UCodeBlock* cb )
{
UChar* def;
Int i, j, k, t, n_temps;
UInstr* u;
TempUse tempUse[3];
n_temps = cb->nextTemp;
if (n_temps == 0) return;
def = VG_(jitmalloc)(n_temps * sizeof(UChar));
for (i = 0; i < n_temps; i++)
def[i] = VGC_IS_SHADOW(i) ? VGC_UNDEF : VGC_VALUE;
/* Run forwards, detecting and using the all-defined property. */
for (i = 0; i < cb->used; i++) {
u = &cb->instrs[i];
switch (u->opcode) {
/* Tag-handling uinstrs. */
/* Deal with these quickly. */
case NOP:
case INCEIP:
break;
/* Make a tag defined. */
case SETV:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
def[u->val1] = u->size;
break;
/* Check definedness of a tag. */
case TESTV:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
if (def[u->val1] <= 4) {
vg_assert(def[u->val1] == u->size);
NOP_no_msg(u);
if (VG_(disassemble))
VG_(printf)("at %d: delete TESTV on defd arg\n", i);
}
break;
/* Applies to both values and tags. Propagate Definedness
property through copies. Note that this isn't optional;
we *have* to do this to keep def[] correct. */
case MOV:
vg_assert(u->tag2 == TempReg);
if (u->tag1 == TempReg) {
if (VGC_IS_SHADOW(u->val1)) {
vg_assert(VGC_IS_SHADOW(u->val2));
def[u->val2] = def[u->val1];
}
}
break;
case PUTV:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
if (def[u->val1] <= 4) {
vg_assert(def[u->val1] == u->size);
u->tag1 = Literal;
u->val1 = 0;
switch (u->size) {
case 4: u->lit32 = 0x00000000; break;
case 2: u->lit32 = 0xFFFF0000; break;
case 1: u->lit32 = 0xFFFFFF00; break;
default: VG_(panic)("vg_cleanup(PUTV)");
}
if (VG_(disassemble))
VG_(printf)(
"at %d: propagate definedness into PUTV\n", i);
}
break;
case STOREV:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
if (def[u->val1] <= 4) {
vg_assert(def[u->val1] == u->size);
u->tag1 = Literal;
u->val1 = 0;
switch (u->size) {
case 4: u->lit32 = 0x00000000; break;
case 2: u->lit32 = 0xFFFF0000; break;
case 1: u->lit32 = 0xFFFFFF00; break;
default: VG_(panic)("vg_cleanup(STOREV)");
}
if (VG_(disassemble))
VG_(printf)(
"at %d: propagate definedness into STandV\n", i);
}
break;
/* Nothing interesting we can do with this, I think. */
case PUTVF:
break;
/* Tag handling operations. */
case TAG2:
vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
vg_assert(u->tag3 == Lit16);
/* Ultra-paranoid "type" checking. */
switch (u->val3) {
case VgT_ImproveAND4_TQ: case VgT_ImproveAND2_TQ:
case VgT_ImproveAND1_TQ: case VgT_ImproveOR4_TQ:
case VgT_ImproveOR2_TQ: case VgT_ImproveOR1_TQ:
vg_assert(u->tag1 == TempReg && !VGC_IS_SHADOW(u->val1));
break;
default:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
break;
}
switch (u->val3) {
Int sz;
case VgT_UifU4:
sz = 4; goto do_UifU;
case VgT_UifU2:
sz = 2; goto do_UifU;
case VgT_UifU1:
sz = 1; goto do_UifU;
case VgT_UifU0:
sz = 0; goto do_UifU;
do_UifU:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
vg_assert(u->tag2 == TempReg && VGC_IS_SHADOW(u->val2));
if (def[u->val1] <= 4) {
/* UifU. The first arg is defined, so result is
simply second arg. Delete this operation. */
vg_assert(def[u->val1] == sz);
NOP_no_msg(u);
if (VG_(disassemble))
VG_(printf)(
"at %d: delete UifU%d due to defd arg1\n",
i, sz);
}
else
if (def[u->val2] <= 4) {
/* UifU. The second arg is defined, so result is
simply first arg. Copy to second. */
vg_assert(def[u->val2] == sz);
u->opcode = MOV;
u->size = 4;
u->tag3 = NoValue;
def[u->val2] = def[u->val1];
if (VG_(disassemble))
VG_(printf)(
"at %d: change UifU%d to MOV due to defd"
" arg2\n",
i, sz);
}
break;
case VgT_ImproveAND4_TQ:
sz = 4; goto do_ImproveAND;
case VgT_ImproveAND1_TQ:
sz = 1; goto do_ImproveAND;
do_ImproveAND:
/* Implements Q = T OR Q. So if Q is entirely defined,
i.e. all 0s, we get MOV T, Q. */
if (def[u->val2] <= 4) {
vg_assert(def[u->val2] == sz);
u->size = 4; /* Regardless of sz */
u->opcode = MOV;
u->tag3 = NoValue;
def[u->val2] = VGC_UNDEF;
if (VG_(disassemble))
VG_(printf)(
"at %d: change ImproveAND%d_TQ to MOV due "
"to defd arg2\n",
i, sz);
}
break;
default:
goto unhandled;
}
break;
case TAG1:
vg_assert(u->tag1 == TempReg && VGC_IS_SHADOW(u->val1));
if (def[u->val1] > 4) break;
/* We now know that the arg to the op is entirely defined.
If the op changes the size of the arg, we must replace
it with a SETV at the new size. If it doesn't change
the size, we can delete it completely. */
switch (u->val3) {
/* Maintain the same size ... */
case VgT_Left4:
vg_assert(def[u->val1] == 4);
NOP_tag1_op(u);
break;
case VgT_PCast11:
vg_assert(def[u->val1] == 1);
NOP_tag1_op(u);
break;
/* Change size ... */
case VgT_PCast40:
vg_assert(def[u->val1] == 4);
SETV_tag1_op(u,0);
def[u->val1] = 0;
break;
case VgT_PCast14:
vg_assert(def[u->val1] == 1);
SETV_tag1_op(u,4);
def[u->val1] = 4;
break;
case VgT_PCast12:
vg_assert(def[u->val1] == 1);
SETV_tag1_op(u,2);
def[u->val1] = 2;
break;
case VgT_PCast10:
vg_assert(def[u->val1] == 1);
SETV_tag1_op(u,0);
def[u->val1] = 0;
break;
case VgT_PCast02:
vg_assert(def[u->val1] == 0);
SETV_tag1_op(u,2);
def[u->val1] = 2;
break;
default:
goto unhandled;
}
if (VG_(disassemble))
VG_(printf)(
"at %d: delete TAG1 %s due to defd arg\n",
i, VG_(nameOfTagOp(u->val3)));
break;
default:
unhandled:
/* We don't know how to handle this uinstr. Be safe, and
set to VGC_VALUE or VGC_UNDEF all temps written by it. */
k = getTempUsage(u, &tempUse[0]);
vg_assert(k <= 3);
for (j = 0; j < k; j++) {
t = tempUse[j].tempNo;
vg_assert(t >= 0 && t < n_temps);
if (!tempUse[j].isWrite) {
/* t is read; ignore it. */
if (0&& VGC_IS_SHADOW(t) && def[t] <= 4)
VG_(printf)("ignoring def %d at %s %s\n",
def[t],
VG_(nameUOpcode)(True, u->opcode),
(u->opcode == TAG1 || u->opcode == TAG2)
? VG_(nameOfTagOp)(u->val3)
: (Char*)"");
} else {
/* t is written; better nullify it. */
def[t] = VGC_IS_SHADOW(t) ? VGC_UNDEF : VGC_VALUE;
}
}
}
}
VG_(jitfree)(def);
}
/* Top level post-instrumentation cleanup function. */
static void vg_cleanup ( UCodeBlock* cb )
{
vg_propagate_definedness ( cb );
vg_delete_redundant_SETVs ( cb );
}
/*------------------------------------------------------------*/
/*--- Main entry point for the JITter. ---*/
/*------------------------------------------------------------*/
/* Translate the basic block beginning at orig_addr, placing the
translation in a vg_malloc'd block, the address and size of which
are returned in trans_addr and trans_size. Length of the original
block is also returned in orig_size. If the latter three are NULL,
this call is being done for debugging purposes, in which case (a)
throw away the translation once it is made, and (b) produce a load
of debugging output.
*/
void VG_(translate) ( ThreadState* tst,
/* Identity of thread needing this block */
Addr orig_addr,
UInt* orig_size,
Addr* trans_addr,
UInt* trans_size )
{
Int n_disassembled_bytes, final_code_size;
Bool debugging_translation;
UChar* final_code;
UCodeBlock* cb;
VGP_PUSHCC(VgpTranslate);
debugging_translation
= orig_size == NULL || trans_addr == NULL || trans_size == NULL;
dis = debugging_translation;
/* Check if we're being asked to jump to a silly address, and if so
record an error message before potentially crashing the entire
system. */
if (VG_(clo_instrument) && !debugging_translation && !dis) {
Addr bad_addr;
Bool ok = VGM_(check_readable) ( orig_addr, 1, &bad_addr );
if (!ok) {
VG_(record_jump_error)(tst, bad_addr);
}
}
/* if (VG_(overall_in_count) >= 4800) dis=True; */
if (VG_(disassemble))
VG_(printf)("\n");
if (0 || dis
|| (VG_(overall_in_count) > 0 &&
(VG_(overall_in_count) % 1000 == 0))) {
if (0 && (VG_(clo_verbosity) > 1 || dis))
VG_(message)(Vg_UserMsg,
"trans# %d, bb# %lu, in %d, out %d, addr 0x%x",
VG_(overall_in_count),
VG_(bbs_done),
VG_(overall_in_osize), VG_(overall_in_tsize),
orig_addr );
}
cb = VG_(allocCodeBlock)();
/* Disassemble this basic block into cb. */
/* VGP_PUSHCC(VgpToUCode); */
n_disassembled_bytes = VG_(disBB) ( cb, orig_addr );
/* VGP_POPCC; */
/* dis=True; */
/* if (0&& VG_(translations_done) < 617) */
/* dis=False; */
/* Try and improve the code a bit. */
if (VG_(clo_optimise)) {
/* VGP_PUSHCC(VgpImprove); */
vg_improve ( cb );
if (VG_(disassemble))
VG_(ppUCodeBlock) ( cb, "Improved code:" );
/* VGP_POPCC; */
}
/* dis=False; */
/* Add instrumentation code. */
if (VG_(clo_instrument)) {
/* VGP_PUSHCC(VgpInstrument); */
cb = vg_instrument(cb);
/* VGP_POPCC; */
if (VG_(disassemble))
VG_(ppUCodeBlock) ( cb, "Instrumented code:" );
if (VG_(clo_cleanup)) {
/* VGP_PUSHCC(VgpCleanup); */
vg_cleanup(cb);
/* VGP_POPCC; */
if (VG_(disassemble))
VG_(ppUCodeBlock) ( cb, "Cleaned-up instrumented code:" );
}
}
/* VG_(disassemble) = True; */
/* Add cache simulation code. */
if (VG_(clo_cachesim)) {
/* VGP_PUSHCC(VgpCacheInstrument); */
cb = VG_(cachesim_instrument)(cb, orig_addr);
/* VGP_POPCC; */
if (VG_(disassemble))
VG_(ppUCodeBlock) ( cb, "Cachesim instrumented code:" );
}
/* VG_(disassemble) = False; */
/* Allocate registers. */
/* VGP_PUSHCC(VgpRegAlloc); */
cb = vg_do_register_allocation ( cb );
/* VGP_POPCC; */
/* dis=False; */
/*
if (VG_(disassemble))
VG_(ppUCodeBlock) ( cb, "After Register Allocation:");
*/
/* VGP_PUSHCC(VgpFromUcode); */
/* NB final_code is allocated with VG_(jitmalloc), not VG_(malloc)
and so must be VG_(jitfree)'d. */
final_code = VG_(emit_code)(cb, &final_code_size );
/* VGP_POPCC; */
VG_(freeCodeBlock)(cb);
if (debugging_translation) {
/* Only done for debugging -- throw away final result. */
VG_(jitfree)(final_code);
} else {
/* Doing it for real -- return values to caller. */
*orig_size = n_disassembled_bytes;
*trans_addr = (Addr)final_code;
*trans_size = final_code_size;
}
VGP_POPCC;
}
/*--------------------------------------------------------------------*/
/*--- end vg_translate.c ---*/
/*--------------------------------------------------------------------*/