Merge branches/TCHAIN from r2271 (its creation point) into trunk.
git-svn-id: svn://svn.valgrind.org/vex/trunk@2296 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/Makefile-gcc b/Makefile-gcc
index a3611e0..76f7230 100644
--- a/Makefile-gcc
+++ b/Makefile-gcc
@@ -88,7 +88,9 @@
-Wpointer-arith -Wbad-function-cast -Wcast-qual \
-Wcast-align -Wmissing-declarations \
-Wno-pointer-sign \
- $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing
+ $(EXTRA_CFLAGS) -g -O2 -fstrict-aliasing \
+ \
+ -O
#CC = icc
#CCFLAGS = -g -Wall -wd981 -wd279 -wd1287 -wd869 -wd111 -wd188 -wd186
diff --git a/priv/guest_amd64_defs.h b/priv/guest_amd64_defs.h
index 6038ded..93b6d12 100644
--- a/priv/guest_amd64_defs.h
+++ b/priv/guest_amd64_defs.h
@@ -47,7 +47,6 @@
bb_to_IR.h. */
extern
DisResult disInstr_AMD64 ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
diff --git a/priv/guest_amd64_helpers.c b/priv/guest_amd64_helpers.c
index 022edaa..80b5a74 100644
--- a/priv/guest_amd64_helpers.c
+++ b/priv/guest_amd64_helpers.c
@@ -3452,6 +3452,10 @@
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestAMD64_initialise ( /*OUT*/VexGuestAMD64State* vex_state )
{
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+ vex_state->pad0 = 0;
+
vex_state->guest_RAX = 0;
vex_state->guest_RCX = 0;
vex_state->guest_RDX = 0;
@@ -3522,7 +3526,7 @@
vex_state->guest_GS_0x60 = 0;
vex_state->guest_IP_AT_SYSCALL = 0;
- /* vex_state->padding = 0; */
+ vex_state->pad1 = 0;
}
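The two host_EvC_* fields initialised above back the event-check mechanism that
translation chaining depends on: each translation's preamble decrements the
counter and, once it goes negative, exits to the fail address so the dispatcher
regains control. A rough sketch of the idea in C -- an assumption about the
behaviour, since the real check is emitted as machine code by the host
backends, and return_to_dispatcher is a hypothetical name for that transfer:

    /* Conceptual event check at translation entry (illustrative only;
       the backends emit equivalent machine code). */
    if (--vex_state->host_EvC_COUNTER < 0) {
       /* Counter expired: leave the chained-translation fast path and
          hand control back to the scheduler via the fail address. */
       return_to_dispatcher(vex_state->host_EvC_FAILADDR);
    }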
diff --git a/priv/guest_amd64_toIR.c b/priv/guest_amd64_toIR.c
index b94d0b6..ab79312 100644
--- a/priv/guest_amd64_toIR.c
+++ b/priv/guest_amd64_toIR.c
@@ -1511,7 +1511,8 @@
binop( mkSizedOp(tyE,Iop_CasCmpNE8),
mkexpr(oldTmp), mkexpr(expTmp) ),
Ijk_Boring, /*Ijk_NoRedir*/
- IRConst_U64( restart_point )
+ IRConst_U64( restart_point ),
+ OFFB_RIP
));
}
@@ -2091,36 +2092,55 @@
/*--- JMP helpers ---*/
/*------------------------------------------------------------*/
-static void jmp_lit( IRJumpKind kind, Addr64 d64 )
+static void jmp_lit( /*MOD*/DisResult* dres,
+ IRJumpKind kind, Addr64 d64 )
{
- irsb->next = mkU64(d64);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(d64) ) );
}
-static void jmp_treg( IRJumpKind kind, IRTemp t )
+static void jmp_treg( /*MOD*/DisResult* dres,
+ IRJumpKind kind, IRTemp t )
{
- irsb->next = mkexpr(t);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_RIP, mkexpr(t) ) );
}
static
-void jcc_01 ( AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
+void jcc_01 ( /*MOD*/DisResult* dres,
+ AMD64Condcode cond, Addr64 d64_false, Addr64 d64_true )
{
Bool invert;
AMD64Condcode condPos;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_Boring;
condPos = positiveIse_AMD64Condcode ( cond, &invert );
if (invert) {
stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U64(d64_false) ) );
- irsb->next = mkU64(d64_true);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U64(d64_false),
+ OFFB_RIP ) );
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_true) ) );
} else {
stmt( IRStmt_Exit( mk_amd64g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U64(d64_true) ) );
- irsb->next = mkU64(d64_false);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U64(d64_true),
+ OFFB_RIP ) );
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(d64_false) ) );
}
}
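These three helpers establish the convention used throughout the rest of this
patch: a decoder ends a block by PUT-ing the next guest RIP into the guest
state and recording the jump kind in the DisResult, never by writing
irsb->next/irsb->jumpkind directly. A minimal call-site sketch, using only
names that appear in this diff (the wrapper function itself is illustrative):

    /* Sketch: translating an unconditional "jmp imm64". */
    static Long dis_jmp_imm64_sketch ( /*MOD*/DisResult* dres,
                                       Long delta, Addr64 d64 )
    {
       jmp_lit(dres, Ijk_Boring, d64);           /* PUT(OFFB_RIP) = d64 */
       vassert(dres->whatNext == Dis_StopHere);  /* set by the helper */
       return delta;
    }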
@@ -3966,7 +3986,7 @@
static
ULong dis_Grp5 ( VexAbiInfo* vbi,
Prefix pfx, Int sz, Long delta,
- DisResult* dres, Bool* decode_OK )
+ /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
Int len;
UChar modrm;
@@ -4009,8 +4029,8 @@
putIReg64(R_RSP, mkexpr(t2));
storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+1));
make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(reg)");
- jmp_treg(Ijk_Call,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
case 4: /* jmp Ev */
@@ -4019,8 +4039,8 @@
sz = 8;
t3 = newTemp(Ity_I64);
assign(t3, getIRegE(sz,pfx,modrm));
- jmp_treg(Ijk_Boring,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
default:
@@ -4073,8 +4093,8 @@
putIReg64(R_RSP, mkexpr(t2));
storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta+len));
make_redzone_AbiHint(vbi, t2, t3/*nia*/, "call-Ev(mem)");
- jmp_treg(Ijk_Call,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
case 4: /* JMP Ev */
@@ -4083,8 +4103,8 @@
sz = 8;
t3 = newTemp(Ity_I64);
assign(t3, loadLE(Ity_I64,mkexpr(addr)));
- jmp_treg(Ijk_Boring,t3);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t3);
+ vassert(dres->whatNext == Dis_StopHere);
showSz = False;
break;
case 6: /* PUSH Ev */
@@ -4287,7 +4307,8 @@
the insn is the last one in the basic block, and so emit a jump to
the next insn, rather than just falling through. */
static
-void dis_REP_op ( AMD64Condcode cond,
+void dis_REP_op ( /*MOD*/DisResult* dres,
+ AMD64Condcode cond,
void (*dis_OP)(Int, IRTemp, Prefix),
Int sz, Addr64 rip, Addr64 rip_next, HChar* name,
Prefix pfx )
@@ -4310,7 +4331,8 @@
cmp = binop(Iop_CmpEQ64, mkexpr(tc), mkU64(0));
}
- stmt( IRStmt_Exit( cmp, Ijk_Boring, IRConst_U64(rip_next) ) );
+ stmt( IRStmt_Exit( cmp, Ijk_Boring,
+ IRConst_U64(rip_next), OFFB_RIP ) );
if (haveASO(pfx))
putIReg32(R_RCX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
@@ -4321,12 +4343,15 @@
dis_OP (sz, t_inc, pfx);
if (cond == AMD64CondAlways) {
- jmp_lit(Ijk_Boring,rip);
+ jmp_lit(dres, Ijk_Boring, rip);
+ vassert(dres->whatNext == Dis_StopHere);
} else {
stmt( IRStmt_Exit( mk_amd64g_calculate_condition(cond),
Ijk_Boring,
- IRConst_U64(rip) ) );
- jmp_lit(Ijk_Boring,rip_next);
+ IRConst_U64(rip),
+ OFFB_RIP ) );
+ jmp_lit(dres, Ijk_Boring, rip_next);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("%s%c\n", name, nameISize(sz));
}
@@ -5130,7 +5155,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U64( guest_RIP_bbstart+delta )
+ IRConst_U64( guest_RIP_bbstart+delta ),
+ OFFB_RIP
)
);
@@ -5172,7 +5198,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U64( guest_RIP_bbstart+delta )
+ IRConst_U64( guest_RIP_bbstart+delta ),
+ OFFB_RIP
)
);
break;
@@ -6108,7 +6135,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U64( guest_RIP_bbstart+delta )
+ IRConst_U64( guest_RIP_bbstart+delta ),
+ OFFB_RIP
)
);
@@ -8143,7 +8171,7 @@
//.. }
static
-void dis_ret ( VexAbiInfo* vbi, ULong d64 )
+void dis_ret ( /*MOD*/DisResult* dres, VexAbiInfo* vbi, ULong d64 )
{
IRTemp t1 = newTemp(Ity_I64);
IRTemp t2 = newTemp(Ity_I64);
@@ -8153,7 +8181,8 @@
assign(t3, binop(Iop_Add64, mkexpr(t1), mkU64(8+d64)));
putIReg64(R_RSP, mkexpr(t3));
make_redzone_AbiHint(vbi, t3, t2/*nia*/, "ret");
- jmp_treg(Ijk_Ret,t2);
+ jmp_treg(dres, Ijk_Ret, t2);
+ vassert(dres->whatNext == Dis_StopHere);
}
@@ -8964,7 +8993,8 @@
binop(Iop_And64,mkexpr(effective_addr),mkU64(0xF)),
mkU64(0)),
Ijk_SigSEGV,
- IRConst_U64(guest_RIP_curr_instr)
+ IRConst_U64(guest_RIP_curr_instr),
+ OFFB_RIP
)
);
}
@@ -11452,10 +11482,8 @@
mkU64( ~(lineszB-1) ))) );
stmt( IRStmt_Put(OFFB_TILEN, mkU64(lineszB) ) );
-
- irsb->jumpkind = Ijk_TInval;
- irsb->next = mkU64(guest_RIP_bbstart+delta);
- dres->whatNext = Dis_StopHere;
+
+ jmp_lit(dres, Ijk_TInval, (Addr64)(guest_RIP_bbstart+delta));
DIP("clflush %s\n", dis_buf);
goto decode_success;
@@ -11527,7 +11555,8 @@
IRStmt_Exit(
binop(Iop_CmpNE64, unop(Iop_32Uto64,mkexpr(ew)), mkU64(0)),
Ijk_EmWarn,
- IRConst_U64(guest_RIP_bbstart+delta)
+ IRConst_U64(guest_RIP_bbstart+delta),
+ OFFB_RIP
)
);
goto decode_success;
@@ -16954,7 +16983,8 @@
mk_amd64g_calculate_condition(
(AMD64Condcode)(1 ^ (opc - 0x70))),
Ijk_Boring,
- IRConst_U64(guest_RIP_bbstart+delta) ) );
+ IRConst_U64(guest_RIP_bbstart+delta),
+ OFFB_RIP ) );
dres->whatNext = Dis_ResteerC;
dres->continueAt = d64;
comment = "(assumed taken)";
@@ -16972,7 +17002,8 @@
stmt( IRStmt_Exit(
mk_amd64g_calculate_condition((AMD64Condcode)(opc - 0x70)),
Ijk_Boring,
- IRConst_U64(d64) ) );
+ IRConst_U64(d64),
+ OFFB_RIP ) );
dres->whatNext = Dis_ResteerC;
dres->continueAt = guest_RIP_bbstart+delta;
comment = "(assumed not taken)";
@@ -16980,10 +17011,9 @@
else {
/* Conservative default translation - end the block at this
point. */
- jcc_01( (AMD64Condcode)(opc - 0x70),
- guest_RIP_bbstart+delta,
- d64 );
- dres->whatNext = Dis_StopHere;
+ jcc_01( dres, (AMD64Condcode)(opc - 0x70),
+ guest_RIP_bbstart+delta, d64 );
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("j%s-8 0x%llx %s\n", name_AMD64Condcode(opc - 0x70), d64, comment);
return delta;
@@ -17154,8 +17184,8 @@
DIP("rep nop (P4 pause)\n");
/* "observe" the hint. The Vex client needs to be careful not
to cause very long delays as a result, though. */
- jmp_lit(Ijk_Yield, guest_RIP_bbstart+delta);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Yield, guest_RIP_bbstart+delta);
+ vassert(dres->whatNext == Dis_StopHere);
return delta;
}
/* detect and handle NOPs specially */
@@ -17393,7 +17423,7 @@
if (haveF3(pfx) && !haveF2(pfx)) {
if (opc == 0xA4)
sz = 1;
- dis_REP_op ( AMD64CondAlways, dis_MOVS, sz,
+ dis_REP_op ( dres, AMD64CondAlways, dis_MOVS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "rep movs", pfx );
dres->whatNext = Dis_StopHere;
@@ -17414,7 +17444,7 @@
if (haveF3(pfx) && !haveF2(pfx)) {
if (opc == 0xA6)
sz = 1;
- dis_REP_op ( AMD64CondZ, dis_CMPS, sz,
+ dis_REP_op ( dres, AMD64CondZ, dis_CMPS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "repe cmps", pfx );
dres->whatNext = Dis_StopHere;
@@ -17428,11 +17458,11 @@
if (haveF3(pfx) && !haveF2(pfx)) {
if (opc == 0xAA)
sz = 1;
- dis_REP_op ( AMD64CondAlways, dis_STOS, sz,
+ dis_REP_op ( dres, AMD64CondAlways, dis_STOS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "rep stos", pfx );
- dres->whatNext = Dis_StopHere;
- return delta;
+ vassert(dres->whatNext == Dis_StopHere);
+ return delta;
}
/* AA/AB: stosb/stos{w,l,q} */
if (!haveF3(pfx) && !haveF2(pfx)) {
@@ -17463,20 +17493,20 @@
if (haveF2(pfx) && !haveF3(pfx)) {
if (opc == 0xAE)
sz = 1;
- dis_REP_op ( AMD64CondNZ, dis_SCAS, sz,
+ dis_REP_op ( dres, AMD64CondNZ, dis_SCAS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "repne scas", pfx );
- dres->whatNext = Dis_StopHere;
+ vassert(dres->whatNext == Dis_StopHere);
return delta;
}
/* F3 AE/AF: repe scasb/repe scas{w,l,q} */
if (!haveF2(pfx) && haveF3(pfx)) {
if (opc == 0xAE)
sz = 1;
- dis_REP_op ( AMD64CondZ, dis_SCAS, sz,
+ dis_REP_op ( dres, AMD64CondZ, dis_SCAS, sz,
guest_RIP_curr_instr,
guest_RIP_bbstart+delta, "repe scas", pfx );
- dres->whatNext = Dis_StopHere;
+ vassert(dres->whatNext == Dis_StopHere);
return delta;
}
/* AE/AF: scasb/scas{w,l,q} */
@@ -17563,16 +17593,14 @@
if (have66orF2orF3(pfx)) goto decode_failure;
d64 = getUDisp16(delta);
delta += 2;
- dis_ret(vbi, d64);
- dres->whatNext = Dis_StopHere;
+ dis_ret(dres, vbi, d64);
DIP("ret $%lld\n", d64);
return delta;
case 0xC3: /* RET */
if (have66orF2(pfx)) goto decode_failure;
/* F3 is acceptable on AMD. */
- dis_ret(vbi, 0);
- dres->whatNext = Dis_StopHere;
+ dis_ret(dres, vbi, 0);
DIP(haveF3(pfx) ? "rep ; ret\n" : "ret\n");
return delta;
@@ -17655,8 +17683,8 @@
return delta;
case 0xCC: /* INT 3 */
- jmp_lit(Ijk_SigTRAP, guest_RIP_bbstart + delta);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_SigTRAP, guest_RIP_bbstart + delta);
+ vassert(dres->whatNext == Dis_StopHere);
DIP("int $0x3\n");
return delta;
@@ -17808,7 +17836,7 @@
default:
vassert(0);
}
- stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64)) );
+ stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(d64), OFFB_RIP) );
DIP("loop%s%s 0x%llx\n", xtra, haveASO(pfx) ? "l" : "", d64);
return delta;
@@ -17822,20 +17850,22 @@
if (haveASO(pfx)) {
/* 32-bit */
stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
- unop(Iop_32Uto64, getIReg32(R_RCX)),
- mkU64(0)),
- Ijk_Boring,
- IRConst_U64(d64))
- );
+ unop(Iop_32Uto64, getIReg32(R_RCX)),
+ mkU64(0)),
+ Ijk_Boring,
+ IRConst_U64(d64),
+ OFFB_RIP
+ ));
DIP("jecxz 0x%llx\n", d64);
} else {
/* 64-bit */
stmt( IRStmt_Exit( binop(Iop_CmpEQ64,
getIReg64(R_RCX),
mkU64(0)),
- Ijk_Boring,
- IRConst_U64(d64))
- );
+ Ijk_Boring,
+ IRConst_U64(d64),
+ OFFB_RIP
+ ));
DIP("jrcxz 0x%llx\n", d64);
}
return delta;
@@ -17953,8 +17983,8 @@
dres->whatNext = Dis_ResteerU;
dres->continueAt = d64;
} else {
- jmp_lit(Ijk_Call,d64);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Call, d64);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("call 0x%llx\n",d64);
return delta;
@@ -17969,8 +17999,8 @@
dres->whatNext = Dis_ResteerU;
dres->continueAt = d64;
} else {
- jmp_lit(Ijk_Boring,d64);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Boring, d64);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("jmp 0x%llx\n", d64);
return delta;
@@ -17985,8 +18015,8 @@
dres->whatNext = Dis_ResteerU;
dres->continueAt = d64;
} else {
- jmp_lit(Ijk_Boring,d64);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Boring, d64);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("jmp-8 0x%llx\n", d64);
return delta;
@@ -18153,8 +18183,8 @@
/* It's important that all guest state is up-to-date
at this point. So we declare an end-of-block here, which
forces any cached guest state to be flushed. */
- jmp_lit(Ijk_Sys_syscall, guest_RIP_next_assumed);
- dres->whatNext = Dis_StopHere;
+ jmp_lit(dres, Ijk_Sys_syscall, guest_RIP_next_assumed);
+ vassert(dres->whatNext == Dis_StopHere);
DIP("syscall\n");
return delta;
@@ -18243,7 +18273,9 @@
mk_amd64g_calculate_condition(
(AMD64Condcode)(1 ^ (opc - 0x80))),
Ijk_Boring,
- IRConst_U64(guest_RIP_bbstart+delta) ) );
+ IRConst_U64(guest_RIP_bbstart+delta),
+ OFFB_RIP
+ ));
dres->whatNext = Dis_ResteerC;
dres->continueAt = d64;
comment = "(assumed taken)";
@@ -18262,7 +18294,9 @@
mk_amd64g_calculate_condition((AMD64Condcode)
(opc - 0x80)),
Ijk_Boring,
- IRConst_U64(d64) ) );
+ IRConst_U64(d64),
+ OFFB_RIP
+ ));
dres->whatNext = Dis_ResteerC;
dres->continueAt = guest_RIP_bbstart+delta;
comment = "(assumed not taken)";
@@ -18270,10 +18304,9 @@
else {
/* Conservative default translation - end the block at
this point. */
- jcc_01( (AMD64Condcode)(opc - 0x80),
- guest_RIP_bbstart+delta,
- d64 );
- dres->whatNext = Dis_StopHere;
+ jcc_01( dres, (AMD64Condcode)(opc - 0x80),
+ guest_RIP_bbstart+delta, d64 );
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("j%s-32 0x%llx %s\n", name_AMD64Condcode(opc - 0x80), d64, comment);
return delta;
@@ -18985,7 +19018,6 @@
static
DisResult disInstr_AMD64_WRK (
/*OUT*/Bool* expect_CAS,
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -19015,10 +19047,10 @@
Prefix pfx = PFX_EMPTY;
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 0;
- dres.continueAt = 0;
-
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
*expect_CAS = False;
vassert(guest_RIP_next_assumed == 0);
@@ -19028,10 +19060,6 @@
DIP("\t0x%llx: ", guest_RIP_bbstart+delta);
- /* We may be asked to update the guest RIP before going further. */
- if (put_IP)
- stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr)) );
-
/* Spot "Special" instructions (see comment at top of file). */
{
UChar* code = (UChar*)(guest_code + delta);
@@ -19055,8 +19083,8 @@
/* %RDX = client_request ( %RAX ) */
DIP("%%rdx = client_request ( %%rax )\n");
delta += 19;
- jmp_lit(Ijk_ClientReq, guest_RIP_bbstart+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_ClientReq, guest_RIP_bbstart+delta);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
else
@@ -19080,8 +19108,8 @@
assign(t2, binop(Iop_Sub64, getIReg64(R_RSP), mkU64(8)));
putIReg64(R_RSP, mkexpr(t2));
storeLE( mkexpr(t2), mkU64(guest_RIP_bbstart+delta));
- jmp_treg(Ijk_NoRedir,t1);
- dres.whatNext = Dis_StopHere;
+ jmp_treg(&dres, Ijk_NoRedir, t1);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
/* We don't know what it is. */
@@ -19309,7 +19337,7 @@
/* It's important that all ArchRegs carry their up-to-date value
at this point. So we declare an end-of-block here, which
forces any TempRegs caching ArchRegs to be flushed. */
- dres.whatNext = Dis_StopHere;
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x%02x\n", (UInt)d64);
break;
}
@@ -19452,9 +19480,9 @@
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_curr_instr) ) );
- jmp_lit(Ijk_NoDecode, guest_RIP_curr_instr);
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ jmp_lit(&dres, Ijk_NoDecode, guest_RIP_curr_instr);
+ vassert(dres.whatNext == Dis_StopHere);
+ dres.len = 0;
/* We also need to say that a CAS is not expected now, regardless
of what it might have been set to at the start of the function,
since the IR that we've emitted just above (to synthesis a
@@ -19467,6 +19495,20 @@
decode_success:
/* All decode successes end up here. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(guest_RIP_bbstart + delta) ) );
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ stmt( IRStmt_Put( OFFB_RIP, mkU64(dres.continueAt) ) );
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
+
DIP("\n");
dres.len = (Int)toUInt(delta - delta_start);
return dres;
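That epilogue establishes the invariant bb_to_IR now asserts for every decoded
instruction: the final statement of the insn's IR is a Put to the guest IP.
For Dis_StopHere nothing needs to be emitted, since jmp_lit/jmp_treg/jcc_01
have already written OFFB_RIP. Restated as a checker -- a sketch mirroring the
assertions added to bb_to_IR below:

    /* Sketch: the property decode_success guarantees. */
    static void check_insn_ends_with_put_RIP ( IRSB* irsb )
    {
       IRStmt* st = irsb->stmts[irsb->stmts_used - 1];
       vassert(st && st->tag == Ist_Put);
       vassert(st->Ist.Put.offset == OFFB_RIP);
    }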
@@ -19484,7 +19526,6 @@
is located in host memory at &guest_code[delta]. */
DisResult disInstr_AMD64 ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -19514,7 +19555,7 @@
x1 = irsb_IN->stmts_used;
expect_CAS = False;
- dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
@@ -19547,7 +19588,7 @@
/* inconsistency detected. re-disassemble the instruction so as
to generate a useful error message; then assert. */
vex_traceflags |= VEX_TRACE_FE;
- dres = disInstr_AMD64_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_AMD64_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
diff --git a/priv/guest_arm_defs.h b/priv/guest_arm_defs.h
index be6dd1c..a225240 100644
--- a/priv/guest_arm_defs.h
+++ b/priv/guest_arm_defs.h
@@ -41,7 +41,6 @@
bb_to_IR.h. */
extern
DisResult disInstr_ARM ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -157,7 +156,7 @@
OP DEP1 DEP2 DEP3
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- OP_COPY current NZCV unused unused
+ OP_COPY curr_NZCV:28x0 unused unused
OP_ADD argL argR unused
OP_SUB argL argR unused
OP_ADC argL argR 31x0:old_C
diff --git a/priv/guest_arm_helpers.c b/priv/guest_arm_helpers.c
index a545e34..122d034 100644
--- a/priv/guest_arm_helpers.c
+++ b/priv/guest_arm_helpers.c
@@ -697,6 +697,18 @@
mkU32(1)));
}
+ /*---------------- COPY ----------------*/
+
+ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_COPY)) {
+ /* NE after COPY --> ((cc_dep1 >> ARMG_CC_SHIFT_Z) ^ 1) & 1 */
+ return binop(Iop_And32,
+ binop(Iop_Xor32,
+ binop(Iop_Shr32, cc_dep1,
+ mkU8(ARMG_CC_SHIFT_Z)),
+ mkU32(1)),
+ mkU32(1));
+ }
+
/*----------------- AL -----------------*/
/* A critically important case for Thumb code.
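The new COPY case folds "NE after OP_COPY" down to a Z-bit test on the copied
flags word. A quick check of the formula in plain C, assuming the usual VEX
NZCV layout with the Z flag at bit position ARMG_CC_SHIFT_Z (30) of cc_dep1:

    /* NE holds iff Z is clear in the copied NZCV word. */
    static UInt ne_after_copy ( UInt cc_dep1 )
    {
       UInt z = (cc_dep1 >> 30) & 1;   /* ARMG_CC_SHIFT_Z, assumed 30 */
       return z ^ 1;                   /* same as ((dep1 >> Z) ^ 1) & 1 */
    }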
@@ -937,6 +949,9 @@
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestARM_initialise ( /*OUT*/VexGuestARMState* vex_state )
{
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+
vex_state->guest_R0 = 0;
vex_state->guest_R1 = 0;
vex_state->guest_R2 = 0;
@@ -1014,8 +1029,6 @@
vex_state->guest_ITSTATE = 0;
vex_state->padding1 = 0;
- vex_state->padding2 = 0;
- vex_state->padding3 = 0;
}
diff --git a/priv/guest_arm_toIR.c b/priv/guest_arm_toIR.c
index 5935b1e..dcf1787 100644
--- a/priv/guest_arm_toIR.c
+++ b/priv/guest_arm_toIR.c
@@ -1398,7 +1398,8 @@
stmt( IRStmt_Exit(
unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
Ijk_Boring,
- IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4))
+ IRConst_U32(toUInt(guest_R15_curr_instr_notENC + 4)),
+ OFFB_R15T
));
}
@@ -1414,7 +1415,8 @@
stmt( IRStmt_Exit(
unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
Ijk_Boring,
- IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1))
+ IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 2) | 1)),
+ OFFB_R15T
));
}
@@ -1431,7 +1433,8 @@
stmt( IRStmt_Exit(
unop(Iop_Not1, unop(Iop_32to1, mkexpr(guardT))),
Ijk_Boring,
- IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1))
+ IRConst_U32(toUInt((guest_R15_curr_instr_notENC + 4) | 1)),
+ OFFB_R15T
));
}
@@ -1448,7 +1451,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(t), mkU32(0)),
Ijk_NoDecode,
- IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1))
+ IRConst_U32(toUInt(guest_R15_curr_instr_notENC | 1)),
+ OFFB_R15T
)
);
}
@@ -11962,9 +11966,9 @@
UInt dst = guest_R15_curr_instr_notENC + 8 + (simm24 | 1);
putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
IRTemp_INVALID/*because AL*/, Ijk_Boring );
- irsb->next = mkU32(dst);
- irsb->jumpkind = Ijk_Call;
- dres->whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32(dst));
+ dres->jk_StopHere = Ijk_Call;
+ dres->whatNext = Dis_StopHere;
DIP("blx 0x%x (and switch to Thumb mode)\n", dst - 1);
return True;
}
@@ -12040,7 +12044,6 @@
static
DisResult disInstr_ARM_WRK (
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -12066,9 +12069,10 @@
// etc etc
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 4;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 4;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* Set default actions for post-insn handling of writes to r15, if
required. */
@@ -12085,11 +12089,7 @@
DIP("\t(arm) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
- /* We may be asked to update the guest R15 before going further. */
vassert(0 == (guest_R15_curr_instr_notENC & 3));
- if (put_IP) {
- llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
- }
/* ----------------------------------------------------------- */
@@ -12116,9 +12116,9 @@
/* orr r10,r10,r10 */) {
/* R3 = client_request ( R4 ) */
DIP("r3 = client_request ( %%r4 )\n");
- irsb->next = mkU32( guest_R15_curr_instr_notENC + 20 );
- irsb->jumpkind = Ijk_ClientReq;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 20 ));
+ dres.jk_StopHere = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
@@ -12136,9 +12136,9 @@
/* branch-and-link-to-noredir R4 */
DIP("branch-and-link-to-noredir r4\n");
llPutIReg(14, mkU32( guest_R15_curr_instr_notENC + 20) );
- irsb->next = llGetIReg(4);
- irsb->jumpkind = Ijk_NoRedir;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, llGetIReg(4));
+ dres.jk_StopHere = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
/* We don't know what it is. Set opc1/opc2 so decode_failure
@@ -12977,9 +12977,9 @@
dres.continueAt = (Addr64)dst;
} else {
/* no; terminate the SB at this point. */
- irsb->next = mkU32(dst);
- irsb->jumpkind = jk;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32(dst));
+ dres.jk_StopHere = jk;
+ dres.whatNext = Dis_StopHere;
}
DIP("b%s 0x%x\n", link ? "l" : "", dst);
} else {
@@ -13002,7 +13002,8 @@
stmt( IRStmt_Exit( unop(Iop_Not1,
unop(Iop_32to1, mkexpr(condT))),
Ijk_Boring,
- IRConst_U32(guest_R15_curr_instr_notENC+4) ));
+ IRConst_U32(guest_R15_curr_instr_notENC+4),
+ OFFB_R15T ));
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)dst;
comment = "(assumed taken)";
@@ -13021,7 +13022,8 @@
following this one. */
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
Ijk_Boring,
- IRConst_U32(dst) ));
+ IRConst_U32(dst),
+ OFFB_R15T ));
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)
(guest_R15_curr_instr_notENC+4);
@@ -13031,10 +13033,10 @@
/* Conservative default translation - end the block at
this point. */
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(condT)),
- jk, IRConst_U32(dst) ));
- irsb->next = mkU32(guest_R15_curr_instr_notENC + 4);
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ jk, IRConst_U32(dst), OFFB_R15T ));
+ llPutIReg(15, mkU32(guest_R15_curr_instr_notENC + 4));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
}
DIP("b%s%s 0x%x %s\n", link ? "l" : "", nCC(INSN_COND),
dst, comment);
@@ -13065,10 +13067,10 @@
putIRegA( 14, mkU32(guest_R15_curr_instr_notENC + 4),
IRTemp_INVALID/*because AL*/, Ijk_Boring );
}
- irsb->next = mkexpr(dst);
- irsb->jumpkind = link ? Ijk_Call
- : (rM == 14 ? Ijk_Ret : Ijk_Boring);
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(dst));
+ dres.jk_StopHere = link ? Ijk_Call
+ : (rM == 14 ? Ijk_Ret : Ijk_Boring);
+ dres.whatNext = Dis_StopHere;
if (condT == IRTemp_INVALID) {
DIP("b%sx r%u\n", link ? "l" : "", rM);
} else {
@@ -13363,9 +13365,9 @@
mk_skip_over_A32_if_cond_is_false( condT );
}
// AL after here
- irsb->next = mkU32( guest_R15_curr_instr_notENC + 4 );
- irsb->jumpkind = Ijk_Sys_syscall;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( guest_R15_curr_instr_notENC + 4 ));
+ dres.jk_StopHere = Ijk_Sys_syscall;
+ dres.whatNext = Dis_StopHere;
DIP("svc%s #0x%08x\n", nCC(INSN_COND), imm24);
goto decode_success;
}
@@ -13415,7 +13417,8 @@
}
stmt( IRStmt_Exit(unop(Iop_Not1, mkexpr(tSC1)),
/*Ijk_NoRedir*/Ijk_Boring,
- IRConst_U32(guest_R15_curr_instr_notENC)) );
+ IRConst_U32(guest_R15_curr_instr_notENC),
+ OFFB_R15T ));
putIRegA(rD, isB ? unop(Iop_8Uto32, mkexpr(tOld)) : mkexpr(tOld),
IRTemp_INVALID, Ijk_Boring);
DIP("swp%s%s r%u, r%u, [r%u]\n",
@@ -14142,10 +14145,9 @@
now. */
vassert(0 == (guest_R15_curr_instr_notENC & 3));
llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC) );
- irsb->next = mkU32(guest_R15_curr_instr_notENC);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.len = 0;
return dres;
decode_success:
@@ -14186,12 +14188,31 @@
binop(Iop_Xor32,
mkexpr(r15guard), mkU32(1))),
r15kind,
- IRConst_U32(guest_R15_curr_instr_notENC + 4)
+ IRConst_U32(guest_R15_curr_instr_notENC + 4),
+ OFFB_R15T
));
}
- irsb->next = llGetIReg(15);
- irsb->jumpkind = r15kind;
- dres.whatNext = Dis_StopHere;
+ /* This seems crazy, but we're required to finish the insn with
+ a write to the guest PC. As usual we rely on ir_opt to tidy
+ up later. */
+ llPutIReg(15, llGetIReg(15));
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = r15kind;
+ } else {
+ /* Set up the end-state in the normal way. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ llPutIReg(15, mkU32(dres.len + guest_R15_curr_instr_notENC));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ llPutIReg(15, mkU32(dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
}
return dres;
@@ -14219,7 +14240,6 @@
static
DisResult disInstr_THUMB_WRK (
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -14249,9 +14269,10 @@
// etc etc
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 2;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 2;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* Set default actions for post-insn handling of writes to r15, if
required. */
@@ -14270,11 +14291,7 @@
DIP("\t(thumb) 0x%x: ", (UInt)guest_R15_curr_instr_notENC);
- /* We may be asked to update the guest R15 before going further. */
vassert(0 == (guest_R15_curr_instr_notENC & 1));
- if (put_IP) {
- llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
- }
/* ----------------------------------------------------------- */
/* Spot "Special" instructions (see comment at top of file). */
@@ -14301,9 +14318,9 @@
/* orr.w r10,r10,r10 */) {
/* R3 = client_request ( R4 ) */
DIP("r3 = client_request ( %%r4 )\n");
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 20) | 1 );
- irsb->jumpkind = Ijk_ClientReq;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
+ dres.jk_StopHere = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
@@ -14323,9 +14340,9 @@
/* branch-and-link-to-noredir R4 */
DIP("branch-and-link-to-noredir r4\n");
llPutIReg(14, mkU32( (guest_R15_curr_instr_notENC + 20) | 1 ));
- irsb->next = getIRegT(4);
- irsb->jumpkind = Ijk_NoRedir;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, getIRegT(4));
+ dres.jk_StopHere = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
/* We don't know what it is. Set insn0 so decode_failure
@@ -14982,9 +14999,9 @@
vassert(rM == 15);
assign( dst, mkU32(guest_R15_curr_instr_notENC + 4) );
}
- irsb->next = mkexpr(dst);
- irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(dst));
+ dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("bx r%u (possibly switch to ARM mode)\n", rM);
goto decode_success;
}
@@ -15006,9 +15023,9 @@
assign( dst, getIRegT(rM) );
putIRegT( 14, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ),
IRTemp_INVALID );
- irsb->next = mkexpr(dst);
- irsb->jumpkind = Ijk_Call;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(dst));
+ dres.jk_StopHere = Ijk_Call;
+ dres.whatNext = Dis_StopHere;
DIP("blx r%u (possibly switch to ARM mode)\n", rM);
goto decode_success;
}
@@ -15039,9 +15056,9 @@
// stash pseudo-reg, and back up from that if we have to
// restart.
// uncond after here
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2) | 1 );
- irsb->jumpkind = Ijk_Sys_syscall;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2) | 1 ));
+ dres.jk_StopHere = Ijk_Sys_syscall;
+ dres.whatNext = Dis_StopHere;
DIP("svc #0x%08x\n", imm8);
goto decode_success;
}
@@ -15121,9 +15138,9 @@
condT = IRTemp_INVALID;
// now uncond
/* non-interworking branch */
- irsb->next = binop(Iop_Or32, mkexpr(val), mkU32(1));
- irsb->jumpkind = rM == 14 ? Ijk_Ret : Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, binop(Iop_Or32, mkexpr(val), mkU32(1)));
+ dres.jk_StopHere = rM == 14 ? Ijk_Ret : Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
}
DIP("mov r%u, r%u\n", rD, rM);
goto decode_success;
@@ -15178,7 +15195,8 @@
UInt dst = (guest_R15_curr_instr_notENC + 4 + imm32) | 1;
stmt(IRStmt_Exit( mkexpr(kond),
Ijk_Boring,
- IRConst_U32(toUInt(dst)) ));
+ IRConst_U32(toUInt(dst)),
+ OFFB_R15T ));
DIP("cb%s r%u, 0x%x\n", bOP ? "nz" : "z", rN, dst - 1);
goto decode_success;
}
@@ -15322,9 +15340,9 @@
it as is, no need to mess with it. Note, therefore, this
is an interworking return. */
if (bitR) {
- irsb->next = mkexpr(newPC);
- irsb->jumpkind = Ijk_Ret;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(newPC));
+ dres.jk_StopHere = Ijk_Ret;
+ dres.whatNext = Dis_StopHere;
}
DIP("pop {%s0x%04x}\n", bitR ? "pc," : "", regList & 0xFF);
@@ -15869,9 +15887,9 @@
mk_skip_over_T16_if_cond_is_false(condT);
condT = IRTemp_INVALID;
// now uncond
- irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b 0x%x\n", dst);
goto decode_success;
}
@@ -15900,11 +15918,12 @@
assign( kondT, mk_armg_calculate_condition(cond) );
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
Ijk_Boring,
- IRConst_U32(dst | 1/*CPSR.T*/) ));
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 2)
- | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ IRConst_U32(dst | 1/*CPSR.T*/),
+ OFFB_R15T ));
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 2)
+ | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b%s 0x%x\n", nCC(cond), dst);
goto decode_success;
}
@@ -15982,17 +16001,17 @@
if (isBL) {
/* BL: unconditional T -> T call */
/* we're calling Thumb code, hence "| 1" */
- irsb->next = mkU32( dst | 1 );
+ llPutIReg(15, mkU32( dst | 1 ));
DIP("bl 0x%x (stay in Thumb mode)\n", dst);
} else {
/* BLX: unconditional T -> A call */
/* we're calling ARM code, hence "& 3" to align to a
valid ARM insn address */
- irsb->next = mkU32( dst & ~3 );
+ llPutIReg(15, mkU32( dst & ~3 ));
DIP("blx 0x%x (switch to ARM mode)\n", dst & ~3);
}
- irsb->jumpkind = Ijk_Call;
- dres.whatNext = Dis_StopHere;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_Call;
goto decode_success;
}
}
@@ -16057,15 +16076,15 @@
condT = IRTemp_INVALID;
// now uncond
- /* Generate the IR. This might generate a write to R15, */
+ /* Generate the IR. This might generate a write to R15. */
mk_ldm_stm(False/*!arm*/, rN, bINC, bBEFORE, bW, bL, regList);
if (bL == 1 && (regList & (1<<15))) {
// If we wrote to R15, we have an interworking return to
// deal with.
- irsb->next = llGetIReg(15);
- irsb->jumpkind = Ijk_Ret;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, llGetIReg(15));
+ dres.jk_StopHere = Ijk_Ret;
+ dres.whatNext = Dis_StopHere;
}
DIP("%sm%c%c r%u%s, {0x%04x}\n",
@@ -16930,18 +16949,19 @@
putIRegT(rT, mkexpr(newRt), IRTemp_INVALID);
}
- if (loadsPC) {
- /* Presumably this is an interworking branch. */
- irsb->next = mkexpr(newRt);
- irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
- dres.whatNext = Dis_StopHere;
- }
-
/* Update Rn if necessary. */
if (bW == 1) {
vassert(rN != rT); // assured by validity check above
putIRegT(rN, mkexpr(postAddr), IRTemp_INVALID);
}
+
+ if (loadsPC) {
+ /* Presumably this is an interworking branch. */
+ vassert(rN != 15); // assured by validity check above
+ llPutIReg(15, mkexpr(newRt));
+ dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
+ }
}
if (bP == 1 && bW == 0) {
@@ -17087,9 +17107,9 @@
if (loadsPC) {
/* Presumably this is an interworking branch. */
- irsb->next = mkexpr(newRt);
- irsb->jumpkind = Ijk_Boring; /* or _Ret ? */
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkexpr(newRt));
+ dres.jk_StopHere = Ijk_Boring; /* or _Ret ? */
+ dres.whatNext = Dis_StopHere;
}
}
@@ -17345,11 +17365,12 @@
assign( kondT, mk_armg_calculate_condition(cond) );
stmt( IRStmt_Exit( unop(Iop_32to1, mkexpr(kondT)),
Ijk_Boring,
- IRConst_U32(dst | 1/*CPSR.T*/) ));
- irsb->next = mkU32( (guest_R15_curr_instr_notENC + 4)
- | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ IRConst_U32(dst | 1/*CPSR.T*/),
+ OFFB_R15T ));
+ llPutIReg(15, mkU32( (guest_R15_curr_instr_notENC + 4)
+ | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b%s.w 0x%x\n", nCC(cond), dst);
goto decode_success;
}
@@ -17390,9 +17411,9 @@
// now uncond
// branch to dst
- irsb->next = mkU32( dst | 1 /*CPSR.T*/ );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(15, mkU32( dst | 1 /*CPSR.T*/ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("b.w 0x%x\n", dst);
goto decode_success;
}
@@ -17423,16 +17444,17 @@
assign(delta, unop(Iop_8Uto32, loadLE(Ity_I8, ea)));
}
- irsb->next
- = binop(Iop_Or32,
- binop(Iop_Add32,
- getIRegT(15),
- binop(Iop_Shl32, mkexpr(delta), mkU8(1))
- ),
- mkU32(1)
- );
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ llPutIReg(
+ 15,
+ binop(Iop_Or32,
+ binop(Iop_Add32,
+ getIRegT(15),
+ binop(Iop_Shl32, mkexpr(delta), mkU8(1))
+ ),
+ mkU32(1)
+ ));
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
DIP("tb%c [r%u, r%u%s]\n",
bH ? 'h' : 'b', rN, rM, bH ? ", LSL #1" : "");
goto decode_success;
@@ -18199,60 +18221,29 @@
now. */
vassert(0 == (guest_R15_curr_instr_notENC & 1));
llPutIReg( 15, mkU32(guest_R15_curr_instr_notENC | 1) );
- irsb->next = mkU32(guest_R15_curr_instr_notENC | 1 /* CPSR.T */);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.len = 0;
return dres;
decode_success:
/* All decode successes end up here. */
- DIP("\n");
-
- vassert(dres.len == 2 || dres.len == 4 || dres.len == 20);
-
-#if 0
- // XXX is this necessary on Thumb?
- /* Now then. Do we have an implicit jump to r15 to deal with? */
- if (r15written) {
- /* If we get jump to deal with, we assume that there's been no
- other competing branch stuff previously generated for this
- insn. That's reasonable, in the sense that the ARM insn set
- appears to declare as "Unpredictable" any instruction which
- generates more than one possible new value for r15. Hence
- just assert. The decoders themselves should check against
- all such instructions which are thusly Unpredictable, and
- decline to decode them. Hence we should never get here if we
- have competing new values for r15, and hence it is safe to
- assert here. */
- vassert(dres.whatNext == Dis_Continue);
- vassert(irsb->next == NULL);
- vassert(irsb->jumpkind == Ijk_Boring);
- /* If r15 is unconditionally written, terminate the block by
- jumping to it. If it's conditionally written, still
- terminate the block (a shame, but we can't do side exits to
- arbitrary destinations), but first jump to the next
- instruction if the condition doesn't hold. */
- /* We can't use getIRegT(15) to get the destination, since that
- will produce r15+4, which isn't what we want. Must use
- llGetIReg(15) instead. */
- if (r15guard == IRTemp_INVALID) {
- /* unconditional */
- } else {
- /* conditional */
- stmt( IRStmt_Exit(
- unop(Iop_32to1,
- binop(Iop_Xor32,
- mkexpr(r15guard), mkU32(1))),
- r15kind,
- IRConst_U32(guest_R15_curr_instr_notENC + 4)
- ));
- }
- irsb->next = llGetIReg(15);
- irsb->jumpkind = r15kind;
- dres.whatNext = Dis_StopHere;
+ vassert(dres.len == 4 || dres.len == 2 || dres.len == 20);
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ llPutIReg(15, mkU32(dres.len + (guest_R15_curr_instr_notENC | 1)));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ llPutIReg(15, mkU32(dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
}
-#endif
+
+ DIP("\n");
return dres;
@@ -18351,7 +18342,6 @@
is located in host memory at &guest_code[delta]. */
DisResult disInstr_ARM ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -18380,12 +18370,12 @@
}
if (isThumb) {
- dres = disInstr_THUMB_WRK ( put_IP, resteerOkFn,
+ dres = disInstr_THUMB_WRK ( resteerOkFn,
resteerCisOk, callback_opaque,
&guest_code_IN[delta_ENCODED - 1],
archinfo, abiinfo );
} else {
- dres = disInstr_ARM_WRK ( put_IP, resteerOkFn,
+ dres = disInstr_ARM_WRK ( resteerOkFn,
resteerCisOk, callback_opaque,
&guest_code_IN[delta_ENCODED],
archinfo, abiinfo );
diff --git a/priv/guest_generic_bb_to_IR.c b/priv/guest_generic_bb_to_IR.c
index 32dca8c..6066fe0 100644
--- a/priv/guest_generic_bb_to_IR.c
+++ b/priv/guest_generic_bb_to_IR.c
@@ -140,6 +140,43 @@
(In fact it's a VgInstrumentClosure.)
*/
+/* Regarding IP updating. dis_instr_fn (that does the guest specific
+ work of disassembling an individual instruction) must finish the
+ resulting IR with "PUT(guest_IP) = ". Hence in all cases it must
+ state the next instruction address.
+
+ If the block is to be ended at that point, then this routine
+ (bb_to_IR) will set up the next/jumpkind/offsIP fields so as to
+ make a transfer (of the right kind) to "GET(guest_IP)". Hence if
+ dis_instr_fn generates incorrect IP updates we will see it
+ immediately (due to jumping to the wrong next guest address).
+
+ However it is also necessary to set this up so it can be optimised
+ nicely. The IRSB exit is defined to update the guest IP, so that
+ chaining works -- since the chain_me stubs expect the chain-to
+ address to be in the guest state. Hence what the IRSB next fields
+ will contain initially is (implicitly)
+
+ PUT(guest_IP) [implicitly] = GET(guest_IP) [explicit expr on ::next]
+
+ which looks pretty strange at first. E.g. an unconditional branch
+ to some address 0x123456 looks like this:
+
+ PUT(guest_IP) = 0x123456; // dis_instr_fn generates this
+ // the exit
+ PUT(guest_IP) [implicitly] = GET(guest_IP); exit-Boring
+
+ after redundant-GET and -PUT removal by iropt, we get what we want:
+
+ // the exit
+ PUT(guest_IP) [implicitly] = 0x123456; exit-Boring
+
+ This makes the IRSB-end case the same as the side-exit case: update
+ IP, then transfer. There is no redundancy of representation for
+ the destination, and we use the destination specified by
+ dis_instr_fn, so any errors it makes show up sooner.
+*/
+
IRSB* bb_to_IR (
/*OUT*/VexGuestExtents* vge,
/*OUT*/UInt* n_sc_extents,
@@ -155,13 +192,15 @@
/*IN*/ IRType guest_word_type,
/*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*),
/*IN*/ Bool (*preamble_function)(void*,IRSB*),
- /*IN*/ Int offB_TISTART,
- /*IN*/ Int offB_TILEN
+ /*IN*/ Int offB_GUEST_TISTART,
+ /*IN*/ Int offB_GUEST_TILEN,
+ /*IN*/ Int offB_GUEST_IP,
+ /*IN*/ Int szB_GUEST_IP
)
{
Long delta;
Int i, n_instrs, first_stmt_idx;
- Bool resteerOK, need_to_put_IP, debug_print;
+ Bool resteerOK, debug_print;
DisResult dres;
IRStmt* imark;
IRStmt* nop;
@@ -185,6 +224,14 @@
vassert(vex_control.guest_chase_thresh < vex_control.guest_max_insns);
vassert(guest_word_type == Ity_I32 || guest_word_type == Ity_I64);
+ if (guest_word_type == Ity_I32) {
+ vassert(szB_GUEST_IP == 4);
+ vassert((offB_GUEST_IP % 4) == 0);
+ } else {
+ vassert(szB_GUEST_IP == 8);
+ vassert((offB_GUEST_IP % 8) == 0);
+ }
+
/* Start a new, empty extent. */
vge->n_used = 1;
vge->base[0] = guest_IP_bbstart;
@@ -297,13 +344,12 @@
);
}
- /* for the first insn, the dispatch loop will have set
- %IP, but for all the others we have to do it ourselves. */
- need_to_put_IP = toBool(n_instrs > 0);
+ if (debug_print && n_instrs > 0)
+ vex_printf("\n");
/* Finally, actually disassemble an instruction. */
+ vassert(irsb->next == NULL);
dres = dis_instr_fn ( irsb,
- need_to_put_IP,
resteerOKfn,
toBool(n_cond_resteers_allowed > 0),
callback_opaque,
@@ -347,18 +393,22 @@
}
}
- /* If dis_instr_fn terminated the BB at this point, check it
- also filled in the irsb->next field. */
- if (dres.whatNext == Dis_StopHere) {
- vassert(irsb->next != NULL);
- if (debug_print) {
- vex_printf(" ");
- vex_printf( "goto {");
- ppIRJumpKind(irsb->jumpkind);
- vex_printf( "} ");
- ppIRExpr( irsb->next );
- vex_printf( "\n");
- }
+ /* Individual insn disassembly may not mess with irsb->next.
+ This function is the only place where it can be set. */
+ vassert(irsb->next == NULL);
+ vassert(irsb->jumpkind == Ijk_Boring);
+ vassert(irsb->offsIP == 0);
+
+ /* Individual insn disassembly must finish the IR for each
+ instruction with an assignment to the guest PC. */
+ vassert(first_stmt_idx < irsb->stmts_used);
+ /* it follows that irsb->stmts_used must be > 0 */
+ { IRStmt* st = irsb->stmts[irsb->stmts_used-1];
+ vassert(st);
+ vassert(st->tag == Ist_Put);
+ vassert(st->Ist.Put.offset == offB_GUEST_IP);
+ /* Really we should also check that the type of the Put'd data
+ == guest_word_type, but that's a bit expensive. */
}
/* Update the VexGuestExtents we are constructing. */
@@ -370,36 +420,38 @@
vge->len[vge->n_used-1]
= toUShort(toUInt( vge->len[vge->n_used-1] + dres.len ));
n_instrs++;
- if (debug_print)
- vex_printf("\n");
/* Advance delta (inconspicuous but very important :-) */
delta += (Long)dres.len;
switch (dres.whatNext) {
case Dis_Continue:
- vassert(irsb->next == NULL);
+ vassert(dres.continueAt == 0);
+ vassert(dres.jk_StopHere == Ijk_INVALID);
if (n_instrs < vex_control.guest_max_insns) {
/* keep going */
} else {
- /* We have to stop. */
- irsb->next
- = IRExpr_Const(
- guest_word_type == Ity_I32
- ? IRConst_U32(toUInt(guest_IP_bbstart+delta))
- : IRConst_U64(guest_IP_bbstart+delta)
- );
+ /* We have to stop. See comment above re irsb field
+ settings here. */
+ irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
+ /* irsb->jumpkind must already be Ijk_Boring */
+ irsb->offsIP = offB_GUEST_IP;
goto done;
}
break;
case Dis_StopHere:
- vassert(irsb->next != NULL);
+ vassert(dres.continueAt == 0);
+ vassert(dres.jk_StopHere != Ijk_INVALID);
+ /* See comment above re irsb field settings here. */
+ irsb->next = IRExpr_Get(offB_GUEST_IP, guest_word_type);
+ irsb->jumpkind = dres.jk_StopHere;
+ irsb->offsIP = offB_GUEST_IP;
goto done;
+
case Dis_ResteerU:
case Dis_ResteerC:
/* Check that we actually allowed a resteer .. */
vassert(resteerOK);
- vassert(irsb->next == NULL);
if (dres.whatNext == Dis_ResteerC) {
vassert(n_cond_resteers_allowed > 0);
n_cond_resteers_allowed--;
@@ -628,10 +680,10 @@
= IRStmt_WrTmp(tilen_tmp, IRExpr_Const(len2check_IRConst) );
irsb->stmts[selfcheck_idx + i * 5 + 2]
- = IRStmt_Put( offB_TISTART, IRExpr_RdTmp(tistart_tmp) );
+ = IRStmt_Put( offB_GUEST_TISTART, IRExpr_RdTmp(tistart_tmp) );
irsb->stmts[selfcheck_idx + i * 5 + 3]
- = IRStmt_Put( offB_TILEN, IRExpr_RdTmp(tilen_tmp) );
+ = IRStmt_Put( offB_GUEST_TILEN, IRExpr_RdTmp(tilen_tmp) );
/* Generate the entry point descriptors */
if (abiinfo_both->host_ppc_calls_use_fndescrs) {
@@ -685,11 +737,25 @@
/* Where we must restart if there's a failure: at the
first extent, regardless of which extent the
failure actually happened in. */
- guest_IP_bbstart_IRConst
+ guest_IP_bbstart_IRConst,
+ offB_GUEST_IP
);
} /* for (i = 0; i < vge->n_used; i++) */
}
+ /* irsb->next must now be set, since we've finished the block.
+ Print it if necessary. */
+ vassert(irsb->next != NULL);
+ if (debug_print) {
+ vex_printf(" ");
+ vex_printf( "PUT(%d) = ", irsb->offsIP);
+ ppIRExpr( irsb->next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(irsb->jumpkind);
+ vex_printf( "\n");
+ vex_printf( "\n");
+ }
+
return irsb;
}
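The net effect in bb_to_IR: no matter how a block ends, irsb->next is always
GET(offB_GUEST_IP) and irsb->offsIP names that register, which is exactly what
the chain_me stubs need -- the chain-to address lives in the guest state. A
condensed sketch of the Dis_StopHere wiring, using the field names from this
file:

    /* Sketch: the only place the IRSB's end-state is set now. */
    static void finish_irsb_sketch ( IRSB* irsb, DisResult* dres,
                                     IRType guest_word_type,
                                     Int offB_GUEST_IP )
    {
       vassert(dres->whatNext    == Dis_StopHere);
       vassert(dres->jk_StopHere != Ijk_INVALID);
       irsb->next     = IRExpr_Get(offB_GUEST_IP, guest_word_type);
       irsb->jumpkind = dres->jk_StopHere;
       irsb->offsIP   = offB_GUEST_IP;
    }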
diff --git a/priv/guest_generic_bb_to_IR.h b/priv/guest_generic_bb_to_IR.h
index f623443..9c1e740 100644
--- a/priv/guest_generic_bb_to_IR.h
+++ b/priv/guest_generic_bb_to_IR.h
@@ -76,6 +76,13 @@
enum { Dis_StopHere, Dis_Continue,
Dis_ResteerU, Dis_ResteerC } whatNext;
+ /* For Dis_StopHere, we need to end the block and create a
+ transfer to whatever the NIA is. That will presumably have
+ been set by the IR generated for this insn. So we need to
+ know the jump kind to use. Should be Ijk_INVALID in the other
+ Dis_ cases. */
+ IRJumpKind jk_StopHere;
+
/* For Dis_Resteer, this is the guest address we should continue
at. Otherwise ignored (should be zero). */
Addr64 continueAt;
@@ -112,10 +119,6 @@
/* This is the IRSB to which the resulting IR is to be appended. */
/*OUT*/ IRSB* irbb,
- /* Do we need to generate IR to set the guest IP for this insn,
- or not? */
- /*IN*/ Bool put_IP,
-
/* Return True iff resteering to the given addr is allowed (for
branches/calls to destinations that are known at JIT-time) */
/*IN*/ Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
@@ -173,8 +176,10 @@
/*IN*/ IRType guest_word_type,
/*IN*/ UInt (*needs_self_check)(void*,VexGuestExtents*),
/*IN*/ Bool (*preamble_function)(void*,IRSB*),
- /*IN*/ Int offB_TISTART,
- /*IN*/ Int offB_TILEN
+ /*IN*/ Int offB_GUEST_TISTART,
+ /*IN*/ Int offB_GUEST_TILEN,
+ /*IN*/ Int offB_GUEST_IP,
+ /*IN*/ Int szB_GUEST_IP
);
diff --git a/priv/guest_ppc_defs.h b/priv/guest_ppc_defs.h
index 7c8dc8e..b60766d 100644
--- a/priv/guest_ppc_defs.h
+++ b/priv/guest_ppc_defs.h
@@ -48,7 +48,6 @@
bb_to_IR.h. */
extern
DisResult disInstr_PPC ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
diff --git a/priv/guest_ppc_helpers.c b/priv/guest_ppc_helpers.c
index 26ec86f..7944f82 100644
--- a/priv/guest_ppc_helpers.c
+++ b/priv/guest_ppc_helpers.c
@@ -352,6 +352,11 @@
void LibVEX_GuestPPC32_initialise ( /*OUT*/VexGuestPPC32State* vex_state )
{
Int i;
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+ vex_state->pad3 = 0;
+ vex_state->pad4 = 0;
+
vex_state->guest_GPR0 = 0;
vex_state->guest_GPR1 = 0;
vex_state->guest_GPR2 = 0;
@@ -385,7 +390,6 @@
vex_state->guest_GPR30 = 0;
vex_state->guest_GPR31 = 0;
-
/* Initialise the vector state. */
# define VECZERO(_vr) _vr[0]=_vr[1]=_vr[2]=_vr[3] = 0;
@@ -484,6 +488,8 @@
vex_state->guest_FPROUND = PPCrm_NEAREST;
vex_state->guest_DFPROUND = PPCrm_NEAREST;
+ vex_state->pad1 = 0;
+ vex_state->pad2 = 0;
vex_state->guest_VRSAVE = 0;
@@ -503,6 +509,8 @@
vex_state->guest_IP_AT_SYSCALL = 0;
vex_state->guest_SPRG3_RO = 0;
+
+ vex_state->padding = 0;
}
@@ -510,6 +518,9 @@
void LibVEX_GuestPPC64_initialise ( /*OUT*/VexGuestPPC64State* vex_state )
{
Int i;
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+ vex_state->pad0 = 0;
vex_state->guest_GPR0 = 0;
vex_state->guest_GPR1 = 0;
vex_state->guest_GPR2 = 0;
@@ -641,6 +652,8 @@
vex_state->guest_FPROUND = PPCrm_NEAREST;
vex_state->guest_DFPROUND = PPCrm_NEAREST;
+ vex_state->pad1 = 0;
+ vex_state->pad2 = 0;
vex_state->guest_VRSAVE = 0;
diff --git a/priv/guest_ppc_toIR.c b/priv/guest_ppc_toIR.c
index 91f568e..cfad56a 100644
--- a/priv/guest_ppc_toIR.c
+++ b/priv/guest_ppc_toIR.c
@@ -1505,7 +1505,7 @@
binop(Iop_And64, mkexpr(addr), mkU64(align-1)),
mkU64(0)),
Ijk_SigBUS,
- IRConst_U64( guest_CIA_curr_instr )
+ IRConst_U64( guest_CIA_curr_instr ), OFFB_CIA
)
);
} else {
@@ -1516,7 +1516,7 @@
binop(Iop_And32, mkexpr(addr), mkU32(align-1)),
mkU32(0)),
Ijk_SigBUS,
- IRConst_U32( guest_CIA_curr_instr )
+ IRConst_U32( guest_CIA_curr_instr ), OFFB_CIA
)
);
}
@@ -2693,7 +2693,7 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkU32(ew), mkU32(EmWarn_NONE)),
Ijk_EmWarn,
- mkSzConst( ty, nextInsnAddr()) ));
+ mkSzConst( ty, nextInsnAddr()), OFFB_CIA ));
}
/* Ignore all other writes */
@@ -4977,7 +4977,7 @@
/* if (nBytes < (i+1)) goto NIA; */
stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
Ijk_Boring,
- mkSzConst( ty, nextInsnAddr()) ));
+ mkSzConst( ty, nextInsnAddr()), OFFB_CIA ));
/* when crossing into a new dest register, set it to zero. */
if ((i % 4) == 0) {
rD++; if (rD == 32) rD = 0;
@@ -5028,7 +5028,7 @@
/* if (nBytes < (i+1)) goto NIA; */
stmt( IRStmt_Exit( binop(Iop_CmpLT32U, e_nbytes, mkU32(i+1)),
Ijk_Boring,
- mkSzConst( ty, nextInsnAddr() ) ));
+ mkSzConst( ty, nextInsnAddr() ), OFFB_CIA ));
/* check for crossing into a new src register. */
if ((i % 4) == 0) {
rS++; if (rS == 32) rS = 0;
@@ -5250,6 +5250,7 @@
/* The default what-next. Individual cases can override it. */
dres->whatNext = Dis_StopHere;
+ vassert(dres->jk_StopHere == Ijk_INVALID);
switch (opc1) {
case 0x12: // b (Branch, PPC32 p360)
@@ -5282,8 +5283,8 @@
dres->whatNext = Dis_ResteerU;
dres->continueAt = tgt;
} else {
- irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
- irsb->next = mkSzImm(ty, tgt);
+ dres->jk_StopHere = flag_LK ? Ijk_Call : Ijk_Boring;
+ putGST( PPC_GST_CIA, mkSzImm(ty, tgt) );
}
break;
@@ -5319,10 +5320,10 @@
stmt( IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(do_branch), mkU32(0)),
flag_LK ? Ijk_Call : Ijk_Boring,
- mkSzConst(ty, tgt) ) );
-
- irsb->jumpkind = Ijk_Boring;
- irsb->next = e_nia;
+ mkSzConst(ty, tgt), OFFB_CIA ) );
+
+ dres->jk_StopHere = Ijk_Boring;
+ putGST( PPC_GST_CIA, e_nia );
break;
case 0x13:
@@ -5354,15 +5355,15 @@
stmt( IRStmt_Exit(
binop(Iop_CmpEQ32, mkexpr(cond_ok), mkU32(0)),
Ijk_Boring,
- c_nia ));
+ c_nia, OFFB_CIA ));
if (flag_LK && vbi->guest_ppc_zap_RZ_at_bl) {
make_redzone_AbiHint( vbi, lr_old,
"b-ctr-l (indirect call)" );
}
- irsb->jumpkind = flag_LK ? Ijk_Call : Ijk_Boring;
- irsb->next = mkexpr(lr_old);
+ dres->jk_StopHere = flag_LK ? Ijk_Call : Ijk_Boring;
+ putGST( PPC_GST_CIA, mkexpr(lr_old) );
break;
case 0x010: { // bclr (Branch Cond. to Link Register, PPC32 p365)
@@ -5394,7 +5395,7 @@
stmt( IRStmt_Exit(
binop(Iop_CmpEQ32, mkexpr(do_branch), mkU32(0)),
Ijk_Boring,
- c_nia ));
+ c_nia, OFFB_CIA ));
if (vanilla_return && vbi->guest_ppc_zap_RZ_at_blr) {
make_redzone_AbiHint( vbi, lr_old,
@@ -5404,8 +5405,8 @@
/* blrl is pretty strange; it's like a return that sets the
return address of its caller to the insn following this
one. Mark it as a return. */
- irsb->jumpkind = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */
- irsb->next = mkexpr(lr_old);
+ dres->jk_StopHere = Ijk_Ret; /* was flag_LK ? Ijk_Call : Ijk_Ret; */
+ putGST( PPC_GST_CIA, mkexpr(lr_old) );
break;
}
default:
@@ -5561,7 +5562,8 @@
stmt( IRStmt_Exit(
binop(opCMPEQ, const0, const0),
Ijk_SigTRAP,
- mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
+ mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia),
+ OFFB_CIA
));
return True; /* unconditional trap */
}
@@ -5604,7 +5606,8 @@
stmt( IRStmt_Exit(
binop(opCMPNE, cond, const0),
Ijk_SigTRAP,
- mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia)
+ mode64 ? IRConst_U64(cia) : IRConst_U32((UInt)cia),
+ OFFB_CIA
));
return False; /* not an unconditional trap */
}
@@ -5652,9 +5655,9 @@
if (uncond) {
/* If the trap shows signs of being unconditional, don't
continue decoding past it. */
- irsb->next = mkSzImm( ty, nextInsnAddr() );
- irsb->jumpkind = Ijk_Boring;
- dres->whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm( ty, nextInsnAddr() ));
+ dres->jk_StopHere = Ijk_Boring;
+ dres->whatNext = Dis_StopHere;
}
return True;
@@ -5706,9 +5709,9 @@
if (uncond) {
/* If the trap shows signs of being unconditional, don't
continue decoding past it. */
- irsb->next = mkSzImm( ty, nextInsnAddr() );
- irsb->jumpkind = Ijk_Boring;
- dres->whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm( ty, nextInsnAddr() ));
+ dres->jk_StopHere = Ijk_Boring;
+ dres->whatNext = Dis_StopHere;
}
return True;
@@ -5739,12 +5742,12 @@
/* It's important that all ArchRegs carry their up-to-date value
at this point. So we declare an end-of-block here, which
forces any TempRegs caching ArchRegs to be flushed. */
- irsb->next = abiinfo->guest_ppc_sc_continues_at_LR
- ? getGST( PPC_GST_LR )
- : mkSzImm( ty, nextInsnAddr() );
- irsb->jumpkind = Ijk_Sys_syscall;
+ putGST( PPC_GST_CIA, abiinfo->guest_ppc_sc_continues_at_LR
+ ? getGST( PPC_GST_LR )
+ : mkSzImm( ty, nextInsnAddr() ));
- dres->whatNext = Dis_StopHere;
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_Sys_syscall;
return True;
}
@@ -6722,9 +6725,9 @@
/* be paranoid ... */
stmt( IRStmt_MBE(Imbe_Fence) );
- irsb->jumpkind = Ijk_TInval;
- irsb->next = mkSzImm(ty, nextInsnAddr());
- dres->whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr()));
+ dres->jk_StopHere = Ijk_TInval;
+ dres->whatNext = Dis_StopHere;
break;
}
@@ -13761,7 +13764,6 @@
static
DisResult disInstr_PPC_WRK (
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -13805,9 +13807,10 @@
delta = (Long)mkSzAddr(ty, (ULong)delta64);
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 0;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* At least this is simple on PPC32: insns are all 4 bytes long, and
4-aligned. So just fish the whole thing out of memory right now
@@ -13818,10 +13821,6 @@
DIP("\t0x%llx: ", (ULong)guest_CIA_curr_instr);
- /* We may be asked to update the guest CIA before going further. */
- if (put_IP)
- putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) );
-
/* Spot "Special" instructions (see comment at top of file). */
{
UChar* code = (UChar*)(guest_code + delta);
@@ -13850,9 +13849,9 @@
/* %R3 = client_request ( %R4 ) */
DIP("r3 = client_request ( %%r4 )\n");
delta += 20;
- irsb->next = mkSzImm( ty, guest_CIA_bbstart + delta );
- irsb->jumpkind = Ijk_ClientReq;
- dres.whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm( ty, guest_CIA_bbstart + delta ));
+ dres.jk_StopHere = Ijk_ClientReq;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
@@ -13870,9 +13869,9 @@
DIP("branch-and-link-to-noredir r11\n");
delta += 20;
putGST( PPC_GST_LR, mkSzImm(ty, guest_CIA_bbstart + (Long)delta) );
- irsb->next = getIReg(11);
- irsb->jumpkind = Ijk_NoRedir;
- dres.whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, getIReg(11));
+ dres.jk_StopHere = Ijk_NoRedir;
+ dres.whatNext = Dis_StopHere;
goto decode_success;
}
else
@@ -14493,9 +14492,9 @@
Bool ok = dis_int_ldst_str( theInstr, &stopHere );
if (!ok) goto decode_failure;
if (stopHere) {
- irsb->next = mkSzImm(ty, nextInsnAddr());
- irsb->jumpkind = Ijk_Boring;
- dres.whatNext = Dis_StopHere;
+ putGST( PPC_GST_CIA, mkSzImm(ty, nextInsnAddr()) );
+ dres.jk_StopHere = Ijk_Boring;
+ dres.whatNext = Dis_StopHere;
}
goto decode_success;
}
@@ -14848,16 +14847,28 @@
insn, but nevertheless be paranoid and update it again right
now. */
putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr) );
- irsb->next = mkSzImm(ty, guest_CIA_curr_instr);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.len = 0;
return dres;
} /* switch (opc) for the main (primary) opcode switch. */
decode_success:
/* All decode successes end up here. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ putGST( PPC_GST_CIA, mkSzImm(ty, guest_CIA_curr_instr + 4));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ putGST( PPC_GST_CIA, mkSzImm(ty, dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
DIP("\n");
if (dres.len == 0) {
@@ -14880,7 +14891,6 @@
is located in host memory at &guest_code[delta]. */
DisResult disInstr_PPC ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -14925,8 +14935,7 @@
guest_CIA_curr_instr = mkSzAddr(ty, guest_IP);
guest_CIA_bbstart = mkSzAddr(ty, guest_IP - delta);
- dres = disInstr_PPC_WRK ( put_IP,
- resteerOkFn, resteerCisOk, callback_opaque,
+ dres = disInstr_PPC_WRK ( resteerOkFn, resteerCisOk, callback_opaque,
delta, archinfo, abiinfo );
return dres;
diff --git a/priv/guest_s390_defs.h b/priv/guest_s390_defs.h
index 754ce3d..b7e57ba 100644
--- a/priv/guest_s390_defs.h
+++ b/priv/guest_s390_defs.h
@@ -43,7 +43,6 @@
/* Convert one s390 insn to IR. See the type DisOneInstrFn in
bb_to_IR.h. */
DisResult disInstr_S390 ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
diff --git a/priv/guest_s390_helpers.c b/priv/guest_s390_helpers.c
index 47a0635..167d426 100644
--- a/priv/guest_s390_helpers.c
+++ b/priv/guest_s390_helpers.c
@@ -130,6 +130,8 @@
state->guest_TILEN = 0;
state->guest_IP_AT_SYSCALL = 0;
state->guest_EMWARN = EmWarn_NONE;
+ state->host_EvC_COUNTER = 0;
+ state->host_EvC_FAILADDR = 0;
/*------------------------------------------------------------*/
/*--- Initialise thunk ---*/
diff --git a/priv/guest_s390_toIR.c b/priv/guest_s390_toIR.c
index d0dc00e..26190f0 100644
--- a/priv/guest_s390_toIR.c
+++ b/priv/guest_s390_toIR.c
@@ -120,6 +120,13 @@
return IRExpr_RdTmp(tmp);
}
+/* Generate an expression node for an address. */
+static __inline__ IRExpr *
+mkaddr_expr(Addr64 addr)
+{
+ return IRExpr_Const(IRConst_U64(addr));
+}
+
/* Add a statement that assigns to a temporary */
static __inline__ void
assign(IRTemp dst, IRExpr *expr)
@@ -127,6 +134,22 @@
stmt(IRStmt_WrTmp(dst, expr));
}
+/* Write an address into the guest_IA */
+static __inline__ void
+put_IA(IRExpr *address)
+{
+ stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_IA), address));
+}
+
+/* Add a dummy put to the guest_IA to satisfy an assert in bb_to_IR
+ that wants the last statement in an IRSB to be a put to the guest_IA.
+ Mostly used for insns that use the "counter" pseudo guest reg. */
+static __inline__ void
+dummy_put_IA(void)
+{
+ put_IA(IRExpr_Get(S390X_GUEST_OFFSET(guest_IA), Ity_I64));
+}
+
/* Create a temporary of the given type and assign the expression to it */
static __inline__ IRTemp
mktemp(IRType type, IRExpr *expr)
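
The comment on dummy_put_IA points at an invariant enforced downstream: a finished IRSB's last statement must be a Put to guest_IA. A hedged sketch of the consumer-side check (paraphrasing, not quoting, the bb_to_IR assert):

/* Hedged sketch of the invariant dummy_put_IA exists to satisfy. */
static void check_block_ends_with_put_IA(IRSB *bb, Int offset_IA)
{
   IRStmt *last = bb->stmts[bb->stmts_used - 1];
   vassert(last->tag == Ist_Put);              /* must be a Put ...   */
   vassert(last->Ist.Put.offset == offset_IA); /* ... to the guest IA */
}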
@@ -242,10 +265,10 @@
static void
call_function(IRExpr *callee_address)
{
- irsb->next = callee_address;
- irsb->jumpkind = Ijk_Call;
+ put_IA(callee_address);
- dis_res->whatNext = Dis_StopHere;
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Call;
}
/* Function call with known target. */
@@ -256,9 +279,10 @@
dis_res->whatNext = Dis_ResteerU;
dis_res->continueAt = callee_address;
} else {
- irsb->next = mkU64(callee_address);
- irsb->jumpkind = Ijk_Call;
+ put_IA(mkaddr_expr(callee_address));
+
dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Call;
}
}
@@ -266,10 +290,10 @@
static void
return_from_function(IRExpr *return_address)
{
- irsb->next = return_address;
- irsb->jumpkind = Ijk_Ret;
+ put_IA(return_address);
- dis_res->whatNext = Dis_StopHere;
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Ret;
}
/* A conditional branch whose target is not known at instrumentation time.
@@ -289,12 +313,13 @@
{
vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
- stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr)));
+ stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(guest_IA_next_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
- irsb->next = target;
- irsb->jumpkind = Ijk_Boring;
+ put_IA(target);
- dis_res->whatNext = Dis_StopHere;
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Boring;
}
/* A conditional branch whose target is known at instrumentation time. */
@@ -303,8 +328,13 @@
{
vassert(typeOfIRExpr(irsb->tyenv, condition) == Ity_I1);
- stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(target)));
- dis_res->whatNext = Dis_Continue;
+ stmt(IRStmt_Exit(condition, Ijk_Boring, IRConst_U64(target),
+ S390X_GUEST_OFFSET(guest_IA)));
+
+ put_IA(mkaddr_expr(guest_IA_next_instr));
+
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Boring;
}
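
Note the semantic change just above: with a known target, the helper used to leave whatNext at Dis_Continue and fall through; now the fall-through address is written explicitly and the block ends on both arms. A condensed restatement (parameter names invented):

/* Hedged restatement of the new known-target conditional branch. */
static void cond_goto_known(IRExpr *cond, Addr64 target, Addr64 fall_through)
{
   /* taken arm: side exit, now also naming the guest-IA offset */
   stmt(IRStmt_Exit(cond, Ijk_Boring, IRConst_U64(target),
                    S390X_GUEST_OFFSET(guest_IA)));
   /* not-taken arm: fall-through address goes into guest_IA ... */
   put_IA(mkaddr_expr(fall_through));
   /* ... and the block now ends either way */
   dis_res->whatNext    = Dis_StopHere;
   dis_res->jk_StopHere = Ijk_Boring;
}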
/* An unconditional branch. Target may or may not be known at instrumentation
@@ -312,23 +342,26 @@
static void
always_goto(IRExpr *target)
{
- irsb->next = target;
- irsb->jumpkind = Ijk_Boring;
+ put_IA(target);
- dis_res->whatNext = Dis_StopHere;
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Boring;
}
+
/* An unconditional branch to a known target. */
static void
always_goto_and_chase(Addr64 target)
{
if (resteer_fn(resteer_data, target)) {
+ /* Follow into the target */
dis_res->whatNext = Dis_ResteerU;
dis_res->continueAt = target;
} else {
- irsb->next = mkU64(target);
- irsb->jumpkind = Ijk_Boring;
- dis_res->whatNext = Dis_StopHere;
+ put_IA(mkaddr_expr(target));
+
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Boring;
}
}
@@ -343,14 +376,13 @@
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_IP_AT_SYSCALL),
mkU64(guest_IA_curr_instr)));
+ put_IA(mkaddr_expr(guest_IA_next_instr));
+
/* It's important that all ArchRegs carry their up-to-date value
at this point. So we declare an end-of-block here, which
forces any TempRegs caching ArchRegs to be flushed. */
- irsb->next = mkU64(guest_IA_next_instr);
-
- irsb->jumpkind = Ijk_Sys_syscall;
-
- dis_res->whatNext = Dis_StopHere;
+ dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_Sys_syscall;
}
/* Encode the s390 rounding mode as it appears in the m3/m4 fields of certain
@@ -1796,6 +1828,7 @@
mkU64(0)));
irgen(r1, op2addr);
+ dummy_put_IA();
if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
s390_disasm(ENC3(XMNM, GPR, SDXB), xmnm_kind, m3, r1, dh2, dl2, 0, b2);
@@ -5841,6 +5874,7 @@
if_condition_goto(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)),
guest_IA_next_instr);
put_gpr_w1(r1, get_gpr_w1(r2));
+ dummy_put_IA();
return "locr";
}
@@ -5851,6 +5885,7 @@
if_condition_goto(binop(Iop_CmpEQ32, s390_call_calculate_cond(m3), mkU32(0)),
guest_IA_next_instr);
put_gpr_dw0(r1, get_gpr_dw0(r2));
+ dummy_put_IA();
return "locgr";
}
@@ -8576,6 +8611,7 @@
if_condition_goto(binop(Iop_CmpNE64, mkexpr(counter), mkU64(length)),
guest_IA_curr_instr);
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
return "clc";
}
@@ -8869,8 +8905,8 @@
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART),
mkU64(guest_IA_curr_instr)));
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4)));
- stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval,
- IRConst_U64(guest_IA_curr_instr)));
+ stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval, IRConst_U64(guest_IA_curr_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
ss.bytes = last_execute_target;
assign(start1, binop(Iop_Add64, mkU64(ss.dec.d1),
@@ -8880,6 +8916,8 @@
assign(len, unop(lensize == 64 ? Iop_8Uto64 : Iop_8Uto32, binop(Iop_Or8,
r != 0 ? get_gpr_b7(r): mkU8(0), mkU8(ss.dec.l))));
irgen(len, start1, start2);
+ dummy_put_IA();
+
last_execute_target = 0;
}
@@ -8901,10 +8939,12 @@
mkU64(guest_IA_curr_instr)));
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4)));
stmt(IRStmt_Exit(IRExpr_Const(IRConst_U1(True)), Ijk_TInval,
- IRConst_U64(guest_IA_curr_instr)));
+ IRConst_U64(guest_IA_curr_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
/* we know that this will be invalidated */
- irsb->next = mkU64(guest_IA_next_instr);
+ put_IA(mkaddr_expr(guest_IA_next_instr));
dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_TInval;
break;
}
@@ -8959,7 +8999,8 @@
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TISTART), mkU64(guest_IA_curr_instr)));
stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_TILEN), mkU64(4)));
stmt(IRStmt_Exit(mkexpr(cond), Ijk_TInval,
- IRConst_U64(guest_IA_curr_instr)));
+ IRConst_U64(guest_IA_curr_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
/* Now comes the actual translation */
bytes = (UChar *) &last_execute_target;
@@ -8969,6 +9010,7 @@
vex_printf(" which was executed by\n");
/* don't make useless translations in the next execute */
last_execute_target = 0;
+ dummy_put_IA();
}
}
return "ex";
@@ -9033,10 +9075,12 @@
put_gpr_dw0(r1, mkexpr(next));
put_gpr_dw0(r2, binop(Iop_Add64, mkexpr(address), mkU64(1)));
stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
- Ijk_Boring, IRConst_U64(guest_IA_curr_instr)));
+ Ijk_Boring, IRConst_U64(guest_IA_curr_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
// >= 256 bytes done CC=3
s390_cc_set(3);
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
return "srst";
}
@@ -9099,10 +9143,12 @@
put_gpr_dw0(r1, binop(Iop_Add64, get_gpr_dw0(r1), mkU64(1)));
put_gpr_dw0(r2, binop(Iop_Add64, get_gpr_dw0(r2), mkU64(1)));
stmt(IRStmt_Exit(binop(Iop_CmpNE64, mkexpr(counter), mkU64(255)),
- Ijk_Boring, IRConst_U64(guest_IA_curr_instr)));
+ Ijk_Boring, IRConst_U64(guest_IA_curr_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
// >= 256 bytes done CC=3
s390_cc_set(3);
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
return "clst";
}
@@ -9297,6 +9343,7 @@
s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, get_counter_w1()),
False);
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
}
static HChar *
@@ -9341,6 +9388,7 @@
}
s390_cc_thunk_put1(S390_CC_OP_BITWISE, mktemp(Ity_I32, mkU32(0)), False);
+ dummy_put_IA();
if (UNLIKELY(vex_traceflags & VEX_TRACE_FE))
s390_disasm(ENC3(MNM, UDLB, UDXB), "xc", d, length, b, d, 0, b);
@@ -9378,6 +9426,7 @@
if_condition_goto(binop(Iop_CmpNE64, mkexpr(counter), mkU64(length)),
guest_IA_curr_instr);
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
return "mvc";
}
@@ -9558,6 +9607,7 @@
s390_cc_set(1);
put_gpr_dw0(r1, binop(Iop_Add64, mkexpr(addr1), mkexpr(counter)));
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
return "mvst";
}
@@ -9824,7 +9874,8 @@
assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0)));
put_gpr_w1(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1)));
stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield,
- IRConst_U64(guest_IA_next_instr)));
+ IRConst_U64(guest_IA_next_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
}
static HChar *
@@ -9873,7 +9924,8 @@
assign(nequal, binop(Iop_CmpNE32, s390_call_calculate_cc(), mkU32(0)));
put_gpr_dw0(r1, mkite(mkexpr(nequal), mkexpr(old_mem), mkexpr(op1)));
stmt(IRStmt_Exit(mkexpr(nequal), Ijk_Yield,
- IRConst_U64(guest_IA_next_instr)));
+ IRConst_U64(guest_IA_next_instr),
+ S390X_GUEST_OFFSET(guest_IA)));
return "csg";
}
@@ -11059,6 +11111,7 @@
guest_IA_curr_instr);
put_counter_dw0(mkU64(0));
+ dummy_put_IA();
return "tr";
}
@@ -11118,12 +11171,13 @@
if (0)
vex_printf("%%R3 = client_request ( %%R2 )\n");
- irsb->next = mkU64((ULong)(guest_IA_curr_instr
- + S390_SPECIAL_OP_PREAMBLE_SIZE
- + S390_SPECIAL_OP_SIZE));
- irsb->jumpkind = Ijk_ClientReq;
+ Addr64 next = guest_IA_curr_instr + S390_SPECIAL_OP_PREAMBLE_SIZE
+ + S390_SPECIAL_OP_SIZE;
+ dis_res->jk_StopHere = Ijk_ClientReq;
dis_res->whatNext = Dis_StopHere;
+
+ put_IA(mkaddr_expr(next));
}
static void
@@ -11138,16 +11192,17 @@
static void
s390_irgen_call_noredir(void)
{
+ Addr64 next = guest_IA_curr_instr + S390_SPECIAL_OP_PREAMBLE_SIZE
+ + S390_SPECIAL_OP_SIZE;
+
/* Continue after special op */
- put_gpr_dw0(14, mkU64(guest_IA_curr_instr
- + S390_SPECIAL_OP_PREAMBLE_SIZE
- + S390_SPECIAL_OP_SIZE));
+ put_gpr_dw0(14, mkaddr_expr(next));
/* The address is in REG1, all parameters are in the right (guest) places */
- irsb->next = get_gpr_dw0(1);
- irsb->jumpkind = Ijk_NoRedir;
+ put_IA(get_gpr_dw0(1));
dis_res->whatNext = Dis_StopHere;
+ dis_res->jk_StopHere = Ijk_NoRedir;
}
/* Force proper alignment for the structures below. */
@@ -13475,11 +13530,10 @@
}
}
/* If the next instruction is execute (EX, opcode 0x44), stop here */
- if (irsb->next == NULL && dis_res->whatNext == Dis_Continue
- && bytes[insn_length] == 0x44) {
- irsb->next = IRExpr_Const(IRConst_U64(guest_IA_next_instr));
+ if (dis_res->whatNext == Dis_Continue && bytes[insn_length] == 0x44) {
+ put_IA(mkaddr_expr(guest_IA_next_instr));
dis_res->whatNext = Dis_StopHere;
- dis_res->continueAt = 0;
+ dis_res->jk_StopHere = Ijk_Boring;
}
if (status == S390_DECODE_OK) return insn_length; /* OK */
@@ -13518,14 +13572,6 @@
}
-/* Generate an IRExpr for an address. */
-static __inline__ IRExpr *
-mkaddr_expr(Addr64 addr)
-{
- return IRExpr_Const(IRConst_U64(addr));
-}
-
-
/* Disassemble a single instruction INSN into IR. */
static DisResult
disInstr_S390_WRK(UChar *insn)
@@ -13553,6 +13599,7 @@
dres.whatNext = Dis_Continue;
dres.len = insn_length;
dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
/* fixs390: consider chasing of conditional jumps */
@@ -13561,17 +13608,28 @@
/* All decode failures end up here. The decoder has already issued an
error message.
Tell the dispatcher that this insn cannot be decoded, and so has
- not been executed, and (is currently) the next to be executed.
- IA should be up-to-date since it made so at the start of each
- insn, but nevertheless be paranoid and update it again right
- now. */
- stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_IA),
- mkaddr_expr(guest_IA_curr_instr)));
+ not been executed, and (is currently) the next to be executed. */
+ put_IA(mkaddr_expr(guest_IA_curr_instr));
- irsb->next = mkaddr_expr(guest_IA_next_instr);
- irsb->jumpkind = Ijk_NoDecode;
- dres.whatNext = Dis_StopHere;
- dres.len = 0;
+ dres.whatNext = Dis_StopHere;
+ dres.jk_StopHere = Ijk_NoDecode;
+ dres.continueAt = 0;
+ dres.len = 0;
+ } else {
+ /* Decode success */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ put_IA(mkaddr_expr(guest_IA_next_instr));
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ put_IA(mkaddr_expr(dres.continueAt));
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
}
return dres;
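
The else-branch above is the s390 side of a fixup this merge also adds at decode_success in the PPC and x86 front ends: after a successful decode, guest_IA must name the next instruction to run. A compact standalone model (schematic enum and function names):

/* Hedged model of the decode-success fixup.  For StopHere the jump
   helpers have already written the PC, so nothing more is done. */
typedef enum { Continue, ResteerU, ResteerC, StopHere } Next;

static void fixup_pc(Next what, unsigned long long fallthrough,
                     unsigned long long continueAt,
                     void (*put_pc)(unsigned long long))
{
   switch (what) {
      case Continue: put_pc(fallthrough); break; /* sequential flow */
      case ResteerU:
      case ResteerC: put_pc(continueAt);  break; /* chased branch   */
      case StopHere: break;  /* PC already written by a jump helper */
   }
}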
@@ -13587,7 +13645,6 @@
DisResult
disInstr_S390(IRSB *irsb_IN,
- Bool put_IP __attribute__((unused)),
Bool (*resteerOkFn)(void *, Addr64),
Bool resteerCisOk,
void *callback_opaque,
@@ -13610,10 +13667,6 @@
resteer_fn = resteerOkFn;
resteer_data = callback_opaque;
- /* Always update the guest IA. See comment in s390_isel_stmt for Ist_Put. */
- stmt(IRStmt_Put(S390X_GUEST_OFFSET(guest_IA),
- mkaddr_expr(guest_IA_curr_instr)));
-
return disInstr_S390_WRK(guest_code + delta);
}
diff --git a/priv/guest_x86_defs.h b/priv/guest_x86_defs.h
index 130d84d..e0b1526 100644
--- a/priv/guest_x86_defs.h
+++ b/priv/guest_x86_defs.h
@@ -47,7 +47,6 @@
bb_to_IR.h. */
extern
DisResult disInstr_X86 ( IRSB* irbb,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
diff --git a/priv/guest_x86_helpers.c b/priv/guest_x86_helpers.c
index d14d08b..9f7a8f5 100644
--- a/priv/guest_x86_helpers.c
+++ b/priv/guest_x86_helpers.c
@@ -2670,6 +2670,9 @@
/* VISIBLE TO LIBVEX CLIENT */
void LibVEX_GuestX86_initialise ( /*OUT*/VexGuestX86State* vex_state )
{
+ vex_state->host_EvC_FAILADDR = 0;
+ vex_state->host_EvC_COUNTER = 0;
+
vex_state->guest_EAX = 0;
vex_state->guest_ECX = 0;
vex_state->guest_EDX = 0;
@@ -2727,8 +2730,6 @@
vex_state->guest_IP_AT_SYSCALL = 0;
vex_state->padding1 = 0;
- vex_state->padding2 = 0;
- vex_state->padding3 = 0;
}
diff --git a/priv/guest_x86_toIR.c b/priv/guest_x86_toIR.c
index 4b15c61..8db5b54 100644
--- a/priv/guest_x86_toIR.c
+++ b/priv/guest_x86_toIR.c
@@ -768,7 +768,8 @@
binop( mkSizedOp(tyE,Iop_CasCmpNE8),
mkexpr(oldTmp), mkexpr(expTmp) ),
Ijk_Boring, /*Ijk_NoRedir*/
- IRConst_U32( restart_point )
+ IRConst_U32( restart_point ),
+ OFFB_EIP
));
}
@@ -1340,36 +1341,55 @@
/*--- JMP helpers ---*/
/*------------------------------------------------------------*/
-static void jmp_lit( IRJumpKind kind, Addr32 d32 )
+static void jmp_lit( /*MOD*/DisResult* dres,
+ IRJumpKind kind, Addr32 d32 )
{
- irsb->next = mkU32(d32);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(d32) ) );
}
-static void jmp_treg( IRJumpKind kind, IRTemp t )
+static void jmp_treg( /*MOD*/DisResult* dres,
+ IRJumpKind kind, IRTemp t )
{
- irsb->next = mkexpr(t);
- irsb->jumpkind = kind;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = kind;
+ stmt( IRStmt_Put( OFFB_EIP, mkexpr(t) ) );
}
static
-void jcc_01( X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
+void jcc_01( /*MOD*/DisResult* dres,
+ X86Condcode cond, Addr32 d32_false, Addr32 d32_true )
{
Bool invert;
X86Condcode condPos;
+ vassert(dres->whatNext == Dis_Continue);
+ vassert(dres->len == 0);
+ vassert(dres->continueAt == 0);
+ vassert(dres->jk_StopHere == Ijk_INVALID);
+ dres->whatNext = Dis_StopHere;
+ dres->jk_StopHere = Ijk_Boring;
condPos = positiveIse_X86Condcode ( cond, &invert );
if (invert) {
stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U32(d32_false) ) );
- irsb->next = mkU32(d32_true);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U32(d32_false),
+ OFFB_EIP ) );
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_true) ) );
} else {
stmt( IRStmt_Exit( mk_x86g_calculate_condition(condPos),
Ijk_Boring,
- IRConst_U32(d32_true) ) );
- irsb->next = mkU32(d32_false);
- irsb->jumpkind = Ijk_Boring;
+ IRConst_U32(d32_true),
+ OFFB_EIP ) );
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(d32_false) ) );
}
}
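
With the x86 helpers now owning the Dis_StopHere transition (and asserting that the DisResult was still in its default state on entry), call sites simply pass &dres and assert the outcome. Representative shape, not a quote of any one site:

/* Hedged call-site sketch for the reworked x86 jump helpers. */
jmp_lit(&dres, Ijk_Boring, d32);         /* writes EIP, sets StopHere  */
vassert(dres.whatNext == Dis_StopHere);  /* replaces manual assignment */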
@@ -1450,7 +1470,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, unop(Iop_64HIto32, mkexpr(r64)), mkU32(0)),
Ijk_MapFail,
- IRConst_U32( guest_EIP_curr_instr )
+ IRConst_U32( guest_EIP_curr_instr ),
+ OFFB_EIP
)
);
@@ -3009,7 +3030,7 @@
/* Group 5 extended opcodes. */
static
UInt dis_Grp5 ( UChar sorb, Bool locked, Int sz, Int delta,
- DisResult* dres, Bool* decode_OK )
+ /*MOD*/DisResult* dres, /*OUT*/Bool* decode_OK )
{
Int len;
UChar modrm;
@@ -3054,13 +3075,13 @@
assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
putIReg(4, R_ESP, mkexpr(t2));
storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+1));
- jmp_treg(Ijk_Call,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 4: /* jmp Ev */
vassert(sz == 4);
- jmp_treg(Ijk_Boring,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 6: /* PUSH Ev */
vassert(sz == 4 || sz == 2);
@@ -3110,13 +3131,13 @@
assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
putIReg(4, R_ESP, mkexpr(t2));
storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta+len));
- jmp_treg(Ijk_Call,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Call, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 4: /* JMP Ev */
vassert(sz == 4);
- jmp_treg(Ijk_Boring,t1);
- dres->whatNext = Dis_StopHere;
+ jmp_treg(dres, Ijk_Boring, t1);
+ vassert(dres->whatNext == Dis_StopHere);
break;
case 6: /* PUSH Ev */
vassert(sz == 4 || sz == 2);
@@ -3253,7 +3274,8 @@
We assume the insn is the last one in the basic block, and so emit a jump
to the next insn, rather than just falling through. */
static
-void dis_REP_op ( X86Condcode cond,
+void dis_REP_op ( /*MOD*/DisResult* dres,
+ X86Condcode cond,
void (*dis_OP)(Int, IRTemp),
Int sz, Addr32 eip, Addr32 eip_next, HChar* name )
{
@@ -3264,7 +3286,7 @@
stmt( IRStmt_Exit( binop(Iop_CmpEQ32,mkexpr(tc),mkU32(0)),
Ijk_Boring,
- IRConst_U32(eip_next) ) );
+ IRConst_U32(eip_next), OFFB_EIP ) );
putIReg(4, R_ECX, binop(Iop_Sub32, mkexpr(tc), mkU32(1)) );
@@ -3272,12 +3294,14 @@
dis_OP (sz, t_inc);
if (cond == X86CondAlways) {
- jmp_lit(Ijk_Boring,eip);
+ jmp_lit(dres, Ijk_Boring, eip);
+ vassert(dres->whatNext == Dis_StopHere);
} else {
stmt( IRStmt_Exit( mk_x86g_calculate_condition(cond),
Ijk_Boring,
- IRConst_U32(eip) ) );
- jmp_lit(Ijk_Boring,eip_next);
+ IRConst_U32(eip), OFFB_EIP ) );
+ jmp_lit(dres, Ijk_Boring, eip_next);
+ vassert(dres->whatNext == Dis_StopHere);
}
DIP("%s%c\n", name, nameISize(sz));
}
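
Per the comment above dis_REP_op, the insn ends the block and loops by re-entering itself. A hedged standalone model of the per-block step it emits (one string-op unit per translated block):

#include <stdint.h>

/* Hedged C model of the IR a REP-prefixed string op turns into:
   one iteration per block entry, looping via the guest PC. */
typedef void (*StrOp)(int sz);

static uint32_t rep_step(uint32_t *ecx, StrOp op, int sz,
                         uint32_t eip, uint32_t eip_next)
{
   if (*ecx == 0) return eip_next; /* IRStmt_Exit when count is 0   */
   *ecx -= 1;                      /* putIReg(4, R_ECX, ECX - 1)    */
   op(sz);                         /* one unit of movs/cmps/stos... */
   return eip;                     /* jmp_lit back to this insn     */
}

For the conditional variants (repe/repne) there is one more way out of the loop, mirrored by the second IRStmt_Exit in the helper above.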
@@ -3958,7 +3982,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
@@ -4000,7 +4025,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
break;
@@ -4948,7 +4974,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
@@ -6811,13 +6838,15 @@
}
static
-void dis_ret ( UInt d32 )
+void dis_ret ( /*MOD*/DisResult* dres, UInt d32 )
{
- IRTemp t1 = newTemp(Ity_I32), t2 = newTemp(Ity_I32);
+ IRTemp t1 = newTemp(Ity_I32);
+ IRTemp t2 = newTemp(Ity_I32);
assign(t1, getIReg(4,R_ESP));
assign(t2, loadLE(Ity_I32,mkexpr(t1)));
putIReg(4, R_ESP,binop(Iop_Add32, mkexpr(t1), mkU32(4+d32)));
- jmp_treg(Ijk_Ret,t2);
+ jmp_treg(dres, Ijk_Ret, t2);
+ vassert(dres->whatNext == Dis_StopHere);
}
/*------------------------------------------------------------*/
@@ -7523,7 +7552,8 @@
binop(Iop_And32, mkexpr(t1), mkU32(1<<18)),
mkU32(0) ),
Ijk_EmWarn,
- IRConst_U32( next_insn_EIP )
+ IRConst_U32( next_insn_EIP ),
+ OFFB_EIP
)
);
}
@@ -7700,7 +7730,8 @@
binop(Iop_And32,mkexpr(effective_addr),mkU32(0xF)),
mkU32(0)),
Ijk_SigSEGV,
- IRConst_U32(guest_EIP_curr_instr)
+ IRConst_U32(guest_EIP_curr_instr),
+ OFFB_EIP
)
);
}
@@ -7854,7 +7885,6 @@
static
DisResult disInstr_X86_WRK (
/*OUT*/Bool* expect_CAS,
- Bool put_IP,
Bool (*resteerOkFn) ( /*opaque*/void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -7893,9 +7923,10 @@
Bool pfx_lock = False;
/* Set result defaults. */
- dres.whatNext = Dis_Continue;
- dres.len = 0;
- dres.continueAt = 0;
+ dres.whatNext = Dis_Continue;
+ dres.len = 0;
+ dres.continueAt = 0;
+ dres.jk_StopHere = Ijk_INVALID;
*expect_CAS = False;
@@ -7904,10 +7935,6 @@
vassert(guest_EIP_bbstart + delta == guest_EIP_curr_instr);
DIP("\t0x%x: ", guest_EIP_bbstart+delta);
- /* We may be asked to update the guest EIP before going further. */
- if (put_IP)
- stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr)) );
-
/* Spot "Special" instructions (see comment at top of file). */
{
UChar* code = (UChar*)(guest_code + delta);
@@ -7926,8 +7953,8 @@
/* %EDX = client_request ( %EAX ) */
DIP("%%edx = client_request ( %%eax )\n");
delta += 14;
- jmp_lit(Ijk_ClientReq, guest_EIP_bbstart+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_ClientReq, guest_EIP_bbstart+delta);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
else
@@ -7949,8 +7976,8 @@
assign(t2, binop(Iop_Sub32, getIReg(4,R_ESP), mkU32(4)));
putIReg(4, R_ESP, mkexpr(t2));
storeLE( mkexpr(t2), mkU32(guest_EIP_bbstart+delta));
- jmp_treg(Ijk_NoRedir,t1);
- dres.whatNext = Dis_StopHere;
+ jmp_treg(&dres, Ijk_NoRedir, t1);
+ vassert(dres.whatNext == Dis_StopHere);
goto decode_success;
}
/* We don't know what it is. */
@@ -8537,7 +8564,8 @@
IRStmt_Exit(
binop(Iop_CmpNE32, mkexpr(ew), mkU32(0)),
Ijk_EmWarn,
- IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta)
+ IRConst_U32( ((Addr32)guest_EIP_bbstart)+delta),
+ OFFB_EIP
)
);
goto decode_success;
@@ -11521,9 +11549,7 @@
stmt( IRStmt_Put(OFFB_TILEN, mkU32(lineszB) ) );
- irsb->jumpkind = Ijk_TInval;
- irsb->next = mkU32(guest_EIP_bbstart+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_TInval, (Addr32)(guest_EIP_bbstart+delta));
DIP("clflush %s\n", dis_buf);
goto decode_success;
@@ -12729,7 +12755,8 @@
stmt( IRStmt_Exit(
binop(Iop_CmpEQ16, getIReg(2,R_ECX), mkU16(0)),
Ijk_Boring,
- IRConst_U32(d32)
+ IRConst_U32(d32),
+ OFFB_EIP
));
DIP("jcxz 0x%x\n", d32);
goto decode_success;
@@ -12752,13 +12779,11 @@
case 0xC2: /* RET imm16 */
d32 = getUDisp16(delta);
delta += 2;
- dis_ret(d32);
- dres.whatNext = Dis_StopHere;
+ dis_ret(&dres, d32);
DIP("ret %d\n", (Int)d32);
break;
case 0xC3: /* RET */
- dis_ret(0);
- dres.whatNext = Dis_StopHere;
+ dis_ret(&dres, 0);
DIP("ret\n");
break;
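
The RET cases shrink to a single call; the before/after of this hunk, compressed:

/* before */  dis_ret(d32);        dres.whatNext = Dis_StopHere;
/* after  */  dis_ret(&dres, d32); /* helper marks Dis_StopHere itself */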
@@ -12782,8 +12807,8 @@
/* set %EFLAGS */
set_EFLAGS_from_value( t4, False/*!emit_AC_emwarn*/, 0/*unused*/ );
/* goto new EIP value */
- jmp_treg(Ijk_Ret,t2);
- dres.whatNext = Dis_StopHere;
+ jmp_treg(&dres, Ijk_Ret, t2);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("iret (very kludgey)\n");
break;
@@ -12815,8 +12840,8 @@
dres.whatNext = Dis_ResteerU;
dres.continueAt = (Addr64)(Addr32)d32;
} else {
- jmp_lit(Ijk_Call,d32);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Call, d32);
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("call 0x%x\n",d32);
}
@@ -13060,8 +13085,8 @@
/* ------------------------ INT ------------------------ */
case 0xCC: /* INT 3 */
- jmp_lit(Ijk_SigTRAP,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_SigTRAP, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x3\n");
break;
@@ -13082,8 +13107,8 @@
This used to handle just 0x40-0x43; Jikes RVM uses a larger
range (0x3F-0x49), and this allows some slack as well. */
if (d32 >= 0x3F && d32 <= 0x4F) {
- jmp_lit(Ijk_SigSEGV,((Addr32)guest_EIP_bbstart)+delta-2);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_SigSEGV, ((Addr32)guest_EIP_bbstart)+delta-2);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x%x\n", (Int)d32);
break;
}
@@ -13095,24 +13120,24 @@
if (d32 == 0x80) {
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_int128,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_int128, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x80\n");
break;
}
if (d32 == 0x81) {
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_int129,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_int129, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x81\n");
break;
}
if (d32 == 0x82) {
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_int130,((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_int130, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("int $0x82\n");
break;
}
@@ -13129,8 +13154,8 @@
dres.whatNext = Dis_ResteerU;
dres.continueAt = (Addr64)(Addr32)d32;
} else {
- jmp_lit(Ijk_Boring,d32);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Boring, d32);
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("jmp-8 0x%x\n", d32);
break;
@@ -13143,8 +13168,8 @@
dres.whatNext = Dis_ResteerU;
dres.continueAt = (Addr64)(Addr32)d32;
} else {
- jmp_lit(Ijk_Boring,d32);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Boring, d32);
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("jmp 0x%x\n", d32);
break;
@@ -13185,7 +13210,8 @@
stmt( IRStmt_Exit(
mk_x86g_calculate_condition((X86Condcode)(1 ^ (opc - 0x70))),
Ijk_Boring,
- IRConst_U32(guest_EIP_bbstart+delta) ) );
+ IRConst_U32(guest_EIP_bbstart+delta),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)d32;
comment = "(assumed taken)";
@@ -13204,7 +13230,8 @@
stmt( IRStmt_Exit(
mk_x86g_calculate_condition((X86Condcode)(opc - 0x70)),
Ijk_Boring,
- IRConst_U32(d32) ) );
+ IRConst_U32(d32),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
comment = "(assumed not taken)";
@@ -13212,9 +13239,9 @@
else {
/* Conservative default translation - end the block at this
point. */
- jcc_01( (X86Condcode)(opc - 0x70),
+ jcc_01( &dres, (X86Condcode)(opc - 0x70),
(Addr32)(guest_EIP_bbstart+delta), d32);
- dres.whatNext = Dis_StopHere;
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("j%s-8 0x%x %s\n", name_X86Condcode(opc - 0x70), d32, comment);
break;
@@ -13227,7 +13254,8 @@
stmt( IRStmt_Exit(
binop(Iop_CmpEQ32, getIReg(4,R_ECX), mkU32(0)),
Ijk_Boring,
- IRConst_U32(d32)
+ IRConst_U32(d32),
+ OFFB_EIP
));
DIP("jecxz 0x%x\n", d32);
break;
@@ -13268,7 +13296,7 @@
default:
vassert(0);
}
- stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32)) );
+ stmt( IRStmt_Exit(cond, Ijk_Boring, IRConst_U32(d32), OFFB_EIP) );
DIP("loop%s 0x%x\n", xtra, d32);
break;
@@ -13948,33 +13976,32 @@
abyte = getIByte(delta); delta++;
if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
- dres.whatNext = Dis_StopHere;
switch (abyte) {
/* According to the Intel manual, "repne movs" should never occur, but
* in practice it has happened, so allow for it here... */
case 0xA4: sz = 1; /* REPNE MOVS<sz> */
case 0xA5:
- dis_REP_op ( X86CondNZ, dis_MOVS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne movs" );
+ dis_REP_op ( &dres, X86CondNZ, dis_MOVS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne movs" );
break;
case 0xA6: sz = 1; /* REPNE CMP<sz> */
case 0xA7:
- dis_REP_op ( X86CondNZ, dis_CMPS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne cmps" );
+ dis_REP_op ( &dres, X86CondNZ, dis_CMPS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne cmps" );
break;
case 0xAA: sz = 1; /* REPNE STOS<sz> */
case 0xAB:
- dis_REP_op ( X86CondNZ, dis_STOS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne stos" );
+ dis_REP_op ( &dres, X86CondNZ, dis_STOS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne stos" );
break;
case 0xAE: sz = 1; /* REPNE SCAS<sz> */
case 0xAF:
- dis_REP_op ( X86CondNZ, dis_SCAS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repne scas" );
+ dis_REP_op ( &dres, X86CondNZ, dis_SCAS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repne scas" );
break;
default:
@@ -13991,37 +14018,36 @@
abyte = getIByte(delta); delta++;
if (abyte == 0x66) { sz = 2; abyte = getIByte(delta); delta++; }
- dres.whatNext = Dis_StopHere;
switch (abyte) {
case 0xA4: sz = 1; /* REP MOVS<sz> */
case 0xA5:
- dis_REP_op ( X86CondAlways, dis_MOVS, sz, eip_orig,
- guest_EIP_bbstart+delta, "rep movs" );
+ dis_REP_op ( &dres, X86CondAlways, dis_MOVS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep movs" );
break;
case 0xA6: sz = 1; /* REPE CMP<sz> */
case 0xA7:
- dis_REP_op ( X86CondZ, dis_CMPS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repe cmps" );
+ dis_REP_op ( &dres, X86CondZ, dis_CMPS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repe cmps" );
break;
case 0xAA: sz = 1; /* REP STOS<sz> */
case 0xAB:
- dis_REP_op ( X86CondAlways, dis_STOS, sz, eip_orig,
- guest_EIP_bbstart+delta, "rep stos" );
+ dis_REP_op ( &dres, X86CondAlways, dis_STOS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep stos" );
break;
case 0xAC: sz = 1; /* REP LODS<sz> */
case 0xAD:
- dis_REP_op ( X86CondAlways, dis_LODS, sz, eip_orig,
- guest_EIP_bbstart+delta, "rep lods" );
+ dis_REP_op ( &dres, X86CondAlways, dis_LODS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "rep lods" );
break;
case 0xAE: sz = 1; /* REPE SCAS<sz> */
case 0xAF:
- dis_REP_op ( X86CondZ, dis_SCAS, sz, eip_orig,
- guest_EIP_bbstart+delta, "repe scas" );
+ dis_REP_op ( &dres, X86CondZ, dis_SCAS, sz, eip_orig,
+ guest_EIP_bbstart+delta, "repe scas" );
break;
case 0x90: /* REP NOP (PAUSE) */
@@ -14029,13 +14055,12 @@
DIP("rep nop (P4 pause)\n");
/* "observe" the hint. The Vex client needs to be careful not
to cause very long delays as a result, though. */
- jmp_lit(Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Yield, ((Addr32)guest_EIP_bbstart)+delta);
+ vassert(dres.whatNext == Dis_StopHere);
break;
case 0xC3: /* REP RET -- same as normal ret? */
- dis_ret(0);
- dres.whatNext = Dis_StopHere;
+ dis_ret(&dres, 0);
DIP("rep ret\n");
break;
@@ -14741,7 +14766,8 @@
mk_x86g_calculate_condition((X86Condcode)
(1 ^ (opc - 0x80))),
Ijk_Boring,
- IRConst_U32(guest_EIP_bbstart+delta) ) );
+ IRConst_U32(guest_EIP_bbstart+delta),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)d32;
comment = "(assumed taken)";
@@ -14760,7 +14786,8 @@
stmt( IRStmt_Exit(
mk_x86g_calculate_condition((X86Condcode)(opc - 0x80)),
Ijk_Boring,
- IRConst_U32(d32) ) );
+ IRConst_U32(d32),
+ OFFB_EIP ) );
dres.whatNext = Dis_ResteerC;
dres.continueAt = (Addr64)(Addr32)(guest_EIP_bbstart+delta);
comment = "(assumed not taken)";
@@ -14768,9 +14795,9 @@
else {
/* Conservative default translation - end the block at
this point. */
- jcc_01( (X86Condcode)(opc - 0x80),
+ jcc_01( &dres, (X86Condcode)(opc - 0x80),
(Addr32)(guest_EIP_bbstart+delta), d32);
- dres.whatNext = Dis_StopHere;
+ vassert(dres.whatNext == Dis_StopHere);
}
DIP("j%s-32 0x%x %s\n", name_X86Condcode(opc - 0x80), d32, comment);
break;
@@ -14896,8 +14923,8 @@
point if the syscall needs to be restarted. */
stmt( IRStmt_Put( OFFB_IP_AT_SYSCALL,
mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_Sys_sysenter, 0/*bogus next EIP value*/);
+ vassert(dres.whatNext == Dis_StopHere);
DIP("sysenter");
break;
@@ -15073,8 +15100,8 @@
insn, but nevertheless be paranoid and update it again right
now. */
stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_curr_instr) ) );
- jmp_lit(Ijk_NoDecode, guest_EIP_curr_instr);
- dres.whatNext = Dis_StopHere;
+ jmp_lit(&dres, Ijk_NoDecode, guest_EIP_curr_instr);
+ vassert(dres.whatNext == Dis_StopHere);
dres.len = 0;
/* We also need to say that a CAS is not expected now, regardless
of what it might have been set to at the start of the function,
@@ -15088,6 +15115,20 @@
decode_success:
/* All decode successes end up here. */
+ switch (dres.whatNext) {
+ case Dis_Continue:
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(guest_EIP_bbstart + delta) ) );
+ break;
+ case Dis_ResteerU:
+ case Dis_ResteerC:
+ stmt( IRStmt_Put( OFFB_EIP, mkU32(dres.continueAt) ) );
+ break;
+ case Dis_StopHere:
+ break;
+ default:
+ vassert(0);
+ }
+
DIP("\n");
dres.len = delta - delta_start;
return dres;
@@ -15105,7 +15146,6 @@
is located in host memory at &guest_code[delta]. */
DisResult disInstr_X86 ( IRSB* irsb_IN,
- Bool put_IP,
Bool (*resteerOkFn) ( void*, Addr64 ),
Bool resteerCisOk,
void* callback_opaque,
@@ -15131,7 +15171,7 @@
x1 = irsb_IN->stmts_used;
expect_CAS = False;
- dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
@@ -15151,7 +15191,7 @@
/* inconsistency detected. re-disassemble the instruction so as
to generate a useful error message; then assert. */
vex_traceflags |= VEX_TRACE_FE;
- dres = disInstr_X86_WRK ( &expect_CAS, put_IP, resteerOkFn,
+ dres = disInstr_X86_WRK ( &expect_CAS, resteerOkFn,
resteerCisOk,
callback_opaque,
delta, archinfo, abiinfo );
diff --git a/priv/host_amd64_defs.c b/priv/host_amd64_defs.c
index 8b97772..479a0c5 100644
--- a/priv/host_amd64_defs.c
+++ b/priv/host_amd64_defs.c
@@ -118,13 +118,6 @@
HReg hregAMD64_R14 ( void ) { return mkHReg(14, HRcInt64, False); }
HReg hregAMD64_R15 ( void ) { return mkHReg(15, HRcInt64, False); }
-//.. HReg hregAMD64_FAKE0 ( void ) { return mkHReg(0, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE1 ( void ) { return mkHReg(1, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE2 ( void ) { return mkHReg(2, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE3 ( void ) { return mkHReg(3, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE4 ( void ) { return mkHReg(4, HRcFlt64, False); }
-//.. HReg hregAMD64_FAKE5 ( void ) { return mkHReg(5, HRcFlt64, False); }
-//..
HReg hregAMD64_XMM0 ( void ) { return mkHReg( 0, HRcVec128, False); }
HReg hregAMD64_XMM1 ( void ) { return mkHReg( 1, HRcVec128, False); }
HReg hregAMD64_XMM2 ( void ) { return mkHReg( 2, HRcVec128, False); }
@@ -231,18 +224,6 @@
return am;
}
-//.. AMD64AMode* dopyAMD64AMode ( AMD64AMode* am ) {
-//.. switch (am->tag) {
-//.. case Xam_IR:
-//.. return AMD64AMode_IR( am->Xam.IR.imm, am->Xam.IR.reg );
-//.. case Xam_IRRS:
-//.. return AMD64AMode_IRRS( am->Xam.IRRS.imm, am->Xam.IRRS.base,
-//.. am->Xam.IRRS.index, am->Xam.IRRS.shift );
-//.. default:
-//.. vpanic("dopyAMD64AMode");
-//.. }
-//.. }
-
void ppAMD64AMode ( AMD64AMode* am ) {
switch (am->tag) {
case Aam_IR:
@@ -538,10 +519,6 @@
HChar* showA87FpOp ( A87FpOp op ) {
switch (op) {
-//.. case Xfp_ADD: return "add";
-//.. case Xfp_SUB: return "sub";
-//.. case Xfp_MUL: return "mul";
-//.. case Xfp_DIV: return "div";
case Afp_SCALE: return "scale";
case Afp_ATAN: return "atan";
case Afp_YL2X: return "yl2x";
@@ -549,9 +526,6 @@
case Afp_PREM: return "prem";
case Afp_PREM1: return "prem1";
case Afp_SQRT: return "sqrt";
-//.. case Xfp_ABS: return "abs";
-//.. case Xfp_NEG: return "chs";
-//.. case Xfp_MOV: return "mov";
case Afp_SIN: return "sin";
case Afp_COS: return "cos";
case Afp_TAN: return "tan";
@@ -717,16 +691,6 @@
vassert(sz == 4 || sz == 8);
return i;
}
-//.. AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp op, UInt amt, HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_Sh3232;
-//.. i->Xin.Sh3232.op = op;
-//.. i->Xin.Sh3232.amt = amt;
-//.. i->Xin.Sh3232.src = src;
-//.. i->Xin.Sh3232.dst = dst;
-//.. vassert(op == Xsh_SHL || op == Xsh_SHR);
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_Push( AMD64RMI* src ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_Push;
@@ -742,14 +706,37 @@
vassert(regparms >= 0 && regparms <= 6);
return i;
}
-AMD64Instr* AMD64Instr_Goto ( IRJumpKind jk, AMD64CondCode cond, AMD64RI* dst ) {
- AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
- i->tag = Ain_Goto;
- i->Ain.Goto.cond = cond;
- i->Ain.Goto.dst = dst;
- i->Ain.Goto.jk = jk;
+
+AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, Bool toFastEP ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_XDirect;
+ i->Ain.XDirect.dstGA = dstGA;
+ i->Ain.XDirect.amRIP = amRIP;
+ i->Ain.XDirect.cond = cond;
+ i->Ain.XDirect.toFastEP = toFastEP;
return i;
}
+AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_XIndir;
+ i->Ain.XIndir.dstGA = dstGA;
+ i->Ain.XIndir.amRIP = amRIP;
+ i->Ain.XIndir.cond = cond;
+ return i;
+}
+AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, IRJumpKind jk ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_XAssisted;
+ i->Ain.XAssisted.dstGA = dstGA;
+ i->Ain.XAssisted.amRIP = amRIP;
+ i->Ain.XAssisted.cond = cond;
+ i->Ain.XAssisted.jk = jk;
+ return i;
+}
+
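
Three exit forms replace the old Ain_Goto: XDirect for a chainable jump to a known constant address, XIndir for a boring jump to a computed address, and XAssisted for anything carrying a non-boring jump kind that the dispatcher must see. A hedged sketch of how a selector might choose among them ('emit' and the operand names are placeholders, not VEX functions):

/* Hedged selection sketch for the three block-exit forms. */
static void emit_block_exit(IRJumpKind jk, Bool dst_known,
                            Addr64 dstGA, HReg r_dst,
                            AMD64AMode* amRIP, AMD64CondCode cond,
                            Bool toFastEP)
{
   if (jk == Ijk_Boring && dst_known)
      emit(AMD64Instr_XDirect(dstGA, amRIP, cond, toFastEP)); /* chainable */
   else if (jk == Ijk_Boring)
      emit(AMD64Instr_XIndir(r_dst, amRIP, cond));            /* indirect  */
   else
      emit(AMD64Instr_XAssisted(r_dst, amRIP, cond, jk));     /* assisted  */
}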
AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode cond, AMD64RM* src, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_CMov64;
@@ -863,72 +850,12 @@
i->Ain.A87StSW.addr = addr;
return i;
}
-
-//.. AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpUnary;
-//.. i->Xin.FpUnary.op = op;
-//.. i->Xin.FpUnary.src = src;
-//.. i->Xin.FpUnary.dst = dst;
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpBinary;
-//.. i->Xin.FpBinary.op = op;
-//.. i->Xin.FpBinary.srcL = srcL;
-//.. i->Xin.FpBinary.srcR = srcR;
-//.. i->Xin.FpBinary.dst = dst;
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* addr ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpLdSt;
-//.. i->Xin.FpLdSt.isLoad = isLoad;
-//.. i->Xin.FpLdSt.sz = sz;
-//.. i->Xin.FpLdSt.reg = reg;
-//.. i->Xin.FpLdSt.addr = addr;
-//.. vassert(sz == 4 || sz == 8);
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz,
-//.. HReg reg, AMD64AMode* addr ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpLdStI;
-//.. i->Xin.FpLdStI.isLoad = isLoad;
-//.. i->Xin.FpLdStI.sz = sz;
-//.. i->Xin.FpLdStI.reg = reg;
-//.. i->Xin.FpLdStI.addr = addr;
-//.. vassert(sz == 2 || sz == 4 || sz == 8);
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_Fp64to32;
-//.. i->Xin.Fp64to32.src = src;
-//.. i->Xin.Fp64to32.dst = dst;
-//.. return i;
-//.. }
-//.. AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode cond, HReg src, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpCMov;
-//.. i->Xin.FpCMov.cond = cond;
-//.. i->Xin.FpCMov.src = src;
-//.. i->Xin.FpCMov.dst = dst;
-//.. vassert(cond != Xcc_ALWAYS);
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* addr ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_LdMXCSR;
i->Ain.LdMXCSR.addr = addr;
return i;
}
-//.. AMD64Instr* AMD64Instr_FpStSW_AX ( void ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_FpStSW_AX;
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
i->tag = Ain_SseUComIS;
@@ -970,15 +897,6 @@
i->Ain.SseSDSS.dst = dst;
return i;
}
-
-//.. AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst ) {
-//.. AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
-//.. i->tag = Xin_SseConst;
-//.. i->Xin.SseConst.con = con;
-//.. i->Xin.SseConst.dst = dst;
-//.. vassert(hregClass(dst) == HRcVec128);
-//.. return i;
-//.. }
AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz,
HReg reg, AMD64AMode* addr ) {
AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
@@ -1062,6 +980,19 @@
vassert(order >= 0 && order <= 0xFF);
return i;
}
+AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
+ AMD64AMode* amFailAddr ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_EvCheck;
+ i->Ain.EvCheck.amCounter = amCounter;
+ i->Ain.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+AMD64Instr* AMD64Instr_ProfInc ( void ) {
+ AMD64Instr* i = LibVEX_Alloc(sizeof(AMD64Instr));
+ i->tag = Ain_ProfInc;
+ return i;
+}
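
Ain_EvCheck is the per-block event check underpinning this scheme: decrement a counter held in the guest state; if it goes negative, leave through a fail address, otherwise fall through into the block. A standalone C model of those semantics (field names illustrative; the real gadget is the decl/jns/jmp sequence shown in ppAMD64Instr below):

#include <stdio.h>

/* Hedged model of the Ain_EvCheck gadget's semantics. */
typedef struct {
   int  evc_counter;            /* amCounter:  events remaining     */
   void (*evc_failaddr)(void);  /* amFailAddr: where to bail out to */
} GuestStateModel;

static int ev_check(GuestStateModel *gst)
{
   gst->evc_counter -= 1;          /* decl amCounter                */
   if (gst->evc_counter < 0) {     /* jns nofail                    */
      gst->evc_failaddr();         /* jmp *amFailAddr               */
      return 1;
   }
   return 0;                       /* nofail: continue in the block */
}

static void back_to_scheduler(void) { puts("event check tripped"); }

int main(void)
{
   GuestStateModel gst = { 2, back_to_scheduler };
   while (!ev_check(&gst)) { /* translated code would run here */ }
   return 0;
}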
void ppAMD64Instr ( AMD64Instr* i, Bool mode64 )
{
@@ -1121,16 +1052,6 @@
showAMD64ScalarSz(i->Ain.Div.sz));
ppAMD64RM(i->Ain.Div.src);
return;
-//.. case Xin_Sh3232:
-//.. vex_printf("%sdl ", showAMD64ShiftOp(i->Xin.Sh3232.op));
-//.. if (i->Xin.Sh3232.amt == 0)
-//.. vex_printf(" %%cl,");
-//.. else
-//.. vex_printf(" $%d,", i->Xin.Sh3232.amt);
-//.. ppHRegAMD64(i->Xin.Sh3232.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.Sh3232.dst);
-//.. return;
case Ain_Push:
vex_printf("pushq ");
ppAMD64RMI(i->Ain.Push.src);
@@ -1142,25 +1063,41 @@
i->Ain.Call.regparms );
vex_printf("0x%llx", i->Ain.Call.target);
break;
- case Ain_Goto:
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
- vex_printf("if (%%rflags.%s) { ",
- showAMD64CondCode(i->Ain.Goto.cond));
- }
- if (i->Ain.Goto.jk != Ijk_Boring
- && i->Ain.Goto.jk != Ijk_Call
- && i->Ain.Goto.jk != Ijk_Ret) {
- vex_printf("movl $");
- ppIRJumpKind(i->Ain.Goto.jk);
- vex_printf(",%%ebp ; ");
- }
- vex_printf("movq ");
- ppAMD64RI(i->Ain.Goto.dst);
- vex_printf(",%%rax ; movabsq $dispatcher_addr,%%rdx ; jmp *%%rdx");
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
- vex_printf(" }");
- }
+
+ case Ain_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.XDirect.cond));
+ vex_printf("movabsq $0x%llx,%%r11; ", i->Ain.XDirect.dstGA);
+ vex_printf("movq %%r11,");
+ ppAMD64AMode(i->Ain.XDirect.amRIP);
+ vex_printf("; ");
+ vex_printf("movabsq $disp_cp_chain_me_to_%sEP,%%r11; call *%%r11 }",
+ i->Ain.XDirect.toFastEP ? "fast" : "slow");
return;
+ case Ain_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.XIndir.cond));
+ vex_printf("movq ");
+ ppHRegAMD64(i->Ain.XIndir.dstGA);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.XIndir.amRIP);
+ vex_printf("; movabsq $disp_indir,%%r11; jmp *%%r11 }");
+ return;
+ case Ain_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%%rflags.%s) { ",
+ showAMD64CondCode(i->Ain.XAssisted.cond));
+ vex_printf("movq ");
+ ppHRegAMD64(i->Ain.XAssisted.dstGA);
+ vex_printf(",");
+ ppAMD64AMode(i->Ain.XAssisted.amRIP);
+ vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%rbp",
+ (Int)i->Ain.XAssisted.jk);
+ vex_printf("; movabsq $disp_assisted,%%r11; jmp *%%r11 }");
+ return;
+
case Ain_CMov64:
vex_printf("cmov%s ", showAMD64CondCode(i->Ain.CMov64.cond));
ppAMD64RM(i->Ain.CMov64.src);
@@ -1241,67 +1178,6 @@
vex_printf("fstsw ");
ppAMD64AMode(i->Ain.A87StSW.addr);
break;
-//.. case Xin_FpUnary:
-//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpUnary.op));
-//.. ppHRegAMD64(i->Xin.FpUnary.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpUnary.dst);
-//.. break;
-//.. case Xin_FpBinary:
-//.. vex_printf("g%sD ", showAMD64FpOp(i->Xin.FpBinary.op));
-//.. ppHRegAMD64(i->Xin.FpBinary.srcL);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpBinary.srcR);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpBinary.dst);
-//.. break;
-//.. case Xin_FpLdSt:
-//.. if (i->Xin.FpLdSt.isLoad) {
-//.. vex_printf("gld%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
-//.. ppAMD64AMode(i->Xin.FpLdSt.addr);
-//.. vex_printf(", ");
-//.. ppHRegAMD64(i->Xin.FpLdSt.reg);
-//.. } else {
-//.. vex_printf("gst%c " , i->Xin.FpLdSt.sz==8 ? 'D' : 'F');
-//.. ppHRegAMD64(i->Xin.FpLdSt.reg);
-//.. vex_printf(", ");
-//.. ppAMD64AMode(i->Xin.FpLdSt.addr);
-//.. }
-//.. return;
-//.. case Xin_FpLdStI:
-//.. if (i->Xin.FpLdStI.isLoad) {
-//.. vex_printf("gild%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
-//.. i->Xin.FpLdStI.sz==4 ? "l" : "w");
-//.. ppAMD64AMode(i->Xin.FpLdStI.addr);
-//.. vex_printf(", ");
-//.. ppHRegAMD64(i->Xin.FpLdStI.reg);
-//.. } else {
-//.. vex_printf("gist%s ", i->Xin.FpLdStI.sz==8 ? "ll" :
-//.. i->Xin.FpLdStI.sz==4 ? "l" : "w");
-//.. ppHRegAMD64(i->Xin.FpLdStI.reg);
-//.. vex_printf(", ");
-//.. ppAMD64AMode(i->Xin.FpLdStI.addr);
-//.. }
-//.. return;
-//.. case Xin_Fp64to32:
-//.. vex_printf("gdtof ");
-//.. ppHRegAMD64(i->Xin.Fp64to32.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.Fp64to32.dst);
-//.. return;
-//.. case Xin_FpCMov:
-//.. vex_printf("gcmov%s ", showAMD64CondCode(i->Xin.FpCMov.cond));
-//.. ppHRegAMD64(i->Xin.FpCMov.src);
-//.. vex_printf(",");
-//.. ppHRegAMD64(i->Xin.FpCMov.dst);
-//.. return;
-//.. case Xin_FpLdStCW:
-//.. vex_printf(i->Xin.FpLdStCW.isLoad ? "fldcw " : "fstcw ");
-//.. ppAMD64AMode(i->Xin.FpLdStCW.addr);
-//.. return;
-//.. case Xin_FpStSW_AX:
-//.. vex_printf("fstsw %%ax");
-//.. return;
case Ain_LdMXCSR:
vex_printf("ldmxcsr ");
ppAMD64AMode(i->Ain.LdMXCSR.addr);
@@ -1334,10 +1210,6 @@
vex_printf(",");
ppHRegAMD64(i->Ain.SseSDSS.dst);
break;
-//.. case Xin_SseConst:
-//.. vex_printf("const $0x%04x,", (Int)i->Xin.SseConst.con);
-//.. ppHRegAMD64(i->Xin.SseConst.dst);
-//.. break;
case Ain_SseLdSt:
switch (i->Ain.SseLdSt.sz) {
case 4: vex_printf("movss "); break;
@@ -1403,7 +1275,16 @@
vex_printf(",");
ppHRegAMD64(i->Ain.SseShuf.dst);
return;
-
+ case Ain_EvCheck:
+ vex_printf("(evCheck) decl ");
+ ppAMD64AMode(i->Ain.EvCheck.amCounter);
+ vex_printf("; jns nofail; jmp *");
+ ppAMD64AMode(i->Ain.EvCheck.amFailAddr);
+ vex_printf("; nofail:");
+ return;
+ case Ain_ProfInc:
+ vex_printf("(profInc) movabsq $NotKnownYet, %%r11; incq (%%r11)");
+ return;
default:
vpanic("ppAMD64Instr");
}
@@ -1470,12 +1351,6 @@
addHRegUse(u, HRmModify, hregAMD64_RAX());
addHRegUse(u, HRmModify, hregAMD64_RDX());
return;
-//.. case Xin_Sh3232:
-//.. addHRegUse(u, HRmRead, i->Xin.Sh3232.src);
-//.. addHRegUse(u, HRmModify, i->Xin.Sh3232.dst);
-//.. if (i->Xin.Sh3232.amt == 0)
-//.. addHRegUse(u, HRmRead, hregAMD64_ECX());
-//.. return;
case Ain_Push:
addRegUsage_AMD64RMI(u, i->Ain.Push.src);
addHRegUse(u, HRmModify, hregAMD64_RSP());
@@ -1533,16 +1408,25 @@
/* Upshot of this is that the assembler really must use r11,
and no other, as a destination temporary. */
return;
- case Ain_Goto:
- addRegUsage_AMD64RI(u, i->Ain.Goto.dst);
- addHRegUse(u, HRmWrite, hregAMD64_RAX()); /* used for next guest addr */
- addHRegUse(u, HRmWrite, hregAMD64_RDX()); /* used for dispatcher addr */
- if (i->Ain.Goto.jk != Ijk_Boring
- && i->Ain.Goto.jk != Ijk_Call
- && i->Ain.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since rbp is not actually
- available to the allocator. But still .. */
- addHRegUse(u, HRmWrite, hregAMD64_RBP());
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is
+ empty, hence only (1) is relevant here. */
+ case Ain_XDirect:
+ /* Don't bother to mention the write to %r11, since it is not
+ available to the allocator. */
+ addRegUsage_AMD64AMode(u, i->Ain.XDirect.amRIP);
+ return;
+ case Ain_XIndir:
+ /* Ditto re %r11 */
+ addHRegUse(u, HRmRead, i->Ain.XIndir.dstGA);
+ addRegUsage_AMD64AMode(u, i->Ain.XIndir.amRIP);
+ return;
+ case Ain_XAssisted:
+ /* Ditto re %r11 and %rbp (the baseblock ptr) */
+ addHRegUse(u, HRmRead, i->Ain.XAssisted.dstGA);
+ addRegUsage_AMD64AMode(u, i->Ain.XAssisted.amRIP);
return;
case Ain_CMov64:
addRegUsage_AMD64RM(u, i->Ain.CMov64.src, HRmRead);
@@ -1594,39 +1478,9 @@
case Ain_A87StSW:
addRegUsage_AMD64AMode(u, i->Ain.A87StSW.addr);
return;
-//.. case Xin_FpUnary:
-//.. addHRegUse(u, HRmRead, i->Xin.FpUnary.src);
-//.. addHRegUse(u, HRmWrite, i->Xin.FpUnary.dst);
-//.. return;
-//.. case Xin_FpBinary:
-//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcL);
-//.. addHRegUse(u, HRmRead, i->Xin.FpBinary.srcR);
-//.. addHRegUse(u, HRmWrite, i->Xin.FpBinary.dst);
-//.. return;
-//.. case Xin_FpLdSt:
-//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdSt.addr);
-//.. addHRegUse(u, i->Xin.FpLdSt.isLoad ? HRmWrite : HRmRead,
-//.. i->Xin.FpLdSt.reg);
-//.. return;
-//.. case Xin_FpLdStI:
-//.. addRegUsage_AMD64AMode(u, i->Xin.FpLdStI.addr);
-//.. addHRegUse(u, i->Xin.FpLdStI.isLoad ? HRmWrite : HRmRead,
-//.. i->Xin.FpLdStI.reg);
-//.. return;
-//.. case Xin_Fp64to32:
-//.. addHRegUse(u, HRmRead, i->Xin.Fp64to32.src);
-//.. addHRegUse(u, HRmWrite, i->Xin.Fp64to32.dst);
-//.. return;
-//.. case Xin_FpCMov:
-//.. addHRegUse(u, HRmRead, i->Xin.FpCMov.src);
-//.. addHRegUse(u, HRmModify, i->Xin.FpCMov.dst);
-//.. return;
case Ain_LdMXCSR:
addRegUsage_AMD64AMode(u, i->Ain.LdMXCSR.addr);
return;
-//.. case Xin_FpStSW_AX:
-//.. addHRegUse(u, HRmWrite, hregAMD64_EAX());
-//.. return;
case Ain_SseUComIS:
addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcL);
addHRegUse(u, HRmRead, i->Ain.SseUComIS.srcR);
@@ -1653,9 +1507,6 @@
addRegUsage_AMD64AMode(u, i->Ain.SseLdzLO.addr);
addHRegUse(u, HRmWrite, i->Ain.SseLdzLO.reg);
return;
-//.. case Xin_SseConst:
-//.. addHRegUse(u, HRmWrite, i->Xin.SseConst.dst);
-//.. return;
case Ain_Sse32Fx4:
vassert(i->Ain.Sse32Fx4.op != Asse_MOV);
unary = toBool( i->Ain.Sse32Fx4.op == Asse_RCPF
@@ -1716,6 +1567,15 @@
addHRegUse(u, HRmRead, i->Ain.SseShuf.src);
addHRegUse(u, HRmWrite, i->Ain.SseShuf.dst);
return;
+ case Ain_EvCheck:
+ /* We expect both amodes only to mention %rbp, so this is in
+ fact pointless, since %rbp isn't allocatable, but anyway.. */
+ addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amCounter);
+ addRegUsage_AMD64AMode(u, i->Ain.EvCheck.amFailAddr);
+ return;
+ case Ain_ProfInc:
+ addHRegUse(u, HRmWrite, hregAMD64_R11());
+ return;
default:
ppAMD64Instr(i, mode64);
vpanic("getRegUsage_AMD64Instr");
@@ -1766,17 +1626,21 @@
case Ain_Div:
mapRegs_AMD64RM(m, i->Ain.Div.src);
return;
-//.. case Xin_Sh3232:
-//.. mapReg(m, &i->Xin.Sh3232.src);
-//.. mapReg(m, &i->Xin.Sh3232.dst);
-//.. return;
case Ain_Push:
mapRegs_AMD64RMI(m, i->Ain.Push.src);
return;
case Ain_Call:
return;
- case Ain_Goto:
- mapRegs_AMD64RI(m, i->Ain.Goto.dst);
+ case Ain_XDirect:
+ mapRegs_AMD64AMode(m, i->Ain.XDirect.amRIP);
+ return;
+ case Ain_XIndir:
+ mapReg(m, &i->Ain.XIndir.dstGA);
+ mapRegs_AMD64AMode(m, i->Ain.XIndir.amRIP);
+ return;
+ case Ain_XAssisted:
+ mapReg(m, &i->Ain.XAssisted.dstGA);
+ mapRegs_AMD64AMode(m, i->Ain.XAssisted.amRIP);
return;
case Ain_CMov64:
mapRegs_AMD64RM(m, i->Ain.CMov64.src);
@@ -1822,36 +1686,9 @@
case Ain_A87StSW:
mapRegs_AMD64AMode(m, i->Ain.A87StSW.addr);
return;
-//.. case Xin_FpUnary:
-//.. mapReg(m, &i->Xin.FpUnary.src);
-//.. mapReg(m, &i->Xin.FpUnary.dst);
-//.. return;
-//.. case Xin_FpBinary:
-//.. mapReg(m, &i->Xin.FpBinary.srcL);
-//.. mapReg(m, &i->Xin.FpBinary.srcR);
-//.. mapReg(m, &i->Xin.FpBinary.dst);
-//.. return;
-//.. case Xin_FpLdSt:
-//.. mapRegs_AMD64AMode(m, i->Xin.FpLdSt.addr);
-//.. mapReg(m, &i->Xin.FpLdSt.reg);
-//.. return;
-//.. case Xin_FpLdStI:
-//.. mapRegs_AMD64AMode(m, i->Xin.FpLdStI.addr);
-//.. mapReg(m, &i->Xin.FpLdStI.reg);
-//.. return;
-//.. case Xin_Fp64to32:
-//.. mapReg(m, &i->Xin.Fp64to32.src);
-//.. mapReg(m, &i->Xin.Fp64to32.dst);
-//.. return;
-//.. case Xin_FpCMov:
-//.. mapReg(m, &i->Xin.FpCMov.src);
-//.. mapReg(m, &i->Xin.FpCMov.dst);
-//.. return;
case Ain_LdMXCSR:
mapRegs_AMD64AMode(m, i->Ain.LdMXCSR.addr);
return;
-//.. case Xin_FpStSW_AX:
-//.. return;
case Ain_SseUComIS:
mapReg(m, &i->Ain.SseUComIS.srcL);
mapReg(m, &i->Ain.SseUComIS.srcR);
@@ -1869,9 +1706,6 @@
mapReg(m, &i->Ain.SseSDSS.src);
mapReg(m, &i->Ain.SseSDSS.dst);
return;
-//.. case Xin_SseConst:
-//.. mapReg(m, &i->Xin.SseConst.dst);
-//.. return;
case Ain_SseLdSt:
mapReg(m, &i->Ain.SseLdSt.reg);
mapRegs_AMD64AMode(m, i->Ain.SseLdSt.addr);
@@ -1908,6 +1742,15 @@
mapReg(m, &i->Ain.SseShuf.src);
mapReg(m, &i->Ain.SseShuf.dst);
return;
+ case Ain_EvCheck:
+ /* We expect both amodes only to mention %rbp, so this is in
+ fact pointless, since %rbp isn't allocatable, but anyway.. */
+ mapRegs_AMD64AMode(m, i->Ain.EvCheck.amCounter);
+ mapRegs_AMD64AMode(m, i->Ain.EvCheck.amFailAddr);
+ return;
+ case Ain_ProfInc:
+ /* hardwires r11 -- nothing to modify. */
+ return;
default:
ppAMD64Instr(i, mode64);
vpanic("mapRegs_AMD64Instr");
@@ -2252,101 +2095,19 @@
return p;
}
-//.. /* Emit fstp %st(i), 1 <= i <= 7 */
-//.. static UChar* do_fstp_st ( UChar* p, Int i )
-//.. {
-//.. vassert(1 <= i && i <= 7);
-//.. *p++ = 0xDD;
-//.. *p++ = 0xD8+i;
-//.. return p;
-//.. }
-//..
-//.. /* Emit fld %st(i), 0 <= i <= 6 */
-//.. static UChar* do_fld_st ( UChar* p, Int i )
-//.. {
-//.. vassert(0 <= i && i <= 6);
-//.. *p++ = 0xD9;
-//.. *p++ = 0xC0+i;
-//.. return p;
-//.. }
-//..
-//.. /* Emit f<op> %st(0) */
-//.. static UChar* do_fop1_st ( UChar* p, AMD64FpOp op )
-//.. {
-//.. switch (op) {
-//.. case Xfp_NEG: *p++ = 0xD9; *p++ = 0xE0; break;
-//.. case Xfp_ABS: *p++ = 0xD9; *p++ = 0xE1; break;
-//.. case Xfp_SQRT: *p++ = 0xD9; *p++ = 0xFA; break;
-//.. case Xfp_ROUND: *p++ = 0xD9; *p++ = 0xFC; break;
-//.. case Xfp_SIN: *p++ = 0xD9; *p++ = 0xFE; break;
-//.. case Xfp_COS: *p++ = 0xD9; *p++ = 0xFF; break;
-//.. case Xfp_2XM1: *p++ = 0xD9; *p++ = 0xF0; break;
-//.. case Xfp_MOV: break;
-//.. case Xfp_TAN: p = do_ffree_st7(p); /* since fptan pushes 1.0 */
-//.. *p++ = 0xD9; *p++ = 0xF2; /* fptan */
-//.. *p++ = 0xD9; *p++ = 0xF7; /* fincstp */
-//.. break;
-//.. default: vpanic("do_fop1_st: unknown op");
-//.. }
-//.. return p;
-//.. }
-//..
-//.. /* Emit f<op> %st(i), 1 <= i <= 5 */
-//.. static UChar* do_fop2_st ( UChar* p, AMD64FpOp op, Int i )
-//.. {
-//.. # define fake(_n) mkHReg((_n), HRcInt32, False)
-//.. Int subopc;
-//.. switch (op) {
-//.. case Xfp_ADD: subopc = 0; break;
-//.. case Xfp_SUB: subopc = 4; break;
-//.. case Xfp_MUL: subopc = 1; break;
-//.. case Xfp_DIV: subopc = 6; break;
-//.. default: vpanic("do_fop2_st: unknown op");
-//.. }
-//.. *p++ = 0xD8;
-//.. p = doAMode_R(p, fake(subopc), fake(i));
-//.. return p;
-//.. # undef fake
-//.. }
-//..
-//.. /* Push a 32-bit word on the stack. The word depends on tags[3:0];
-//.. each byte is either 0x00 or 0xFF depending on the corresponding bit in tags[].
-//.. */
-//.. static UChar* push_word_from_tags ( UChar* p, UShort tags )
-//.. {
-//.. UInt w;
-//.. vassert(0 == (tags & ~0xF));
-//.. if (tags == 0) {
-//.. /* pushl $0x00000000 */
-//.. *p++ = 0x6A;
-//.. *p++ = 0x00;
-//.. }
-//.. else
-//.. /* pushl $0xFFFFFFFF */
-//.. if (tags == 0xF) {
-//.. *p++ = 0x6A;
-//.. *p++ = 0xFF;
-//.. } else {
-//.. vassert(0); /* awaiting test case */
-//.. w = 0;
-//.. if (tags & 1) w |= 0x000000FF;
-//.. if (tags & 2) w |= 0x0000FF00;
-//.. if (tags & 4) w |= 0x00FF0000;
-//.. if (tags & 8) w |= 0xFF000000;
-//.. *p++ = 0x68;
-//.. p = emit32(p, w);
-//.. }
-//.. return p;
-//.. }
-
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
- imperative to emit position-independent code. */
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else
+ leave it unchanged. */
-Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr* i,
+Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, AMD64Instr* i,
Bool mode64,
- void* dispatch_unassisted,
- void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UInt /*irno,*/ opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
UInt xtra;
@@ -2545,35 +2306,6 @@
goto bad;
}
}
-//.. /* ADD/SUB/ADC/SBB/AND/OR/XOR/CMP. MUL is not
-//.. allowed here. */
-//.. opc = subopc_imm = opc_imma = 0;
-//.. switch (i->Xin.Alu32M.op) {
-//.. case Xalu_ADD: opc = 0x01; subopc_imm = 0; break;
-//.. case Xalu_SUB: opc = 0x29; subopc_imm = 5; break;
-//.. default: goto bad;
-//.. }
-//.. switch (i->Xin.Alu32M.src->tag) {
-//.. case Xri_Reg:
-//.. *p++ = opc;
-//.. p = doAMode_M(p, i->Xin.Alu32M.src->Xri.Reg.reg,
-//.. i->Xin.Alu32M.dst);
-//.. goto done;
-//.. case Xri_Imm:
-//.. if (fits8bits(i->Xin.Alu32M.src->Xri.Imm.imm32)) {
-//.. *p++ = 0x83;
-//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
-//.. *p++ = 0xFF & i->Xin.Alu32M.src->Xri.Imm.imm32;
-//.. goto done;
-//.. } else {
-//.. *p++ = 0x81;
-//.. p = doAMode_M(p, fake(subopc_imm), i->Xin.Alu32M.dst);
-//.. p = emit32(p, i->Xin.Alu32M.src->Xri.Imm.imm32);
-//.. goto done;
-//.. }
-//.. default:
-//.. goto bad;
-//.. }
break;
case Ain_Sh64:
@@ -2756,21 +2488,6 @@
}
break;
-//.. case Xin_Sh3232:
-//.. vassert(i->Xin.Sh3232.op == Xsh_SHL || i->Xin.Sh3232.op == Xsh_SHR);
-//.. if (i->Xin.Sh3232.amt == 0) {
-//.. /* shldl/shrdl by %cl */
-//.. *p++ = 0x0F;
-//.. if (i->Xin.Sh3232.op == Xsh_SHL) {
-//.. *p++ = 0xA5;
-//.. } else {
-//.. *p++ = 0xAD;
-//.. }
-//.. p = doAMode_R(p, i->Xin.Sh3232.src, i->Xin.Sh3232.dst);
-//.. goto done;
-//.. }
-//.. break;
-
case Ain_Push:
switch (i->Ain.Push.src->tag) {
case Armi_Mem:
@@ -2822,117 +2539,167 @@
goto done;
}
- case Ain_Goto: {
- void* dispatch_to_use = NULL;
- vassert(dispatch_unassisted != NULL);
- vassert(dispatch_assisted != NULL);
+ case Ain_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated with the
+ chainXDirect_AMD64 and unchainXDirect_AMD64 below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations can't
+ use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
+
+ HReg r11 = hregAMD64_R11();
/* Use ptmp for backpatching conditional jumps. */
ptmp = NULL;
/* First off, if this is conditional, create a conditional
jump over the rest of it. */
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ if (i->Ain.XDirect.cond != Acc_ALWAYS) {
/* jmp fwds if !condition */
- *p++ = toUChar(0x70 + (i->Ain.Goto.cond ^ 1));
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.XDirect.cond ^ 1)));
ptmp = p; /* fill in this bit later */
*p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* If a non-boring, set %rbp (the guest state pointer)
- appropriately. Since these numbers are all small positive
- integers, we can get away with "movl $N, %ebp" rather than
- the longer "movq $N, %rbp". Also, decide which dispatcher we
- need to use. */
- dispatch_to_use = dispatch_assisted;
+ /* Update the guest RIP. */
+ /* movabsq $dstGA, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, i->Ain.XDirect.dstGA);
+ /* movq %r11, amRIP */
+ *p++ = rexAMode_M(r11, i->Ain.XDirect.amRIP);
+ *p++ = 0x89;
+ p = doAMode_M(p, r11, i->Ain.XDirect.amRIP);
- /* movl $magic_number, %ebp */
- switch (i->Ain.Goto.jk) {
- case Ijk_ClientReq:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
- case Ijk_Sys_syscall:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_SYSCALL); break;
- case Ijk_Sys_int32:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT32); break;
- case Ijk_Yield:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_YIELD); break;
- case Ijk_EmWarn:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_EMWARN); break;
- case Ijk_MapFail:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
- case Ijk_NoDecode:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NODECODE); break;
- case Ijk_TInval:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_TINVAL); break;
- case Ijk_NoRedir:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
- case Ijk_SigTRAP:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
- case Ijk_SigSEGV:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
- case Ijk_Ret:
- case Ijk_Call:
- case Ijk_Boring:
- dispatch_to_use = dispatch_unassisted;
- break;
- default:
- ppIRJumpKind(i->Ain.Goto.jk);
- vpanic("emit_AMD64Instr.Ain_Goto: unknown jump kind");
- }
-
- /* Get the destination address into %rax */
- if (i->Ain.Goto.dst->tag == Ari_Imm) {
- /* movl sign-ext($immediate), %rax ; ret */
- *p++ = 0x48;
- *p++ = 0xC7;
- *p++ = 0xC0;
- p = emit32(p, i->Ain.Goto.dst->Ari.Imm.imm32);
- } else {
- vassert(i->Ain.Goto.dst->tag == Ari_Reg);
- /* movq %reg, %rax ; ret */
- if (i->Ain.Goto.dst->Ari.Reg.reg != hregAMD64_RAX()) {
- *p++ = rexAMode_R(i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
- *p++ = 0x89;
- p = doAMode_R(p, i->Ain.Goto.dst->Ari.Reg.reg, hregAMD64_RAX());
- }
- }
-
- /* Get the dispatcher address into %rdx. This has to happen
- after the load of %rax since %rdx might be carrying the value
- destined for %rax immediately prior to this Ain_Goto. */
- vassert(sizeof(ULong) == sizeof(void*));
-
- if (fitsIn32Bits(Ptr_to_ULong(dispatch_to_use))) {
- /* movl sign-extend(imm32), %rdx */
- *p++ = 0x48;
- *p++ = 0xC7;
- *p++ = 0xC2;
- p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
- } else {
- /* movabsq $imm64, %rdx */
- *p++ = 0x48;
- *p++ = 0xBA;
- p = emit64(p, Ptr_to_ULong(dispatch_to_use));
- }
- /* jmp *%rdx */
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the length of the
+ two instructions below. */
+ /* movabsq $disp_cp_chain_me_to_{slow,fast}EP,%r11; */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ void* disp_cp_chain_me
+ = i->Ain.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = emit64(p, Ptr_to_ULong(disp_cp_chain_me));
+ /* call *%r11 */
+ *p++ = 0x41;
*p++ = 0xFF;
- *p++ = 0xE2;
+ *p++ = 0xD3;
+ /* --- END of PATCHABLE BYTES --- */
/* Fix up the conditional jump, if there was one. */
- if (i->Ain.Goto.cond != Acc_ALWAYS) {
+ if (i->Ain.XDirect.cond != Acc_ALWAYS) {
Int delta = p - ptmp;
- vassert(delta > 0 && delta < 30);
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+ }
+
+ case Ain_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed --
+ no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That means
+ no XDirects or XIndirs out from no-redir translations.
+ Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Ain.XIndir.cond != Acc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.XIndir.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+ /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
+ *p++ = rexAMode_M(i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.XIndir.dstGA, i->Ain.XIndir.amRIP);
+ /* movabsq $disp_indir, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, Ptr_to_ULong(disp_cp_xindir));
+ /* jmp *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xE3;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Ain.XIndir.cond != Acc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+ }
+
+ case Ain_XAssisted: {
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Ain.XAssisted.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+ /* movq dstGA(a reg), amRIP -- copied from Alu64M MOV case */
+ *p++ = rexAMode_M(i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Ain.XAssisted.dstGA, i->Ain.XAssisted.amRIP);
+ /* movl $magic_number, %ebp. Since these numbers are all small positive
+ integers, we can get away with "movl $N, %ebp" rather than
+ the longer "movq $N, %rbp". */
+ UInt trcval = 0;
+ switch (i->Ain.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ case Ijk_Sys_int32: trcval = VEX_TRC_JMP_SYS_INT32; break;
+ case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ case Ijk_Ret:
+ case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->Ain.XAssisted.jk);
+ vpanic("emit_AMD64Instr.Ain_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ *p++ = 0xBD;
+ p = emit32(p, trcval);
+ /* movabsq $disp_assisted, %r11 */
+ *p++ = 0x49;
+ *p++ = 0xBB;
+ p = emit64(p, Ptr_to_ULong(disp_cp_xassisted));
+ /* jmp *%r11 */
+ *p++ = 0x41;
+ *p++ = 0xFF;
+ *p++ = 0xE3;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Ain.XAssisted.cond != Acc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
*ptmp = toUChar(delta-1);
}
goto done;
@@ -3164,165 +2931,6 @@
}
break;
-//.. case Xin_FpUnary:
-//.. /* gop %src, %dst
-//.. --> ffree %st7 ; fld %st(src) ; fop %st(0) ; fstp %st(1+dst)
-//.. */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpUnary.src));
-//.. p = do_fop1_st(p, i->Xin.FpUnary.op);
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpUnary.dst));
-//.. goto done;
-//..
-//.. case Xin_FpBinary:
-//.. if (i->Xin.FpBinary.op == Xfp_YL2X
-//.. || i->Xin.FpBinary.op == Xfp_YL2XP1) {
-//.. /* Have to do this specially. */
-//.. /* ffree %st7 ; fld %st(srcL) ;
-//.. ffree %st7 ; fld %st(srcR+1) ; fyl2x{p1} ; fstp(1+dst) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
-//.. *p++ = 0xD9;
-//.. *p++ = i->Xin.FpBinary.op==Xfp_YL2X ? 0xF1 : 0xF9;
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
-//.. goto done;
-//.. }
-//.. if (i->Xin.FpBinary.op == Xfp_ATAN) {
-//.. /* Have to do this specially. */
-//.. /* ffree %st7 ; fld %st(srcL) ;
-//.. ffree %st7 ; fld %st(srcR+1) ; fpatan ; fstp(1+dst) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcR));
-//.. *p++ = 0xD9; *p++ = 0xF3;
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
-//.. goto done;
-//.. }
-//.. if (i->Xin.FpBinary.op == Xfp_PREM
-//.. || i->Xin.FpBinary.op == Xfp_PREM1
-//.. || i->Xin.FpBinary.op == Xfp_SCALE) {
-//.. /* Have to do this specially. */
-//.. /* ffree %st7 ; fld %st(srcR) ;
-//.. ffree %st7 ; fld %st(srcL+1) ; fprem/fprem1/fscale ; fstp(2+dst) ;
-//.. fincstp ; ffree %st7 */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcR));
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 1+hregNumber(i->Xin.FpBinary.srcL));
-//.. *p++ = 0xD9;
-//.. switch (i->Xin.FpBinary.op) {
-//.. case Xfp_PREM: *p++ = 0xF8; break;
-//.. case Xfp_PREM1: *p++ = 0xF5; break;
-//.. case Xfp_SCALE: *p++ = 0xFD; break;
-//.. default: vpanic("emitAMD64Instr(FpBinary,PREM/PREM1/SCALE)");
-//.. }
-//.. p = do_fstp_st(p, 2+hregNumber(i->Xin.FpBinary.dst));
-//.. *p++ = 0xD9; *p++ = 0xF7;
-//.. p = do_ffree_st7(p);
-//.. goto done;
-//.. }
-//.. /* General case */
-//.. /* gop %srcL, %srcR, %dst
-//.. --> ffree %st7 ; fld %st(srcL) ; fop %st(1+srcR) ; fstp %st(1+dst)
-//.. */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpBinary.srcL));
-//.. p = do_fop2_st(p, i->Xin.FpBinary.op,
-//.. 1+hregNumber(i->Xin.FpBinary.srcR));
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpBinary.dst));
-//.. goto done;
-//..
-//.. case Xin_FpLdSt:
-//.. vassert(i->Xin.FpLdSt.sz == 4 || i->Xin.FpLdSt.sz == 8);
-//.. if (i->Xin.FpLdSt.isLoad) {
-//.. /* Load from memory into %fakeN.
-//.. --> ffree %st(7) ; fld{s/l} amode ; fstp st(N+1)
-//.. */
-//.. p = do_ffree_st7(p);
-//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
-//.. p = doAMode_M(p, fake(0)/*subopcode*/, i->Xin.FpLdSt.addr);
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdSt.reg));
-//.. goto done;
-//.. } else {
-//.. /* Store from %fakeN into memory.
-//.. --> ffree %st(7) ; fld st(N) ; fstp{l|s} amode
-//.. */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdSt.reg));
-//.. *p++ = i->Xin.FpLdSt.sz==4 ? 0xD9 : 0xDD;
-//.. p = doAMode_M(p, fake(3)/*subopcode*/, i->Xin.FpLdSt.addr);
-//.. goto done;
-//.. }
-//.. break;
-//..
-//.. case Xin_FpLdStI:
-//.. if (i->Xin.FpLdStI.isLoad) {
-//.. /* Load from memory into %fakeN, converting from an int.
-//.. --> ffree %st(7) ; fild{w/l/ll} amode ; fstp st(N+1)
-//.. */
-//.. switch (i->Xin.FpLdStI.sz) {
-//.. case 8: opc = 0xDF; subopc_imm = 5; break;
-//.. case 4: opc = 0xDB; subopc_imm = 0; break;
-//.. case 2: vassert(0); opc = 0xDF; subopc_imm = 0; break;
-//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-load)");
-//.. }
-//.. p = do_ffree_st7(p);
-//.. *p++ = opc;
-//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
-//.. p = do_fstp_st(p, 1+hregNumber(i->Xin.FpLdStI.reg));
-//.. goto done;
-//.. } else {
-//.. /* Store from %fakeN into memory, converting to an int.
-//.. --> ffree %st(7) ; fld st(N) ; fistp{w/l/ll} amode
-//.. */
-//.. switch (i->Xin.FpLdStI.sz) {
-//.. case 8: opc = 0xDF; subopc_imm = 7; break;
-//.. case 4: opc = 0xDB; subopc_imm = 3; break;
-//.. case 2: opc = 0xDF; subopc_imm = 3; break;
-//.. default: vpanic("emitAMD64Instr(Xin_FpLdStI-store)");
-//.. }
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+hregNumber(i->Xin.FpLdStI.reg));
-//.. *p++ = opc;
-//.. p = doAMode_M(p, fake(subopc_imm)/*subopcode*/, i->Xin.FpLdStI.addr);
-//.. goto done;
-//.. }
-//.. break;
-//..
-//.. case Xin_Fp64to32:
-//.. /* ffree %st7 ; fld %st(src) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+fregNo(i->Xin.Fp64to32.src));
-//.. /* subl $4, %esp */
-//.. *p++ = 0x83; *p++ = 0xEC; *p++ = 0x04;
-//.. /* fstps (%esp) */
-//.. *p++ = 0xD9; *p++ = 0x1C; *p++ = 0x24;
-//.. /* flds (%esp) */
-//.. *p++ = 0xD9; *p++ = 0x04; *p++ = 0x24;
-//.. /* addl $4, %esp */
-//.. *p++ = 0x83; *p++ = 0xC4; *p++ = 0x04;
-//.. /* fstp %st(1+dst) */
-//.. p = do_fstp_st(p, 1+fregNo(i->Xin.Fp64to32.dst));
-//.. goto done;
-//..
-//.. case Xin_FpCMov:
-//.. /* jmp fwds if !condition */
-//.. *p++ = 0x70 + (i->Xin.FpCMov.cond ^ 1);
-//.. *p++ = 0; /* # of bytes in the next bit, which we don't know yet */
-//.. ptmp = p;
-//..
-//.. /* ffree %st7 ; fld %st(src) ; fstp %st(1+dst) */
-//.. p = do_ffree_st7(p);
-//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCMov.src));
-//.. p = do_fstp_st(p, 1+fregNo(i->Xin.FpCMov.dst));
-//..
-//.. /* Fill in the jump offset. */
-//.. *(ptmp-1) = p - ptmp;
-//.. goto done;
-
case Ain_LdMXCSR:
*p++ = clearWBit(rexAMode_M( fake(0), i->Ain.LdMXCSR.addr));
*p++ = 0x0F;
@@ -3330,12 +2938,6 @@
p = doAMode_M(p, fake(2)/*subopcode*/, i->Ain.LdMXCSR.addr);
goto done;
-//.. case Xin_FpStSW_AX:
-//.. /* note, this emits fnstsw %ax, not fstsw %ax */
-//.. *p++ = 0xDF;
-//.. *p++ = 0xE0;
-//.. goto done;
-
case Ain_SseUComIS:
/* ucomi[sd] %srcL, %srcR ; pushfq ; popq %dst */
/* ucomi[sd] %srcL, %srcR */
@@ -3395,45 +2997,6 @@
vreg2ireg(i->Ain.SseSDSS.src) );
goto done;
-//..
-//.. case Xin_FpCmp:
-//.. /* gcmp %fL, %fR, %dst
-//.. -> ffree %st7; fpush %fL ; fucomp %(fR+1) ;
-//.. fnstsw %ax ; movl %eax, %dst
-//.. */
-//.. /* ffree %st7 */
-//.. p = do_ffree_st7(p);
-//.. /* fpush %fL */
-//.. p = do_fld_st(p, 0+fregNo(i->Xin.FpCmp.srcL));
-//.. /* fucomp %(fR+1) */
-//.. *p++ = 0xDD;
-//.. *p++ = 0xE8 + (7 & (1+fregNo(i->Xin.FpCmp.srcR)));
-//.. /* fnstsw %ax */
-//.. *p++ = 0xDF;
-//.. *p++ = 0xE0;
-//.. /* movl %eax, %dst */
-//.. *p++ = 0x89;
-//.. p = doAMode_R(p, hregAMD64_EAX(), i->Xin.FpCmp.dst);
-//.. goto done;
-//..
-//.. case Xin_SseConst: {
-//.. UShort con = i->Xin.SseConst.con;
-//.. p = push_word_from_tags(p, (con >> 12) & 0xF);
-//.. p = push_word_from_tags(p, (con >> 8) & 0xF);
-//.. p = push_word_from_tags(p, (con >> 4) & 0xF);
-//.. p = push_word_from_tags(p, con & 0xF);
-//.. /* movl (%esp), %xmm-dst */
-//.. *p++ = 0x0F;
-//.. *p++ = 0x10;
-//.. *p++ = 0x04 + 8 * (7 & vregNo(i->Xin.SseConst.dst));
-//.. *p++ = 0x24;
-//.. /* addl $16, %esp */
-//.. *p++ = 0x83;
-//.. *p++ = 0xC4;
-//.. *p++ = 0x10;
-//.. goto done;
-//.. }
-
case Ain_SseLdSt:
if (i->Ain.SseLdSt.sz == 8) {
*p++ = 0xF2;
@@ -3505,8 +3068,6 @@
case Asse_MAXF: *p++ = 0x5F; break;
case Asse_MINF: *p++ = 0x5D; break;
case Asse_MULF: *p++ = 0x59; break;
-//.. case Xsse_RCPF: *p++ = 0x53; break;
-//.. case Xsse_RSQRTF: *p++ = 0x52; break;
case Asse_SQRTF: *p++ = 0x51; break;
case Asse_SUBF: *p++ = 0x5C; break;
case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
@@ -3563,8 +3124,6 @@
case Asse_MAXF: *p++ = 0x5F; break;
case Asse_MINF: *p++ = 0x5D; break;
case Asse_MULF: *p++ = 0x59; break;
-//.. case Xsse_RCPF: *p++ = 0x53; break;
-//.. case Xsse_RSQRTF: *p++ = 0x52; break;
case Asse_SQRTF: *p++ = 0x51; break;
case Asse_SUBF: *p++ = 0x5C; break;
case Asse_CMPEQF: *p++ = 0xC2; xtra = 0x100; break;
@@ -3680,6 +3239,70 @@
*p++ = (UChar)(i->Ain.SseShuf.order);
goto done;
+ case Ain_EvCheck: {
+ /* We generate:
+ (3 bytes) decl 8(%rbp) 8 == offsetof(host_EvC_COUNTER)
+ (2 bytes) jns nofail expected taken
+ (3 bytes) jmp* 0(%rbp) 0 == offsetof(host_EvC_FAILADDR)
+ nofail:
+ */
+ /* This is heavily asserted re instruction lengths. It needs to
+ be. If we get given unexpected forms of .amCounter or
+ .amFailAddr -- basically, anything that's not of the form
+ uimm7(%rbp) -- the assertions below are likely to fail. */
+ /* Note also that after the decl we must be very careful not to
+ read the carry flag, else we get a partial flags stall.
+ js/jns avoids that, though. */
+ UChar* p0 = p;
+ /* --- decl 8(%rbp) --- */
+ /* Need to compute the REX byte for the decl in order to prove
+ that we don't need it, since this is a 32-bit dec and all
+ registers involved in the amode are < r8. "fake(1)" because
+ there's no register in this encoding; instead the register
+ field is used as a sub opcode. The encoding for "decl r/m32"
+ is FF /1, hence the fake(1). */
+ rex = clearWBit(rexAMode_M(fake(1), i->Ain.EvCheck.amCounter));
+ if (rex != 0x40) goto bad; /* We don't expect to need the REX byte. */
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(1), i->Ain.EvCheck.amCounter);
+ vassert(p - p0 == 3);
+ /* --- jns nofail --- */
+ *p++ = 0x79;
+ *p++ = 0x03; /* need to check this 0x03 after the next insn */
+ vassert(p - p0 == 5);
+ /* --- jmp* 0(%rbp) --- */
+ /* Once again, verify we don't need REX. The encoding is FF /4.
+ We don't need REX.W since by default FF /4 in 64-bit mode
+ implies a 64 bit load. */
+ rex = clearWBit(rexAMode_M(fake(4), i->Ain.EvCheck.amFailAddr));
+ if (rex != 0x40) goto bad;
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(4), i->Ain.EvCheck.amFailAddr);
+ vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
+ /* And crosscheck .. */
+ vassert(evCheckSzB_AMD64() == 8);
+ goto done;
+ }
+
+ case Ain_ProfInc: {
+ /* We generate movabsq $0, %r11
+ incq (%r11)
+ in the expectation that a later call to LibVEX_patchProfCtr
+ will be used to fill in the immediate field once the right
+ value is known.
+ 49 BB 00 00 00 00 00 00 00 00
+ 49 FF 03
+ */
+ *p++ = 0x49; *p++ = 0xBB;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x49; *p++ = 0xFF; *p++ = 0x03;
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
+
default:
goto bad;
}
@@ -3696,6 +3319,200 @@
# undef fake
}
+
+/* How big is an event check? See case for Ain_EvCheck in
+ emit_AMD64Instr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_AMD64 ( void )
+{
+ return 8;
+}
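
The 8 here comes straight from the Ain_EvCheck case above (3 + 2 + 3 bytes). As a sanity aid, here is a minimal standalone sketch -- not VEX code, and assuming the 8(%rbp)/0(%rbp) amodes mentioned in that case's comment -- spelling the sequence out byte by byte:

   /* Hand-assembled event check.  ModRM for disp8(%rbp) is mod=01,
      rm=101; the reg field carries the /1 (DEC) and /4 (JMP)
      sub-opcodes. */
   #include <assert.h>
   #include <stdio.h>

   int main ( void )
   {
      unsigned char evcheck[8] = {
         0xFF, 0x4D, 0x08,   /* decl 8(%rbp)  : FF /1        */
         0x79, 0x03,         /* jns  nofail   : skip 3 bytes */
         0xFF, 0x65, 0x00    /* jmp  *0(%rbp) : FF /4        */
      };
      assert(sizeof(evcheck) == 8);   /* == evCheckSzB_AMD64() */
      for (int j = 0; j < 8; j++) printf("%02X ", evcheck[j]);
      printf("\n");
      return 0;
   }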
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ /* What we're expecting to see is:
+ movabsq $disp_cp_chain_me_EXPECTED, %r11
+ call *%r11
+ viz
+ 49 BB <8 bytes value == disp_cp_chain_me_EXPECTED>
+ 41 FF D3
+ */
+ UChar* p = (UChar*)place_to_chain;
+ vassert(p[0] == 0x49);
+ vassert(p[1] == 0xBB);
+ vassert(*(ULong*)(&p[2]) == Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
+ vassert(p[10] == 0x41);
+ vassert(p[11] == 0xFF);
+ vassert(p[12] == 0xD3);
+ /* And what we want to change it to is either:
+ (general case):
+ movabsq $place_to_jump_to, %r11
+ jmpq *%r11
+ viz
+ 49 BB <8 bytes value == place_to_jump_to>
+ 41 FF E3
+ So it's the same length (convenient, huh) and we don't
+ need to change all the bits.
+ ---OR---
+ in the case where the displacement falls within 32 bits
+ jmpq disp32 where disp32 is relative to the next insn
+ ud2; ud2; ud2; ud2
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B 0F 0B 0F 0B 0F 0B
+
+ In both cases the replacement has the same length as the original.
+ To remain sane & verifiable,
+ (1) limit the displacement for the short form to
+ (say) +/- one billion, so as to avoid wraparound
+ off-by-ones
+ (2) even if the short form is applicable, once every (say)
+ 1024 times use the long form anyway, so as to maintain
+ verifiability
+ */
+ /* This is the delta we need to put into a JMP d32 insn. It's
+ relative to the start of the next insn, hence the -5. */
+ Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
+ Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
+
+ static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
+ if (shortOK) {
+ shortCTR++; // thread safety bleh
+ if (0 == (shortCTR & 0x3FF)) {
+ shortOK = False;
+ if (0)
+ vex_printf("QQQ chainXDirect_AMD64: shortCTR = %u, "
+ "using long jmp\n", shortCTR);
+ }
+ }
+
+ /* And make the modifications. */
+ if (shortOK) {
+ p[0] = 0xE9;
+ p[1] = (delta >> 0) & 0xFF;
+ p[2] = (delta >> 8) & 0xFF;
+ p[3] = (delta >> 16) & 0xFF;
+ p[4] = (delta >> 24) & 0xFF;
+ p[5] = 0x0F; p[6] = 0x0B;
+ p[7] = 0x0F; p[8] = 0x0B;
+ p[9] = 0x0F; p[10] = 0x0B;
+ p[11] = 0x0F; p[12] = 0x0B;
+ /* sanity check on the delta -- top 32 are all 0 or all 1 */
+ delta >>= 32;
+ vassert(delta == 0LL || delta == -1LL);
+ } else {
+ /* Minimal modifications from the starting sequence. */
+ *(ULong*)(&p[2]) = Ptr_to_ULong(place_to_jump_to);
+ p[12] = 0xE3;
+ }
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
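
The short-form delta arithmetic can be exercised in isolation. A hedged sketch, assuming the same 13-byte patch area and a little-endian host; encode_short_chain is an invented name, not VEX API:

   #include <assert.h>

   typedef unsigned char UChar;
   typedef long long Long;

   static void encode_short_chain ( UChar* p, UChar* target )
   {
      /* rel32 is measured from the end of the 5-byte jmp insn,
         hence the -5, exactly as in chainXDirect_AMD64. */
      Long delta = (Long)(target - p) - 5LL;
      assert(delta >= -1000LL*1000*1000 && delta < 1000LL*1000*1000);
      p[0] = 0xE9;                           /* jmp rel32 */
      p[1] = (UChar)((delta >>  0) & 0xFF);
      p[2] = (UChar)((delta >>  8) & 0xFF);
      p[3] = (UChar)((delta >> 16) & 0xFF);
      p[4] = (UChar)((delta >> 24) & 0xFF);
      for (int j = 0; j < 4; j++) {          /* pad to 13 bytes with ud2 */
         p[5 + 2*j] = 0x0F;
         p[6 + 2*j] = 0x0B;
      }
   }

   int main ( void )
   {
      static UChar buf[128];
      encode_short_chain(buf, buf + 100);    /* forward jump by 95 bytes */
      assert(buf[0] == 0xE9 && buf[1] == 95 && buf[5] == 0x0F);
      return 0;
   }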
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ /* What we're expecting to see is either:
+ (general case)
+ movabsq $place_to_jump_to_EXPECTED, %r11
+ jmpq *%r11
+ viz
+ 49 BB <8 bytes value == place_to_jump_to_EXPECTED>
+ 41 FF E3
+ ---OR---
+ in the case where the displacement falls within 32 bits
+ jmpq d32
+ ud2; ud2; ud2; ud2
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B 0F 0B 0F 0B 0F 0B
+ */
+ UChar* p = (UChar*)place_to_unchain;
+ Bool valid = False;
+ if (p[0] == 0x49 && p[1] == 0xBB
+ && *(ULong*)(&p[2]) == Ptr_to_ULong(place_to_jump_to_EXPECTED)
+ && p[10] == 0x41 && p[11] == 0xFF && p[12] == 0xE3) {
+ /* it's the long form */
+ valid = True;
+ }
+ else
+ if (p[0] == 0xE9
+ && p[5] == 0x0F && p[6] == 0x0B
+ && p[7] == 0x0F && p[8] == 0x0B
+ && p[9] == 0x0F && p[10] == 0x0B
+ && p[11] == 0x0F && p[12] == 0x0B) {
+ /* It's the short form. Check the offset is right. */
+ Int s32 = *(Int*)(&p[1]);
+ Long s64 = (Long)s32;
+ if ((UChar*)p + 5 + s64 == (UChar*)place_to_jump_to_EXPECTED) {
+ valid = True;
+ if (0)
+ vex_printf("QQQ unchainXDirect_AMD64: found short form\n");
+ }
+ }
+ vassert(valid);
+ /* And what we want to change it to is:
+ movabsq $disp_cp_chain_me, %r11
+ call *%r11
+ viz
+ 49 BB <8 bytes value == disp_cp_chain_me>
+ 41 FF D3
+ So it's the same length (convenient, huh).
+ */
+ p[0] = 0x49;
+ p[1] = 0xBB;
+ *(ULong*)(&p[2]) = Ptr_to_ULong(disp_cp_chain_me);
+ p[10] = 0x41;
+ p[11] = 0xFF;
+ p[12] = 0xD3;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
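
On the long form, the two routines above are exact inverses. A round-trip sketch over a local scratch buffer (every address below is made up; make_unchained is illustrative only):

   #include <assert.h>
   #include <string.h>
   #include <stdint.h>

   static unsigned char site[13];    /* stands in for place_to_chain */

   static void make_unchained ( uint64_t chain_me )
   {
      site[0] = 0x49; site[1] = 0xBB;                     /* movabsq imm64,%r11 */
      memcpy(&site[2], &chain_me, 8);                     /* LE imm64 */
      site[10] = 0x41; site[11] = 0xFF; site[12] = 0xD3;  /* call *%r11 */
   }

   int main ( void )
   {
      uint64_t chain_me = 0x123456789ABULL;   /* made-up dispatcher address */
      uint64_t target   = 0x555500001000ULL;  /* made-up jump target */
      unsigned char before[13];

      make_unchained(chain_me);
      memcpy(before, site, 13);

      /* chain (long form): rewrite the imm64, turn call into jmpq *%r11 */
      memcpy(&site[2], &target, 8);
      site[12] = 0xE3;

      /* unchain: restore the original chain-me sequence */
      make_unchained(chain_me);
      assert(memcmp(before, site, 13) == 0);
      return 0;
   }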
+
+
+/* Patch the counter address into a profile inc point, as previously
+ created by the Ain_ProfInc case for emit_AMD64Instr. */
+VexInvalRange patchProfInc_AMD64 ( void* place_to_patch,
+ ULong* location_of_counter )
+{
+ vassert(sizeof(ULong*) == 8);
+ UChar* p = (UChar*)place_to_patch;
+ vassert(p[0] == 0x49);
+ vassert(p[1] == 0xBB);
+ vassert(p[2] == 0x00);
+ vassert(p[3] == 0x00);
+ vassert(p[4] == 0x00);
+ vassert(p[5] == 0x00);
+ vassert(p[6] == 0x00);
+ vassert(p[7] == 0x00);
+ vassert(p[8] == 0x00);
+ vassert(p[9] == 0x00);
+ vassert(p[10] == 0x49);
+ vassert(p[11] == 0xFF);
+ vassert(p[12] == 0x03);
+ ULong imm64 = (ULong)Ptr_to_ULong(location_of_counter);
+ p[2] = imm64 & 0xFF; imm64 >>= 8;
+ p[3] = imm64 & 0xFF; imm64 >>= 8;
+ p[4] = imm64 & 0xFF; imm64 >>= 8;
+ p[5] = imm64 & 0xFF; imm64 >>= 8;
+ p[6] = imm64 & 0xFF; imm64 >>= 8;
+ p[7] = imm64 & 0xFF; imm64 >>= 8;
+ p[8] = imm64 & 0xFF; imm64 >>= 8;
+ p[9] = imm64 & 0xFF; imm64 >>= 8;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
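
From the caller's side, the patch step is just an 8-byte little-endian store into the imm64 slot. A sketch with a local buffer standing in for the translation; the emit comment above names LibVEX_patchProfCtr as the eventual driver, which is not reproduced here:

   #include <assert.h>
   #include <stdint.h>

   static uint64_t counter = 0;       /* the profile counter to wire in */

   int main ( void )
   {
      /* The unpatched sequence, exactly as Ain_ProfInc emits it. */
      unsigned char buf[13] = {
         0x49, 0xBB, 0,0,0,0,0,0,0,0,    /* movabsq $0, %r11 */
         0x49, 0xFF, 0x03                /* incq (%r11)      */
      };
      /* What patchProfInc_AMD64 does, byte for byte (little-endian). */
      uint64_t imm64 = (uint64_t)(uintptr_t)&counter;
      for (int j = 0; j < 8; j++) {
         buf[2 + j] = imm64 & 0xFF;
         imm64 >>= 8;
      }
      assert(buf[0] == 0x49 && buf[10] == 0x49);  /* opcodes untouched */
      return 0;
   }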
+
+
/*---------------------------------------------------------------*/
/*--- end host_amd64_defs.c ---*/
/*---------------------------------------------------------------*/
diff --git a/priv/host_amd64_defs.h b/priv/host_amd64_defs.h
index 4e7ae05..bc63bd2 100644
--- a/priv/host_amd64_defs.h
+++ b/priv/host_amd64_defs.h
@@ -363,10 +363,11 @@
Ain_Alu32R, /* 32-bit add/sub/and/or/xor/cmp, dst=REG (a la Alu64R) */
Ain_MulL, /* widening multiply */
Ain_Div, /* div and mod */
-//.. Xin_Sh3232, /* shldl or shrdl */
Ain_Push, /* push 64-bit value on stack */
Ain_Call, /* call to address in register */
- Ain_Goto, /* conditional/unconditional jmp to dst */
+ Ain_XDirect, /* direct transfer to GA */
+ Ain_XIndir, /* indirect transfer to GA */
+ Ain_XAssisted, /* assisted transfer to GA */
Ain_CMov64, /* conditional move */
Ain_MovxLQ, /* reg-reg move, zx-ing/sx-ing top half */
Ain_LoadEX, /* mov{s,z}{b,w,l}q from mem to reg */
@@ -377,28 +378,17 @@
Ain_ACAS, /* 8/16/32/64-bit lock;cmpxchg */
Ain_DACAS, /* lock;cmpxchg8b/16b (doubleword ACAS, 2 x
32-bit or 2 x 64-bit only) */
-
Ain_A87Free, /* free up x87 registers */
Ain_A87PushPop, /* x87 loads/stores */
Ain_A87FpOp, /* x87 operations */
Ain_A87LdCW, /* load x87 control word */
Ain_A87StSW, /* store x87 status word */
-//..
-//.. Xin_FpUnary, /* FP fake unary op */
-//.. Xin_FpBinary, /* FP fake binary op */
-//.. Xin_FpLdSt, /* FP fake load/store */
-//.. Xin_FpLdStI, /* FP fake load/store, converting to/from Int */
-//.. Xin_Fp64to32, /* FP round IEEE754 double to IEEE754 single */
-//.. Xin_FpCMov, /* FP fake floating point conditional move */
Ain_LdMXCSR, /* load %mxcsr */
-//.. Xin_FpStSW_AX, /* fstsw %ax */
Ain_SseUComIS, /* ucomisd/ucomiss, then get %rflags into int
register */
Ain_SseSI2SF, /* scalar 32/64 int to 32/64 float conversion */
Ain_SseSF2SI, /* scalar 32/64 float to 32/64 int conversion */
Ain_SseSDSS, /* scalar float32 to/from float64 */
-//..
-//.. Xin_SseConst, /* Generate restricted SSE literal */
Ain_SseLdSt, /* SSE load/store 32/64/128 bits, no alignment
constraints, upper 96/64/0 bits arbitrary */
Ain_SseLdzLO, /* SSE load low 32/64 bits, zero remainder of reg */
@@ -408,7 +398,9 @@
Ain_Sse64FLo, /* SSE binary, 64F in lowest lane only */
Ain_SseReRg, /* SSE binary general reg-reg, Re, Rg */
Ain_SseCMov, /* SSE conditional move */
- Ain_SseShuf /* SSE2 shuffle (pshufd) */
+ Ain_SseShuf, /* SSE2 shuffle (pshufd) */
+ Ain_EvCheck, /* Event check */
+ Ain_ProfInc /* 64-bit profile counter increment */
}
AMD64InstrTag;
@@ -470,13 +462,6 @@
Int sz; /* 4 or 8 only */
AMD64RM* src;
} Div;
-//.. /* shld/shrd. op may only be Xsh_SHL or Xsh_SHR */
-//.. struct {
-//.. X86ShiftOp op;
-//.. UInt amt; /* shift amount, or 0 means %cl */
-//.. HReg src;
-//.. HReg dst;
-//.. } Sh3232;
struct {
AMD64RMI* src;
} Push;
@@ -487,13 +472,29 @@
Addr64 target;
Int regparms; /* 0 .. 6 */
} Call;
- /* Pseudo-insn. Goto dst, on given condition (which could be
- Acc_ALWAYS). */
+ /* Update the guest RIP value, then exit requesting to chain
+ to it. May be conditional. */
struct {
+ Addr64 dstGA; /* next guest address */
+ AMD64AMode* amRIP; /* amode in guest state for RIP */
+ AMD64CondCode cond; /* can be Acc_ALWAYS */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ AMD64AMode* amRIP;
+ AMD64CondCode cond; /* can be Acc_ALWAYS */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ AMD64AMode* amRIP;
+ AMD64CondCode cond; /* can be Acc_ALWAYS */
IRJumpKind jk;
- AMD64CondCode cond;
- AMD64RI* dst;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be the bogus Acc_ALWAYS. */
struct {
@@ -588,11 +589,6 @@
AMD64AMode* addr;
}
LdMXCSR;
-//.. /* fstsw %ax */
-//.. struct {
-//.. /* no fields */
-//.. }
-//.. FpStSW_AX;
/* ucomisd/ucomiss, then get %rflags into int register */
struct {
UChar sz; /* 4 or 8 only */
@@ -620,12 +616,6 @@
HReg src;
HReg dst;
} SseSDSS;
-//..
-//.. /* Simplistic SSE[123] */
-//.. struct {
-//.. UShort con;
-//.. HReg dst;
-//.. } SseConst;
struct {
Bool isLoad;
UChar sz; /* 4, 8 or 16 only */
@@ -674,6 +664,15 @@
HReg src;
HReg dst;
} SseShuf;
+ struct {
+ AMD64AMode* amCounter;
+ AMD64AMode* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} Ain;
}
@@ -689,10 +688,14 @@
extern AMD64Instr* AMD64Instr_Test64 ( UInt imm32, HReg dst );
extern AMD64Instr* AMD64Instr_MulL ( Bool syned, AMD64RM* );
extern AMD64Instr* AMD64Instr_Div ( Bool syned, Int sz, AMD64RM* );
-//.. extern AMD64Instr* AMD64Instr_Sh3232 ( AMD64ShiftOp, UInt amt, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_Push ( AMD64RMI* );
extern AMD64Instr* AMD64Instr_Call ( AMD64CondCode, Addr64, Int );
-extern AMD64Instr* AMD64Instr_Goto ( IRJumpKind, AMD64CondCode cond, AMD64RI* dst );
+extern AMD64Instr* AMD64Instr_XDirect ( Addr64 dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, Bool toFastEP );
+extern AMD64Instr* AMD64Instr_XIndir ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond );
+extern AMD64Instr* AMD64Instr_XAssisted ( HReg dstGA, AMD64AMode* amRIP,
+ AMD64CondCode cond, IRJumpKind jk );
extern AMD64Instr* AMD64Instr_CMov64 ( AMD64CondCode, AMD64RM* src, HReg dst );
extern AMD64Instr* AMD64Instr_MovxLQ ( Bool syned, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LoadEX ( UChar szSmall, Bool syned,
@@ -709,21 +712,11 @@
extern AMD64Instr* AMD64Instr_A87FpOp ( A87FpOp op );
extern AMD64Instr* AMD64Instr_A87LdCW ( AMD64AMode* addr );
extern AMD64Instr* AMD64Instr_A87StSW ( AMD64AMode* addr );
-//..
-//.. extern AMD64Instr* AMD64Instr_FpUnary ( AMD64FpOp op, HReg src, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpBinary ( AMD64FpOp op, HReg srcL, HReg srcR, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpLdSt ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
-//.. extern AMD64Instr* AMD64Instr_FpLdStI ( Bool isLoad, UChar sz, HReg reg, AMD64AMode* );
-//.. extern AMD64Instr* AMD64Instr_Fp64to32 ( HReg src, HReg dst );
-//.. extern AMD64Instr* AMD64Instr_FpCMov ( AMD64CondCode, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_LdMXCSR ( AMD64AMode* );
-//.. extern AMD64Instr* AMD64Instr_FpStSW_AX ( void );
extern AMD64Instr* AMD64Instr_SseUComIS ( Int sz, HReg srcL, HReg srcR, HReg dst );
extern AMD64Instr* AMD64Instr_SseSI2SF ( Int szS, Int szD, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseSF2SI ( Int szS, Int szD, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseSDSS ( Bool from64, HReg src, HReg dst );
-//..
-//.. extern AMD64Instr* AMD64Instr_SseConst ( UShort con, HReg dst );
extern AMD64Instr* AMD64Instr_SseLdSt ( Bool isLoad, Int sz, HReg, AMD64AMode* );
extern AMD64Instr* AMD64Instr_SseLdzLO ( Int sz, HReg, AMD64AMode* );
extern AMD64Instr* AMD64Instr_Sse32Fx4 ( AMD64SseOp, HReg, HReg );
@@ -733,6 +726,9 @@
extern AMD64Instr* AMD64Instr_SseReRg ( AMD64SseOp, HReg, HReg );
extern AMD64Instr* AMD64Instr_SseCMov ( AMD64CondCode, HReg src, HReg dst );
extern AMD64Instr* AMD64Instr_SseShuf ( Int order, HReg src, HReg dst );
+extern AMD64Instr* AMD64Instr_EvCheck ( AMD64AMode* amCounter,
+ AMD64AMode* amFailAddr );
+extern AMD64Instr* AMD64Instr_ProfInc ( void );
extern void ppAMD64Instr ( AMD64Instr*, Bool );
@@ -742,10 +738,13 @@
extern void getRegUsage_AMD64Instr ( HRegUsage*, AMD64Instr*, Bool );
extern void mapRegs_AMD64Instr ( HRegRemap*, AMD64Instr*, Bool );
extern Bool isMove_AMD64Instr ( AMD64Instr*, HReg*, HReg* );
-extern Int emit_AMD64Instr ( UChar* buf, Int nbuf, AMD64Instr*,
- Bool,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+extern Int emit_AMD64Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, AMD64Instr* i,
+ Bool mode64,
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
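
The /*MB_MOD*/ out-parameter implies a small caller-side protocol: at most one insn in a translation sets the flag, and the caller records the offset at which it was emitted so the patcher can find it later. A stub-based sketch; stub_emit and everything around it are invented for illustration:

   #include <assert.h>

   typedef int Bool;
   #define True 1
   #define False 0

   /* Stub standing in for emit_AMD64Instr: 13 bytes for a profinc,
      4 bytes for anything else. */
   static int stub_emit ( Bool* is_profInc, unsigned char* buf, int nbuf,
                          int insn_is_profinc )
   {
      (void)buf; (void)nbuf;
      if (insn_is_profinc) { *is_profInc = True; return 13; }
      return 4;
   }

   int main ( void )
   {
      unsigned char out[64];
      int used = 0, profinc_offset = -1;
      int kinds[3] = { 0, 1, 0 };          /* middle insn is the profinc */
      for (int k = 0; k < 3; k++) {
         Bool hit = False;
         int n = stub_emit(&hit, out + used, (int)sizeof(out) - used,
                           kinds[k]);
         if (hit) {
            assert(profinc_offset == -1);  /* at most one per translation */
            profinc_offset = used;
         }
         used += n;
      }
      assert(profinc_offset == 4);         /* later handed to the patcher */
      return 0;
   }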
extern void genSpill_AMD64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
@@ -753,9 +752,36 @@
HReg rreg, Int offset, Bool );
extern void getAllocableRegs_AMD64 ( Int*, HReg** );
-extern HInstrArray* iselSB_AMD64 ( IRSB*, VexArch,
- VexArchInfo*,
- VexAbiInfo* );
+extern HInstrArray* iselSB_AMD64 ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER,
+ and so assumes that they are both < 128, and so can use the short
+ offset encoding. This is all checked with assertions, so in the
+ worst case we will merely assert at startup. */
+extern Int evCheckSzB_AMD64 ( void );
+
+/* Chain, and unchain, an XDirect jump. */
+extern VexInvalRange chainXDirect_AMD64 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+extern VexInvalRange unchainXDirect_AMD64 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_AMD64 ( void* place_to_patch,
+ ULong* location_of_counter );
+
#endif /* ndef __VEX_HOST_AMD64_DEFS_H */
diff --git a/priv/host_amd64_isel.c b/priv/host_amd64_isel.c
index bcd213f..a365a5a 100644
--- a/priv/host_amd64_isel.c
+++ b/priv/host_amd64_isel.c
@@ -112,12 +112,24 @@
64-bit virtual HReg, which holds the high half
of the value.
+ - The host subarchitecture we are selecting insns for.
+ This is set at the start and does not change.
+
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
- - The host subarchitecture we are selecting insns for.
- This is set at the start and does not change.
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+ in this block. It is set at the start and does not change. This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
Note, this is all host-independent. (JRS 20050201: well, kinda
... not completely. Compare with ISelEnv for X86.)
@@ -125,17 +137,21 @@
typedef
struct {
+ /* Constants -- set at the start and do not change. */
IRTypeEnv* type_env;
HReg* vregmap;
HReg* vregmapHI;
Int n_vregmap;
- HInstrArray* code;
-
- Int vreg_ctr;
-
UInt hwcaps;
+
+ Bool chainingAllowed;
+ Addr64 max_ga;
+
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
}
ISelEnv;
@@ -4131,14 +4147,47 @@
/* --------- EXIT --------- */
case Ist_Exit: {
- AMD64RI* dst;
- AMD64CondCode cc;
if (stmt->Ist.Exit.dst->tag != Ico_U64)
vpanic("iselStmt(amd64): Ist_Exit: dst is not a 64-bit value");
- dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env,stmt->Ist.Exit.guard);
- addInstr(env, AMD64Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
- return;
+
+ AMD64CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ AMD64AMode* amRIP = AMD64AMode_IR(stmt->Ist.Exit.offsIP,
+ hregAMD64_RBP());
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, AMD64Instr_XDirect(stmt->Ist.Exit.dst->Ico.U64,
+ amRIP, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: {
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, cc, stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
@@ -4153,18 +4202,83 @@
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- AMD64RI* ri;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
}
- ri = iselIntExpr_RI(env, next);
- addInstr(env, AMD64Instr_Goto(jk, Acc_ALWAYS,ri));
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U64);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)cdst->Ico.U64) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, AMD64Instr_XDirect(cdst->Ico.U64,
+ amRIP, Acc_ALWAYS,
+ toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is
+ allowable. */
+ HReg r = iselIntExpr_R(env, next);
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselIntExpr_R(env, next);
+ AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
+ if (env->chainingAllowed) {
+ addInstr(env, AMD64Instr_XIndir(r, amRIP, Acc_ALWAYS));
+ } else {
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoRedir:
+ case Ijk_Yield: case Ijk_SigTRAP: case Ijk_TInval: {
+ HReg r = iselIntExpr_R(env, next);
+ AMD64AMode* amRIP = AMD64AMode_IR(offsIP, hregAMD64_RBP());
+ addInstr(env, AMD64Instr_XAssisted(r, amRIP, Acc_ALWAYS, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
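
The forwards-edge test used by both iselNext and the Ist_Exit case above boils down to a single comparison against max_ga. A trivial sketch (demo_toFastEP is an invented name):

   typedef unsigned long long Addr64;

   /* Chain to the fast entry point (skipping the destination's event
      check) only when the destination lies wholly beyond this block,
      so it cannot be a self-edge or a backwards edge. */
   static int demo_toFastEP ( Addr64 dst, Addr64 max_ga )
   {
      return dst > max_ga;
   }

   int main ( void )
   {
      Addr64 max_ga = 0x400100;            /* last byte of this block */
      return demo_toFastEP(0x400200, max_ga) == 1
             && demo_toFastEP(0x400050, max_ga) == 0 ? 0 : 1;
   }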
@@ -4174,14 +4288,21 @@
/* Translate an entire SB to amd64 code. */
-HInstrArray* iselSB_AMD64 ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi/*UNUSED*/ )
+HInstrArray* iselSB_AMD64 ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- UInt hwcaps_host = archinfo_host->hwcaps;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ AMD64AMode *amCounter, *amFailAddr;
/* sanity ... */
vassert(arch_host == VexArchAMD64);
@@ -4207,7 +4328,9 @@
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->hwcaps = hwcaps_host;
+ env->chainingAllowed = chainingAllowed;
+ env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
@@ -4233,12 +4356,25 @@
}
env->vreg_ctr = j;
+ /* The very first instruction must be an event check. */
+ amCounter = AMD64AMode_IR(offs_Host_EvC_Counter, hregAMD64_RBP());
+ amFailAddr = AMD64AMode_IR(offs_Host_EvC_FailAddr, hregAMD64_RBP());
+ addInstr(env, AMD64Instr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfCtr. */
+ if (addProfInc) {
+ addInstr(env, AMD64Instr_ProfInc());
+ }
+
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
if (bb->stmts[i])
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
diff --git a/priv/host_arm_defs.c b/priv/host_arm_defs.c
index 2f0ebf0..755699e 100644
--- a/priv/host_arm_defs.c
+++ b/priv/host_arm_defs.c
@@ -1170,13 +1170,33 @@
i->ARMin.LdSt8U.amode = amode;
return i;
}
-//extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
-ARMInstr* ARMInstr_Goto ( IRJumpKind jk, ARMCondCode cond, HReg gnext ) {
- ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
- i->tag = ARMin_Goto;
- i->ARMin.Goto.jk = jk;
- i->ARMin.Goto.cond = cond;
- i->ARMin.Goto.gnext = gnext;
+ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, Bool toFastEP ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_XDirect;
+ i->ARMin.XDirect.dstGA = dstGA;
+ i->ARMin.XDirect.amR15T = amR15T;
+ i->ARMin.XDirect.cond = cond;
+ i->ARMin.XDirect.toFastEP = toFastEP;
+ return i;
+}
+ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_XIndir;
+ i->ARMin.XIndir.dstGA = dstGA;
+ i->ARMin.XIndir.amR15T = amR15T;
+ i->ARMin.XIndir.cond = cond;
+ return i;
+}
+ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, IRJumpKind jk ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_XAssisted;
+ i->ARMin.XAssisted.dstGA = dstGA;
+ i->ARMin.XAssisted.amR15T = amR15T;
+ i->ARMin.XAssisted.cond = cond;
+ i->ARMin.XAssisted.jk = jk;
return i;
}
ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
@@ -1479,6 +1499,21 @@
return i;
}
+ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
+ ARMAMode1* amFailAddr ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_EvCheck;
+ i->ARMin.EvCheck.amCounter = amCounter;
+ i->ARMin.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+
+ARMInstr* ARMInstr_ProfInc ( void ) {
+ ARMInstr* i = LibVEX_Alloc(sizeof(ARMInstr));
+ i->tag = ARMin_ProfInc;
+ return i;
+}
+
/* ... */
void ppARMInstr ( ARMInstr* i ) {
@@ -1564,28 +1599,47 @@
return;
case ARMin_Ld8S:
goto unhandled;
- case ARMin_Goto:
- if (i->ARMin.Goto.cond != ARMcc_AL) {
- vex_printf("if (%%cpsr.%s) { ",
- showARMCondCode(i->ARMin.Goto.cond));
- } else {
- vex_printf("if (1) { ");
- }
- if (i->ARMin.Goto.jk != Ijk_Boring
- && i->ARMin.Goto.jk != Ijk_Call
- && i->ARMin.Goto.jk != Ijk_Ret) {
- vex_printf("mov r8, $");
- ppIRJumpKind(i->ARMin.Goto.jk);
- vex_printf(" ; ");
- }
- vex_printf("mov r0, ");
- ppHRegARM(i->ARMin.Goto.gnext);
- vex_printf(" ; bx r14");
- if (i->ARMin.Goto.cond != ARMcc_AL) {
- vex_printf(" }");
- } else {
- vex_printf(" }");
- }
+ case ARMin_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.XDirect.cond));
+ vex_printf("movw r12,0x%x; ",
+ (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
+ vex_printf("movt r12,0x%x; ",
+ (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
+ vex_printf("str r12,");
+ ppARMAMode1(i->ARMin.XDirect.amR15T);
+ vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
+ i->ARMin.XDirect.toFastEP ? "fast" : "slow");
+ vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
+ i->ARMin.XDirect.toFastEP ? "fast" : "slow");
+ vex_printf("blx r12 }");
+ return;
+ case ARMin_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.XIndir.cond));
+ vex_printf("str ");
+ ppHRegARM(i->ARMin.XIndir.dstGA);
+ vex_printf(",");
+ ppARMAMode1(i->ARMin.XIndir.amR15T);
+ vex_printf("; movw r12,LO16($disp_cp_xindir); ");
+ vex_printf("movt r12,HI16($disp_cp_xindir); ");
+ vex_printf("blx r12 }");
+ return;
+ case ARMin_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%%cpsr.%s) { ",
+ showARMCondCode(i->ARMin.XAssisted.cond));
+ vex_printf("str ");
+ ppHRegARM(i->ARMin.XAssisted.dstGA);
+ vex_printf(",");
+ ppARMAMode1(i->ARMin.XAssisted.amR15T);
+ vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
+ (Int)i->ARMin.XAssisted.jk);
+ vex_printf("movw r12,LO16($disp_cp_xassisted); ");
+ vex_printf("movt r12,HI16($disp_cp_xassisted); ");
+ vex_printf("blx r12 }");
return;
case ARMin_CMov:
vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
@@ -1761,8 +1815,7 @@
}
return;
case ARMin_MFence:
- vex_printf("mfence (mcr 15,0,r0,c7,c10,4; 15,0,r0,c7,c10,5; "
- "15,0,r0,c7,c5,4)");
+ vex_printf("(mfence) dsb sy; dmb sy; isb");
return;
case ARMin_CLREX:
vex_printf("clrex");
@@ -1878,6 +1931,25 @@
vex_printf(", ");
vex_printf("%d", i->ARMin.Add32.imm32);
return;
+ case ARMin_EvCheck:
+ vex_printf("(evCheck) ldr r12,");
+ ppARMAMode1(i->ARMin.EvCheck.amCounter);
+ vex_printf("; subs r12,r12,$1; str r12,");
+ ppARMAMode1(i->ARMin.EvCheck.amCounter);
+ vex_printf("; bpl nofail; ldr r12,");
+ ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
+ vex_printf("; bx r12; nofail:");
+ return;
+ case ARMin_ProfInc:
+ vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
+ "movw r12,HI16($NotKnownYet); "
+ "ldr r11,[r12]; "
+ "adds r11,r11,$1; "
+ "str r11,[r12]; "
+ "ldr r11,[r12+4]; "
+ "adc r11,r11,$0; "
+ "str r11,[r12+4]");
+ return;
default:
unhandled:
vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
@@ -1945,18 +2017,21 @@
return;
case ARMin_Ld8S:
goto unhandled;
- case ARMin_Goto:
- /* reads the reg holding the next guest addr */
- addHRegUse(u, HRmRead, i->ARMin.Goto.gnext);
- /* writes it to the standard integer return register */
- addHRegUse(u, HRmWrite, hregARM_R0());
- /* possibly messes with the baseblock pointer */
- if (i->ARMin.Goto.jk != Ijk_Boring
- && i->ARMin.Goto.jk != Ijk_Call
- && i->ARMin.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since r8 is not actually
- available to the allocator. But still .. */
- addHRegUse(u, HRmWrite, hregARM_R8());
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is
+ empty, hence only (1) is relevant here. */
+ case ARMin_XDirect:
+ addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
+ return;
+ case ARMin_XIndir:
+ addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
+ addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
+ return;
+ case ARMin_XAssisted:
+ addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
+ addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
return;
case ARMin_CMov:
addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
@@ -2159,6 +2234,18 @@
addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
return;
+ case ARMin_EvCheck:
+ /* We expect both amodes only to mention r8, so this is in
+ fact pointless, since r8 isn't allocatable, but
+ anyway.. */
+ addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
+ addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
+ addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
+ return;
+ case ARMin_ProfInc:
+ addHRegUse(u, HRmWrite, hregARM_R12());
+ addHRegUse(u, HRmWrite, hregARM_R11());
+ return;
unhandled:
default:
ppARMInstr(i);
@@ -2210,8 +2297,18 @@
return;
case ARMin_Ld8S:
goto unhandled;
- case ARMin_Goto:
- i->ARMin.Goto.gnext = lookupHRegRemap(m, i->ARMin.Goto.gnext);
+ case ARMin_XDirect:
+ mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
+ return;
+ case ARMin_XIndir:
+ i->ARMin.XIndir.dstGA
+ = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
+ mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
+ return;
+ case ARMin_XAssisted:
+ i->ARMin.XAssisted.dstGA
+ = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
+ mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
return;
case ARMin_CMov:
i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
@@ -2329,6 +2426,17 @@
case ARMin_Add32:
i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
+ return;
+ case ARMin_EvCheck:
+ /* We expect both amodes only to mention r8, so this is in
+ fact pointless, since r8 isn't allocatable, but
+ anyway.. */
+ mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
+ mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
+ return;
+ case ARMin_ProfInc:
+ /* hardwires r11 and r12 -- nothing to modify. */
+ return;
unhandled:
default:
ppARMInstr(i);
@@ -2586,6 +2694,9 @@
(((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
(((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
+#define XX______(zzx7,zzx6) \
+ ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
+
/* Generate a skeletal insn that involves an RI84 shifter operand.
Returns a word which is all zeroes apart from bits 25 and 11..0,
since it is those that encode the shifter operand (at least to the
@@ -2704,10 +2815,92 @@
return p;
}
+/* Get an immediate into a register, using only that register, and
+ generating exactly 2 instructions, regardless of the value of the
+ immediate. This is used when generating sections of code that need
+ to be patched later, so as to guarantee a specific size. */
+static UInt* imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
+{
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+ /* Generate movw rD, #low16 ; movt rD, #high16. */
+ UInt lo16 = imm32 & 0xFFFF;
+ UInt hi16 = (imm32 >> 16) & 0xFFFF;
+ UInt instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
+ (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
+ lo16 & 0xF);
+ *p++ = instr;
+ instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
+ (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
+ hi16 & 0xF);
+ *p++ = instr;
+ } else {
+ vassert(0); /* lose */
+ }
+ return p;
+}
-Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr* i,
+/* Check whether p points at a 2-insn sequence cooked up by
+ imm32_to_iregNo_EXACTLY2(). */
+static Bool is_imm32_to_iregNo_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
+{
+ if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
+ /* Check for movw rD, #low16 ; movt rD, #high16. */
+ UInt lo16 = imm32 & 0xFFFF;
+ UInt hi16 = (imm32 >> 16) & 0xFFFF;
+ UInt i0, i1;
+ i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
+ (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
+ lo16 & 0xF);
+ i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
+ (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
+ hi16 & 0xF);
+ return p[0] == i0 && p[1] == i1;
+ } else {
+ vassert(0); /* lose */
+ }
+}
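
The two A1 encodings these helpers build can be cross-checked against a disassembler in a few lines. A sketch, assuming ARMv7 and r12 as the scratch register; movw_movt_r12 is an invented name:

   #include <stdio.h>

   typedef unsigned int UInt;

   /* A1 MOVW: cond|0011 0000|imm4|Rd|imm12 ;
      A1 MOVT: cond|0011 0100|imm4|Rd|imm12. */
   static void movw_movt_r12 ( UInt imm32, UInt out[2] )
   {
      UInt lo16 = imm32 & 0xFFFF, hi16 = (imm32 >> 16) & 0xFFFF;
      out[0] = 0xE3000000 | ((lo16 >> 12) << 16) | (12 << 12)
               | (lo16 & 0xFFF);
      out[1] = 0xE3400000 | ((hi16 >> 12) << 16) | (12 << 12)
               | (hi16 & 0xFFF);
   }

   int main ( void )
   {
      UInt insns[2];
      movw_movt_r12(0xDEADBEEF, insns);
      /* expect E30BCEEF (movw r12,#0xBEEF), E34DCEAD (movt r12,#0xDEAD) */
      printf("%08X %08X\n", insns[0], insns[1]);
      return 0;
   }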
+
+
+static UInt* do_load_or_store32 ( UInt* p,
+ Bool isLoad, UInt rD, ARMAMode1* am )
+{
+ vassert(rD <= 12);
+ vassert(am->tag == ARMam1_RI); // RR case is not handled
+ UInt bB = 0;
+ UInt bL = isLoad ? 1 : 0;
+ Int simm12;
+ UInt instr, bP;
+ if (am->ARMam1.RI.simm13 < 0) {
+ bP = 0;
+ simm12 = -am->ARMam1.RI.simm13;
+ } else {
+ bP = 1;
+ simm12 = am->ARMam1.RI.simm13;
+ }
+ vassert(simm12 >= 0 && simm12 <= 4095);
+ instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
+ iregNo(am->ARMam1.RI.reg),
+ rD);
+ instr |= simm12;
+ *p++ = instr;
+ return p;
+}
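
For one concrete case -- str r12, [r8, #+4], which is the shape of the R15T update -- the word do_load_or_store32 computes can be re-derived by hand:

   #include <assert.h>

   typedef unsigned int UInt;

   int main ( void )
   {
      /* A1 STR (imm): cond|010|P U B W L|Rn|Rt|imm12,
         with P=1 (offset addressing), U=1 (add), B=W=L=0. */
      UInt cond = 0xE, Rn = 8, Rt = 12, imm12 = 4;
      UInt instr = (cond << 28) | (0x5 << 24) | (0x8 << 20)
                   | (Rn << 16) | (Rt << 12) | imm12;
      assert(instr == 0xE588C004);   /* str r12, [r8, #4] */
      return 0;
   }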
+
+
+/* Emit an instruction into buf and return the number of bytes used.
+ Note that buf is not the insn's final place, and therefore it is
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else
+ leave it unchanged. */
+
+Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, ARMInstr* i,
Bool mode64,
- void* dispatch_unassisted, void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UInt* p = (UInt*)buf;
vassert(nbuf >= 32);
@@ -2894,59 +3087,177 @@
}
case ARMin_Ld8S:
goto bad;
- case ARMin_Goto: {
- UInt instr;
- IRJumpKind jk = i->ARMin.Goto.jk;
- ARMCondCode cond = i->ARMin.Goto.cond;
- UInt rnext = iregNo(i->ARMin.Goto.gnext);
- Int trc = -1;
- /* since we branch to lr(r13) to get back to dispatch: */
- vassert(dispatch_unassisted == NULL);
- vassert(dispatch_assisted == NULL);
- switch (jk) {
- case Ijk_Ret: case Ijk_Call: case Ijk_Boring:
- break; /* no need to set GST in these common cases */
- case Ijk_ClientReq:
- trc = VEX_TRC_JMP_CLIENTREQ; break;
- case Ijk_Sys_int128:
- case Ijk_Sys_int129:
- case Ijk_Sys_int130:
- case Ijk_Yield:
- case Ijk_EmWarn:
- case Ijk_MapFail:
- goto unhandled_jk;
- case Ijk_NoDecode:
- trc = VEX_TRC_JMP_NODECODE; break;
- case Ijk_TInval:
- trc = VEX_TRC_JMP_TINVAL; break;
- case Ijk_NoRedir:
- trc = VEX_TRC_JMP_NOREDIR; break;
- case Ijk_Sys_sysenter:
- case Ijk_SigTRAP:
- case Ijk_SigSEGV:
- goto unhandled_jk;
- case Ijk_Sys_syscall:
- trc = VEX_TRC_JMP_SYS_SYSCALL; break;
- unhandled_jk:
- default:
- goto bad;
+
+ case ARMin_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated
+ with the chainXDirect_ARM and unchainXDirect_ARM below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations
+ can't use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ UInt* ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. Or at least, leave a space for
+ it that we will shortly fill in. */
+ if (i->ARMin.XDirect.cond != ARMcc_AL) {
+ vassert(i->ARMin.XDirect.cond != ARMcc_NV);
+ ptmp = p;
+ *p++ = 0;
}
- if (trc != -1) {
- // mov{cond} r8, #trc
- vassert(trc >= 0 && trc <= 255);
- instr = (cond << 28) | 0x03A08000 | (0xFF & (UInt)trc);
- *p++ = instr;
+
+ /* Update the guest R15T. */
+ /* movw r12, lo16(dstGA) */
+ /* movt r12, hi16(dstGA) */
+ /* str r12, amR15T */
+ p = imm32_to_iregNo(p, /*r*/12, i->ARMin.XDirect.dstGA);
+ p = do_load_or_store32(p, False/*!isLoad*/,
+ /*r*/12, i->ARMin.XDirect.amR15T);
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
+ calling to) backs up the return address, so as to find the
+ address of the first patchable byte. So: don't change the
+ number of instructions (3) below. */
+ /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
+ /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
+ /* blx r12 (A1) */
+ void* disp_cp_chain_me
+ = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12,
+ (UInt)Ptr_to_ULong(disp_cp_chain_me));
+ *p++ = 0xE12FFF3C;
+ /* --- END of PATCHABLE BYTES --- */
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->ARMin.XDirect.cond != ARMcc_AL) {
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
+ vassert(delta > 0 && delta < 40);
+ vassert((delta & 3) == 0);
+ UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
+ vassert(notCond <= 13); /* Neither AL nor NV */
+ delta = (delta >> 2) - 2;
+ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
}
- // mov{cond} r0, rnext
- if (rnext != 0) {
- instr = (cond << 28) | 0x01A00000 | rnext;
- *p++ = instr;
- }
- // bx{cond} r14
- instr =(cond << 28) | 0x012FFF1E;
- *p++ = instr;
goto done;
}
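+
+      /* Worked example of the ARMin_XDirect backpatch above
+         (illustrative only, assuming imm32_to_iregNo emits the full
+         movw+movt pair): the skipped body is then 6 insns = 24
+         bytes, plus the reserved branch word itself gives
+         delta = 28.  Since an ARM B-encoding counts words relative
+         to PC+8, the patched imm24 is 28/4 - 2 = 5. */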
+
+ case ARMin_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed
+ -- no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That
+ means no XDirects or XIndirs out from no-redir
+ translations. Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ UInt* ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. Or at least, leave a space for
+ it that we will shortly fill in. */
+ if (i->ARMin.XIndir.cond != ARMcc_AL) {
+ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
+ ptmp = p;
+ *p++ = 0;
+ }
+
+ /* Update the guest R15T. */
+ /* str r-dstGA, amR15T */
+ p = do_load_or_store32(p, False/*!isLoad*/,
+ iregNo(i->ARMin.XIndir.dstGA),
+ i->ARMin.XIndir.amR15T);
+
+ /* movw r12, lo16(VG_(disp_cp_xindir)) */
+ /* movt r12, hi16(VG_(disp_cp_xindir)) */
+ /* bx r12 (A1) */
+ p = imm32_to_iregNo(p, /*r*/12,
+ (UInt)Ptr_to_ULong(disp_cp_xindir));
+ *p++ = 0xE12FFF1C;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->ARMin.XIndir.cond != ARMcc_AL) {
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
+ vassert(delta > 0 && delta < 40);
+ vassert((delta & 3) == 0);
+ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
+ vassert(notCond <= 13); /* Neither AL nor NV */
+ delta = (delta >> 2) - 2;
+ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
+ }
+ goto done;
+ }
+
+ case ARMin_XAssisted: {
+ /* Use ptmp for backpatching conditional jumps. */
+ UInt* ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. Or at least, leave a space for
+ it that we will shortly fill in. */
+ if (i->ARMin.XAssisted.cond != ARMcc_AL) {
+ vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
+ ptmp = p;
+ *p++ = 0;
+ }
+
+ /* Update the guest R15T. */
+ /* str r-dstGA, amR15T */
+ p = do_load_or_store32(p, False/*!isLoad*/,
+ iregNo(i->ARMin.XAssisted.dstGA),
+ i->ARMin.XAssisted.amR15T);
+
+ /* movw r8, $magic_number */
+ UInt trcval = 0;
+ switch (i->ARMin.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
+ //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ //case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ //case Ijk_Ret:
+ //case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->ARMin.XAssisted.jk);
+ vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ p = imm32_to_iregNo(p, /*r*/8, trcval);
+
+ /* movw r12, lo16(VG_(disp_cp_xassisted)) */
+ /* movt r12, hi16(VG_(disp_cp_xassisted)) */
+ /* bx r12 (A1) */
+ p = imm32_to_iregNo(p, /*r*/12,
+ (UInt)Ptr_to_ULong(disp_cp_xassisted));
+ *p++ = 0xE12FFF1C;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->ARMin.XAssisted.cond != ARMcc_AL) {
+ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
+ vassert(delta > 0 && delta < 40);
+ vassert((delta & 3) == 0);
+ UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
+ vassert(notCond <= 13); /* Neither AL nor NV */
+ delta = (delta >> 2) - 2;
+ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
+ }
+ goto done;
+ }
+
case ARMin_CMov: {
UInt instr = skeletal_RI84(i->ARMin.CMov.src);
UInt subopc = X1101; /* MOV */
@@ -3293,9 +3604,15 @@
goto bad; // FPSCR -> iReg case currently ATC
}
case ARMin_MFence: {
- *p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
- *p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
- *p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
+ // It's not clear (to me) how these relate to the ARMv7
+ // versions, so let's just use the v7 versions as they
+ // are at least well documented.
+ //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
+ //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
+ //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
+ *p++ = 0xF57FF04F; /* DSB sy */
+ *p++ = 0xF57FF05F; /* DMB sy */
+ *p++ = 0xF57FF06F; /* ISB */
goto done;
}
case ARMin_CLREX: {
@@ -4099,6 +4416,62 @@
*p++ = insn;
goto done;
}
+
+ case ARMin_EvCheck: {
+ /* We generate:
+ ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
+ subs r12, r12, #1 (A1)
+ str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
+ bpl nofail
+ ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
+ bx r12
+ nofail:
+ */
+ UInt* p0 = p;
+ p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
+ i->ARMin.EvCheck.amCounter);
+ *p++ = 0xE25CC001; /* subs r12, r12, #1 */
+ p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
+ i->ARMin.EvCheck.amCounter);
+ *p++ = 0x5A000001; /* bpl nofail */
+ p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
+ i->ARMin.EvCheck.amFailAddr);
+ *p++ = 0xE12FFF1C; /* bx r12 */
+ /* nofail: */
+
+ /* Crosscheck */
+ vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
+ goto done;
+ }
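+
+      /* Decoding the fixed words above (illustrative only):
+         0xE25CC001 is subs r12, r12, #1 (cond=E, op=SUB, S=1,
+         rN=rD=12, imm=1); 0x5A000001 is b<pl> with imm24=1, i.e. a
+         branch to PC+8+4, exactly over the 8 bytes of ldr+bx that
+         form the failure path. */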
+
+ case ARMin_ProfInc: {
+ /* We generate:
+ (ctrP is unknown now, so use 0x65556555 in the
+ expectation that a later call to LibVEX_patchProfCtr
+ will be used to fill in the immediate fields once the
+ right value is known.)
+ movw r12, lo16(0x65556555)
+ movt r12, hi16(0x65556555)
+ ldr r11, [r12]
+ adds r11, r11, #1
+ str r11, [r12]
+ ldr r11, [r12+4]
+ adc r11, r11, #0
+ str r11, [r12+4]
+ */
+ p = imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555);
+ *p++ = 0xE59CB000;
+ *p++ = 0xE29BB001;
+ *p++ = 0xE58CB000;
+ *p++ = 0xE59CB004;
+ *p++ = 0xE2ABB000;
+ *p++ = 0xE58CB004;
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
+
/* ... */
default:
goto bad;
@@ -4114,6 +4487,109 @@
return ((UChar*)p) - &buf[0];
}
+
+/* How big is an event check? See case for ARMin_EvCheck in
+ emit_ARMInstr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_ARM ( void )
+{
+ return 24;
+}
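+
+/* (That is 6 fixed-size insns -- ldr, subs, str, bpl, ldr, bx --
+   at 4 bytes each; the crosscheck in the ARMin_EvCheck case above
+   keeps this value honest.) */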
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_ARM ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ /* What we're expecting to see is:
+ movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
+ movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
+ blx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 3C
+ */
+ UInt* p = (UInt*)place_to_chain;
+ vassert(0 == (3 & (HWord)p));
+ vassert(is_imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED)));
+ vassert(p[2] == 0xE12FFF3C);
+ /* And what we want to change it to is:
+ movw r12, lo16(place_to_jump_to)
+ movt r12, hi16(place_to_jump_to)
+ bx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 1C
+ The replacement has the same length as the original.
+ */
+ (void)imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to));
+ p[2] = 0xE12FFF1C;
+ VexInvalRange vir = {(HWord)p, 12};
+ return vir;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ /* What we're expecting to see is:
+ movw r12, lo16(place_to_jump_to_EXPECTED)
+ movt r12, hi16(place_to_jump_to_EXPECTED)
+ bx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 1C
+ */
+ UInt* p = (UInt*)place_to_unchain;
+ vassert(0 == (3 & (HWord)p));
+ vassert(is_imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(place_to_jump_to_EXPECTED)));
+ vassert(p[2] == 0xE12FFF1C);
+ /* And what we want to change it to is:
+ movw r12, lo16(disp_cp_chain_me)
+ movt r12, hi16(disp_cp_chain_me)
+ blx r12
+ viz
+ <8 bytes generated by imm32_to_iregNo_EXACTLY2>
+ E1 2F FF 3C
+ */
+ (void)imm32_to_iregNo_EXACTLY2(
+ p, /*r*/12, (UInt)Ptr_to_ULong(disp_cp_chain_me));
+ p[2] = 0xE12FFF3C;
+ VexInvalRange vir = {(HWord)p, 12};
+ return vir;
+}
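+
+/* Chaining and unchaining are exact inverses: apart from the imm32
+   carried by the movw/movt pair, the only difference is bits [7:4]
+   of the final word -- 0x3 (blx r12) when aimed at a chain-me stub,
+   0x1 (bx r12) once chained to the destination. */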
+
+
+/* Patch the counter address into a profile inc point, as previously
+ created by the ARMin_ProfInc case for emit_ARMInstr. */
+VexInvalRange patchProfInc_ARM ( void* place_to_patch,
+ ULong* location_of_counter )
+{
+ vassert(sizeof(ULong*) == 4);
+ UInt* p = (UInt*)place_to_patch;
+ vassert(0 == (3 & (HWord)p));
+ vassert(is_imm32_to_iregNo_EXACTLY2(p, /*r*/12, 0x65556555));
+ vassert(p[2] == 0xE59CB000);
+ vassert(p[3] == 0xE29BB001);
+ vassert(p[4] == 0xE58CB000);
+ vassert(p[5] == 0xE59CB004);
+ vassert(p[6] == 0xE2ABB000);
+ vassert(p[7] == 0xE58CB004);
+ imm32_to_iregNo_EXACTLY2(p, /*r*/12,
+ (UInt)Ptr_to_ULong(location_of_counter));
+ VexInvalRange vir = {(HWord)p, 8};
+ return vir;
+}
+
+
#undef BITS4
#undef X0000
#undef X0001
@@ -4136,6 +4612,7 @@
#undef XXX___XX
#undef XXXXX__X
#undef XXXXXXXX
+#undef XX______
/*---------------------------------------------------------------*/
/*--- end host_arm_defs.c ---*/
diff --git a/priv/host_arm_defs.h b/priv/host_arm_defs.h
index 0dea3f5..7eb4f3e 100644
--- a/priv/host_arm_defs.h
+++ b/priv/host_arm_defs.h
@@ -564,7 +564,9 @@
ARMin_LdSt16,
ARMin_LdSt8U,
ARMin_Ld8S,
- ARMin_Goto,
+ ARMin_XDirect, /* direct transfer to GA */
+ ARMin_XIndir, /* indirect transfer to GA */
+ ARMin_XAssisted, /* assisted transfer to GA */
ARMin_CMov,
ARMin_Call,
ARMin_Mul,
@@ -604,9 +606,10 @@
allocator demands them to consist of no more than two instructions.
We will split this instruction into 2 or 3 ARM instructions during the
emitting phase.
-
NOTE: source and destination registers should be different! */
- ARMin_Add32
+ ARMin_Add32,
+ ARMin_EvCheck, /* Event check */
+ ARMin_ProfInc /* 64-bit profile counter increment */
}
ARMInstrTag;
@@ -676,13 +679,30 @@
HReg rD;
ARMAMode2* amode;
} Ld8S;
- /* Pseudo-insn. Go to guest address gnext, on given
- condition, which could be ARMcc_AL. */
+ /* Update the guest R15T value, then exit requesting to chain
+ to it. May be conditional. Urr, use of Addr32 implicitly
+ assumes that wordsize(guest) == wordsize(host). */
struct {
+ Addr32 dstGA; /* next guest address */
+ ARMAMode1* amR15T; /* amode in guest state for R15T */
+ ARMCondCode cond; /* can be ARMcc_AL */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ ARMAMode1* amR15T;
+ ARMCondCode cond; /* can be ARMcc_AL */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ ARMAMode1* amR15T;
+ ARMCondCode cond; /* can be ARMcc_AL */
IRJumpKind jk;
- ARMCondCode cond;
- HReg gnext;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be ARMcc_AL. */
struct {
@@ -905,6 +925,15 @@
HReg rN;
UInt imm32;
} Add32;
+ struct {
+ ARMAMode1* amCounter;
+ ARMAMode1* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} ARMin;
}
ARMInstr;
@@ -921,7 +950,12 @@
HReg, ARMAMode2* );
extern ARMInstr* ARMInstr_LdSt8U ( Bool isLoad, HReg, ARMAMode1* );
extern ARMInstr* ARMInstr_Ld8S ( HReg, ARMAMode2* );
-extern ARMInstr* ARMInstr_Goto ( IRJumpKind, ARMCondCode, HReg gnext );
+extern ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, Bool toFastEP );
+extern ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond );
+extern ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
+ ARMCondCode cond, IRJumpKind jk );
extern ARMInstr* ARMInstr_CMov ( ARMCondCode, HReg dst, ARMRI84* src );
extern ARMInstr* ARMInstr_Call ( ARMCondCode, HWord, Int nArgRegs );
extern ARMInstr* ARMInstr_Mul ( ARMMulOp op );
@@ -957,6 +991,9 @@
extern ARMInstr* ARMInstr_NeonImm ( HReg, ARMNImm* );
extern ARMInstr* ARMInstr_NCMovQ ( ARMCondCode, HReg, HReg );
extern ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 );
+extern ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
+ ARMAMode1* amFailAddr );
+extern ARMInstr* ARMInstr_ProfInc ( void );
extern void ppARMInstr ( ARMInstr* );
@@ -966,10 +1003,13 @@
extern void getRegUsage_ARMInstr ( HRegUsage*, ARMInstr*, Bool );
extern void mapRegs_ARMInstr ( HRegRemap*, ARMInstr*, Bool );
extern Bool isMove_ARMInstr ( ARMInstr*, HReg*, HReg* );
-extern Int emit_ARMInstr ( UChar* buf, Int nbuf, ARMInstr*,
- Bool,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+extern Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, ARMInstr* i,
+ Bool mode64,
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
@@ -977,8 +1017,34 @@
HReg rreg, Int offset, Bool );
extern void getAllocableRegs_ARM ( Int*, HReg** );
-extern HInstrArray* iselSB_ARM ( IRSB*, VexArch,
- VexArchInfo*, VexAbiInfo* );
+extern HInstrArray* iselSB_ARM ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and
+ host_EvC_COUNTER. */
+extern Int evCheckSzB_ARM ( void );
+
+/* Perform a chaining and unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_ARM ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+extern VexInvalRange unchainXDirect_ARM ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_ARM ( void* place_to_patch,
+ ULong* location_of_counter );
+
#endif /* ndef __VEX_HOST_ARM_DEFS_H */
diff --git a/priv/host_arm_isel.c b/priv/host_arm_isel.c
index e695567..7ddd077 100644
--- a/priv/host_arm_isel.c
+++ b/priv/host_arm_isel.c
@@ -84,9 +84,6 @@
32-bit virtual HReg, which holds the high half
of the value.
- - The name of the vreg in which we stash a copy of the link reg, so
- helper functions don't kill it.
-
- The code array, that is, the insns selected so far.
- A counter, for generating new virtual registers.
@@ -94,23 +91,38 @@
- The host hardware capabilities word. This is set at the start
and does not change.
- Note, this is all host-independent. */
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+ in this block. It is set at the start and does not change. This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
+
+ Note, this is all (well, mostly) host-independent.
+*/
typedef
struct {
+ /* Constant -- set at the start; these do not change. */
IRTypeEnv* type_env;
HReg* vregmap;
HReg* vregmapHI;
Int n_vregmap;
- HReg savedLR;
-
- HInstrArray* code;
-
- Int vreg_ctr;
-
UInt hwcaps;
+
+ Bool chainingAllowed;
+ Addr64 max_ga;
+
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
}
ISelEnv;
@@ -1514,7 +1526,7 @@
}
case Iop_64to8: {
HReg rHi, rLo;
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tmp = iselNeon64Expr(env, e->Iex.Unop.arg);
@@ -1819,7 +1831,7 @@
/* read 64-bit IRTemp */
if (e->tag == Iex_RdTmp) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tmp = iselNeon64Expr(env, e);
@@ -2028,7 +2040,7 @@
/* It is convenient sometimes to call iselInt64Expr even when we
have NEON support (e.g. in do_helper_call we need 64-bit
arguments as 2 x 32 regs). */
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tHi = newVRegI(env);
HReg tLo = newVRegI(env);
HReg tmp = iselNeon64Expr(env, e);
@@ -5339,7 +5351,7 @@
if (e->tag == Iex_Unop) {
switch (e->Iex.Unop.op) {
case Iop_ReinterpI64asF64: {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
return iselNeon64Expr(env, e->Iex.Unop.arg);
} else {
HReg srcHi, srcLo;
@@ -5631,7 +5643,7 @@
return;
}
if (tyd == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg dD = iselNeon64Expr(env, stmt->Ist.Store.data);
ARMAModeN* am = iselIntExpr_AModeN(env, stmt->Ist.Store.addr);
addInstr(env, ARMInstr_NLdStD(False, dD, am));
@@ -5680,7 +5692,7 @@
return;
}
if (tyd == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg addr = newVRegI(env);
HReg qD = iselNeon64Expr(env, stmt->Ist.Put.data);
addInstr(env, ARMInstr_Add32(addr, hregARM_R8(),
@@ -5765,7 +5777,7 @@
return;
}
if (ty == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg src = iselNeon64Expr(env, stmt->Ist.WrTmp.data);
HReg dst = lookupIRTemp(env, tmp);
addInstr(env, ARMInstr_NUnary(ARMneon_COPY, dst, src, 4, False));
@@ -5824,7 +5836,7 @@
retty = typeOfIRTemp(env->type_env, d->tmp);
if (retty == Ity_I64) {
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg tmp = lookupIRTemp(env, d->tmp);
addInstr(env, ARMInstr_VXferD(True, tmp, hregARM_R1(),
hregARM_R0()));
@@ -5878,7 +5890,7 @@
move it into a result register pair. On a NEON capable
CPU, the result register will be a 64 bit NEON
register, so we must move it there instead. */
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (env->hwcaps & VEX_HWCAPS_ARM_NEON) {
HReg dst = lookupIRTemp(env, res);
addInstr(env, ARMInstr_VXferD(True, dst, hregARM_R3(),
hregARM_R2()));
@@ -5964,15 +5976,53 @@
/* --------- EXIT --------- */
case Ist_Exit: {
- HReg gnext;
- ARMCondCode cc;
if (stmt->Ist.Exit.dst->tag != Ico_U32)
vpanic("isel_arm: Ist_Exit: dst is not a 32-bit value");
- gnext = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env, stmt->Ist.Exit.guard);
- addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
- addInstr(env, ARMInstr_Goto(stmt->Ist.Exit.jk, cc, gnext));
- return;
+
+ ARMCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(),
+ stmt->Ist.Exit.offsIP);
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring
+ || stmt->Ist.Exit.jk == Ijk_Call
+ || stmt->Ist.Exit.jk == Ijk_Ret) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, ARMInstr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
+ amR15T, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ //case Ijk_MapFail:
+ //case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn:
+ case Ijk_NoDecode:
+ {
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, cc,
+ stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
@@ -5987,19 +6037,85 @@
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- HReg rDst;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
}
- rDst = iselIntExpr_R(env, next);
- addInstr(env, mk_iMOVds_RR(hregARM_R14(), env->savedLR));
- addInstr(env, ARMInstr_Goto(jk, ARMcc_AL, rDst));
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U32);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)cdst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, ARMInstr_XDirect(cdst->Ico.U32,
+ amR15T, ARMcc_AL,
+ toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, next);
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselIntExpr_R(env, next);
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
+ if (env->chainingAllowed) {
+ addInstr(env, ARMInstr_XIndir(r, amR15T, ARMcc_AL));
+ } else {
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoDecode:
+ case Ijk_NoRedir:
+ //case Ijk_Sys_int128:
+ //case Ijk_Yield: case Ijk_SigTRAP:
+ {
+ HReg r = iselIntExpr_R(env, next);
+ ARMAMode1* amR15T = ARMAMode1_RI(hregARM_R8(), offsIP);
+ addInstr(env, ARMInstr_XAssisted(r, amR15T, ARMcc_AL, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
@@ -6009,21 +6125,27 @@
/* Translate an entire SB to arm code. */
-HInstrArray* iselSB_ARM ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi/*UNUSED*/ )
+HInstrArray* iselSB_ARM ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
- Int i, j;
- HReg hreg, hregHI;
- ISelEnv* env;
- UInt hwcaps_host = archinfo_host->hwcaps;
- static UInt counter = 0;
+ Int i, j;
+ HReg hreg, hregHI;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ ARMAMode1 *amCounter, *amFailAddr;
/* sanity ... */
vassert(arch_host == VexArchARM);
/* hwcaps should not change from one ISEL call to another. */
- arm_hwcaps = hwcaps_host;
+ arm_hwcaps = hwcaps_host; // JRS 2012 Mar 31: FIXME (RM)
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
@@ -6041,6 +6163,11 @@
env->vregmap = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
+ /* and finally ... */
+ env->chainingAllowed = chainingAllowed;
+ env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
+
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
j = 0;
@@ -6052,7 +6179,7 @@
case Ity_I16:
case Ity_I32: hreg = mkHReg(j++, HRcInt32, True); break;
case Ity_I64:
- if (arm_hwcaps & VEX_HWCAPS_ARM_NEON) {
+ if (hwcaps_host & VEX_HWCAPS_ARM_NEON) {
hreg = mkHReg(j++, HRcFlt64, True);
} else {
hregHI = mkHReg(j++, HRcInt32, True);
@@ -6070,21 +6197,27 @@
}
env->vreg_ctr = j;
- /* Keep a copy of the link reg, since any call to a helper function
- will trash it, and we can't get back to the dispatcher once that
- happens. */
- env->savedLR = newVRegI(env);
- addInstr(env, mk_iMOVds_RR(env->savedLR, hregARM_R14()));
+ /* The very first instruction must be an event check. */
+ amCounter = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_Counter);
+ amFailAddr = ARMAMode1_RI(hregARM_R8(), offs_Host_EvC_FailAddr);
+ addInstr(env, ARMInstr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfCtr. */
+ if (addProfInc) {
+ addInstr(env, ARMInstr_ProfInc());
+ }
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
- counter++;
return env->code;
}
diff --git a/priv/host_ppc_defs.c b/priv/host_ppc_defs.c
index 9974b7b..f8ff79c 100644
--- a/priv/host_ppc_defs.c
+++ b/priv/host_ppc_defs.c
@@ -845,13 +845,33 @@
vassert(0 == (argiregs & ~mask));
return i;
}
-PPCInstr* PPCInstr_Goto ( IRJumpKind jk,
- PPCCondCode cond, PPCRI* dst ) {
- PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
- i->tag = Pin_Goto;
- i->Pin.Goto.cond = cond;
- i->Pin.Goto.dst = dst;
- i->Pin.Goto.jk = jk;
+PPCInstr* PPCInstr_XDirect ( Addr64 dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, Bool toFastEP ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_XDirect;
+ i->Pin.XDirect.dstGA = dstGA;
+ i->Pin.XDirect.amCIA = amCIA;
+ i->Pin.XDirect.cond = cond;
+ i->Pin.XDirect.toFastEP = toFastEP;
+ return i;
+}
+PPCInstr* PPCInstr_XIndir ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_XIndir;
+ i->Pin.XIndir.dstGA = dstGA;
+ i->Pin.XIndir.amCIA = amCIA;
+ i->Pin.XIndir.cond = cond;
+ return i;
+}
+PPCInstr* PPCInstr_XAssisted ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, IRJumpKind jk ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_XAssisted;
+ i->Pin.XAssisted.dstGA = dstGA;
+ i->Pin.XAssisted.amCIA = amCIA;
+ i->Pin.XAssisted.cond = cond;
+ i->Pin.XAssisted.jk = jk;
return i;
}
PPCInstr* PPCInstr_CMov ( PPCCondCode cond,
@@ -1057,7 +1077,6 @@
i->Pin.DfpD128toD64.dst = dst;
return i;
}
-
PPCInstr* PPCInstr_DfpI64StoD128 ( PPCFpOp op, HReg dst_hi,
HReg dst_lo, HReg src ) {
PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
@@ -1068,6 +1087,20 @@
i->Pin.DfpI64StoD128.dst_lo = dst_lo;
return i;
}
+PPCInstr* PPCInstr_EvCheck ( PPCAMode* amCounter,
+ PPCAMode* amFailAddr ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_EvCheck;
+ i->Pin.EvCheck.amCounter = amCounter;
+ i->Pin.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+PPCInstr* PPCInstr_ProfInc ( void ) {
+ PPCInstr* i = LibVEX_Alloc(sizeof(PPCInstr));
+ i->tag = Pin_ProfInc;
+ return i;
+}
+
/*
Valid combo | fromI | int32 | syned | flt64 |
@@ -1432,26 +1465,53 @@
vex_printf("] }");
break;
}
- case Pin_Goto:
- vex_printf("goto: ");
- if (i->Pin.Goto.cond.test != Pct_ALWAYS) {
- vex_printf("if (%s) ", showPPCCondCode(i->Pin.Goto.cond));
- }
- vex_printf("{ ");
- if (i->Pin.Goto.jk != Ijk_Boring
- && i->Pin.Goto.jk != Ijk_Call
- && i->Pin.Goto.jk != Ijk_Ret) {
- vex_printf("li %%r31,$");
- ppIRJumpKind(i->Pin.Goto.jk);
- vex_printf(" ; ");
- }
- if (i->Pin.Goto.dst->tag == Pri_Imm) {
- ppLoadImm(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Imm,
- mode64);
+ case Pin_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%s) { ",
+ showPPCCondCode(i->Pin.XDirect.cond));
+ if (mode64) {
+ vex_printf("imm64 r30,0x%llx; ", i->Pin.XDirect.dstGA);
+ vex_printf("std r30,");
} else {
- ppMovReg(hregPPC_GPR3(mode64), i->Pin.Goto.dst->Pri.Reg);
+ vex_printf("imm32 r30,0x%llx; ", i->Pin.XDirect.dstGA);
+ vex_printf("stw r30,");
}
- vex_printf(" ; blr }");
+ ppPPCAMode(i->Pin.XDirect.amCIA);
+ vex_printf("; ");
+ if (mode64) {
+ vex_printf("imm64-fixed5 r30,$disp_cp_chain_me_to_%sEP; ",
+ i->Pin.XDirect.toFastEP ? "fast" : "slow");
+ } else {
+ vex_printf("imm32-fixed2 r30,$disp_cp_chain_me_to_%sEP; ",
+ i->Pin.XDirect.toFastEP ? "fast" : "slow");
+ }
+ vex_printf("mtctr r30; bctrl }");
+ return;
+ case Pin_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%s) { ",
+ showPPCCondCode(i->Pin.XIndir.cond));
+ vex_printf("%s ", mode64 ? "std" : "stw");
+ ppHRegPPC(i->Pin.XIndir.dstGA);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.XIndir.amCIA);
+ vex_printf("; ");
+ vex_printf("imm%s r30,$disp_cp_xindir; ", mode64 ? "64" : "32");
+ vex_printf("mtctr r30; bctr }");
+ return;
+ case Pin_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%s) { ",
+ showPPCCondCode(i->Pin.XAssisted.cond));
+ vex_printf("%s ", mode64 ? "std" : "stw");
+ ppHRegPPC(i->Pin.XAssisted.dstGA);
+ vex_printf(",");
+ ppPPCAMode(i->Pin.XAssisted.amCIA);
+ vex_printf("; ");
+ vex_printf("li r31,$IRJumpKind_to_TRCVAL(%d); ",
+ (Int)i->Pin.XAssisted.jk);
+ vex_printf("imm%s r30,$disp_cp_xindir; ", mode64 ? "64" : "32");
+ vex_printf("mtctr r30; bctr }");
return;
case Pin_CMov:
vex_printf("cmov (%s) ", showPPCCondCode(i->Pin.CMov.cond));
@@ -1875,6 +1935,30 @@
vex_printf(",");
return;
+ case Pin_EvCheck:
+ /* Note that the counter dec is 32 bit even in 64-bit mode. */
+ vex_printf("(evCheck) ");
+ vex_printf("lwz r30,");
+ ppPPCAMode(i->Pin.EvCheck.amCounter);
+ vex_printf("; addic. r30,r30,-1; ");
+ vex_printf("stw r30,");
+ ppPPCAMode(i->Pin.EvCheck.amCounter);
+ vex_printf("; bge nofail; lwz r30,");
+ ppPPCAMode(i->Pin.EvCheck.amFailAddr);
+ vex_printf("; mtctr r30; bctr; nofail:");
+ return;
+
+ case Pin_ProfInc:
+ if (mode64) {
+ vex_printf("(profInc) imm64-fixed5 r30,$NotKnownYet; ");
+ vex_printf("ld r29,(r30); addi r29,r29,1; std r29,(r30)");
+ } else {
+ vex_printf("(profInc) imm32-fixed2 r30,$NotKnownYet; ");
+ vex_printf("lwz r29,4(r30); addic. r29,r29,1; stw r29,4(r30)");
+ vex_printf("lwz r29,0(r30); addze r29,r29; stw r29,0(r30)");
+ }
+ break;
+
default:
vex_printf("\nppPPCInstr: No such tag(%d)\n", (Int)i->tag);
vpanic("ppPPCInstr");
@@ -1973,17 +2057,21 @@
and no other, as a destination temporary. */
return;
}
- case Pin_Goto:
- addRegUsage_PPCRI(u, i->Pin.Goto.dst);
- /* GPR3 holds destination address from Pin_Goto */
- addHRegUse(u, HRmWrite, hregPPC_GPR3(mode64));
- if (i->Pin.Goto.jk != Ijk_Boring
- && i->Pin.Goto.jk != Ijk_Call
- && i->Pin.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since the guest state pointer
- register is not actually available to the allocator.
- But still .. */
- addHRegUse(u, HRmWrite, GuestStatePtr(mode64));
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is empty,
+ hence only (1) is relevant here. */
+ case Pin_XDirect:
+ addRegUsage_PPCAMode(u, i->Pin.XDirect.amCIA);
+ return;
+ case Pin_XIndir:
+ addHRegUse(u, HRmRead, i->Pin.XIndir.dstGA);
+ addRegUsage_PPCAMode(u, i->Pin.XIndir.amCIA);
+ return;
+ case Pin_XAssisted:
+ addHRegUse(u, HRmRead, i->Pin.XAssisted.dstGA);
+ addRegUsage_PPCAMode(u, i->Pin.XAssisted.amCIA);
return;
case Pin_CMov:
addRegUsage_PPCRI(u, i->Pin.CMov.src);
@@ -2185,7 +2273,18 @@
addHRegUse(u, HRmWrite, i->Pin.DfpI64StoD128.dst_hi);
addHRegUse(u, HRmWrite, i->Pin.DfpI64StoD128.dst_lo);
return;
-
+ case Pin_EvCheck:
+ /* We expect both amodes only to mention the GSP (r31), so this
+ is in fact pointless, since GSP isn't allocatable, but
+ anyway.. */
+ addRegUsage_PPCAMode(u, i->Pin.EvCheck.amCounter);
+ addRegUsage_PPCAMode(u, i->Pin.EvCheck.amFailAddr);
+ addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64)); /* also unavail to RA */
+ return;
+ case Pin_ProfInc:
+ addHRegUse(u, HRmWrite, hregPPC_GPR29(mode64));
+ addHRegUse(u, HRmWrite, hregPPC_GPR30(mode64));
+ return;
default:
ppPPCInstr(i, mode64);
vpanic("getRegUsage_PPCInstr");
@@ -2239,8 +2338,16 @@
return;
case Pin_Call:
return;
- case Pin_Goto:
- mapRegs_PPCRI(m, i->Pin.Goto.dst);
+ case Pin_XDirect:
+ mapRegs_PPCAMode(m, i->Pin.XDirect.amCIA);
+ return;
+ case Pin_XIndir:
+ mapReg(m, &i->Pin.XIndir.dstGA);
+ mapRegs_PPCAMode(m, i->Pin.XIndir.amCIA);
+ return;
+ case Pin_XAssisted:
+ mapReg(m, &i->Pin.XAssisted.dstGA);
+ mapRegs_PPCAMode(m, i->Pin.XAssisted.amCIA);
return;
case Pin_CMov:
mapRegs_PPCRI(m, i->Pin.CMov.src);
@@ -2424,7 +2531,16 @@
mapReg(m, &i->Pin.DfpI64StoD128.dst_hi);
mapReg(m, &i->Pin.DfpI64StoD128.dst_lo);
return;
-
+ case Pin_EvCheck:
+ /* We expect both amodes only to mention the GSP (r31), so this
+ is in fact pointless, since GSP isn't allocatable, but
+ anyway.. */
+ mapRegs_PPCAMode(m, i->Pin.EvCheck.amCounter);
+ mapRegs_PPCAMode(m, i->Pin.EvCheck.amFailAddr);
+ return;
+ case Pin_ProfInc:
+ /* hardwires r29 and r30 -- nothing to modify. */
+ return;
default:
ppPPCInstr(i, mode64);
vpanic("mapRegs_PPCInstr");
@@ -2558,7 +2674,7 @@
return n;
}
-/* Emit 32bit instruction big-endianly */
+/* Emit an instruction big-endianly */
static UChar* emit32 ( UChar* p, UInt w32 )
{
*p++ = toUChar((w32 >> 24) & 0x000000FF);
@@ -2568,6 +2684,17 @@
return p;
}
+/* Fetch an instruction big-endianly */
+static UInt fetch32 ( UChar* p )
+{
+ UInt w32 = 0;
+ w32 |= ((0xFF & (UInt)p[0]) << 24);
+ w32 |= ((0xFF & (UInt)p[1]) << 16);
+ w32 |= ((0xFF & (UInt)p[2]) << 8);
+ w32 |= ((0xFF & (UInt)p[3]) << 0);
+ return w32;
+}
+
/* The following mkForm[...] functions refer to ppc instruction forms
as per PPC32 p576
*/
@@ -2866,6 +2993,210 @@
return p;
}
+/* A simplified version of mkLoadImm that always generates 2 or 5
+ instructions (32 or 64 bits respectively) even if it could generate
+ fewer. This is needed for generating fixed sized patchable
+ sequences. */
+static UChar* mkLoadImm_EXACTLY2or5 ( UChar* p,
+ UInt r_dst, ULong imm, Bool mode64 )
+{
+ vassert(r_dst < 0x20);
+
+ if (!mode64) {
+ /* In 32-bit mode, make sure the top 32 bits of imm are a sign
+ extension of the bottom 32 bits. (Probably unnecessary.) */
+ UInt u32 = (UInt)imm;
+ Int s32 = (Int)u32;
+ Long s64 = (Long)s32;
+ imm = (ULong)s64;
+ }
+
+ if (!mode64) {
+ // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
+ p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+ // ori r_dst, r_dst, (imm & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+
+ } else {
+ // full 64bit immediate load: 5 (five!) insns.
+
+ // load high word
+ // lis r_dst, (imm>>48) & 0xFFFF
+ p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm>>32) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+
+ // shift r_dst low word to high word => rldicr
+ p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+
+ // load low word
+ // oris r_dst, r_dst, (imm>>16) & 0xFFFF
+ p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+ }
+ return p;
+}
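+
+/* Tracing the 64-bit path above with imm = 0x6555655565556555 (the
+   ProfInc placeholder) may help -- a sketch only:
+     lis    r30,0x6555       ; r30 = 0x0000000065550000
+     ori    r30,r30,0x6555   ; r30 = 0x0000000065556555
+     rldicr r30,r30,32,31    ; r30 = 0x6555655500000000
+     oris   r30,r30,0x6555   ; r30 = 0x6555655565550000
+     ori    r30,r30,0x6555   ; r30 = 0x6555655565556555
+*/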
+
+/* Checks whether the sequence of bytes at p was indeed created
+ by mkLoadImm_EXACTLY2or5 with the given parameters. */
+static Bool isLoadImm_EXACTLY2or5 ( UChar* p_to_check,
+ UInt r_dst, ULong imm, Bool mode64 )
+{
+ vassert(r_dst < 0x20);
+
+ if (!mode64) {
+ /* In 32-bit mode, make sure the top 32 bits of imm are a sign
+ extension of the bottom 32 bits. (Probably unnecessary.) */
+ UInt u32 = (UInt)imm;
+ Int s32 = (Int)u32;
+ Long s64 = (Long)s32;
+ imm = (ULong)s64;
+ }
+
+ if (!mode64) {
+ UInt expect[2] = { 0, 0 };
+ UChar* p = (UChar*)&expect[0];
+ // addis r_dst,r0,(imm>>16) => lis r_dst, (imm>>16)
+ p = mkFormD(p, 15, r_dst, 0, (imm>>16) & 0xFFFF);
+ // ori r_dst, r_dst, (imm & 0xFFFF)
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+ vassert(p == (UChar*)&expect[2]);
+
+ return fetch32(p_to_check + 0) == expect[0]
+ && fetch32(p_to_check + 4) == expect[1];
+
+ } else {
+ UInt expect[5] = { 0, 0, 0, 0, 0 };
+ UChar* p = (UChar*)&expect[0];
+ // full 64bit immediate load: 5 (five!) insns.
+
+ // load high word
+ // lis r_dst, (imm>>48) & 0xFFFF
+ p = mkFormD(p, 15, r_dst, 0, (imm>>48) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm>>32) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, (imm>>32) & 0xFFFF);
+
+ // shift r_dst low word to high word => rldicr
+ p = mkFormMD(p, 30, r_dst, r_dst, 32, 31, 1);
+
+ // load low word
+ // oris r_dst, r_dst, (imm>>16) & 0xFFFF
+ p = mkFormD(p, 25, r_dst, r_dst, (imm>>16) & 0xFFFF);
+
+ // ori r_dst, r_dst, (imm) & 0xFFFF
+ p = mkFormD(p, 24, r_dst, r_dst, imm & 0xFFFF);
+
+ vassert(p == (UChar*)&expect[5]);
+
+ return fetch32(p_to_check + 0) == expect[0]
+ && fetch32(p_to_check + 4) == expect[1]
+ && fetch32(p_to_check + 8) == expect[2]
+ && fetch32(p_to_check + 12) == expect[3]
+ && fetch32(p_to_check + 16) == expect[4];
+ }
+}
+
+
+/* Generate a machine-word sized load or store. Simplified version of
+ the Pin_Load and Pin_Store cases below. */
+static UChar* do_load_or_store_machine_word (
+ UChar* p, Bool isLoad,
+ UInt reg, PPCAMode* am, Bool mode64 )
+{
+ if (isLoad) {
+ UInt opc1, sz = mode64 ? 8 : 4;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ switch (sz) {
+ case 4: opc1 = 32; vassert(!mode64); break;
+ case 8: opc1 = 58; vassert(mode64); break;
+ default: vassert(0);
+ }
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ } else /*store*/ {
+ UInt opc1, sz = mode64 ? 8 : 4;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ switch (sz) {
+ case 4: opc1 = 36; vassert(!mode64); break;
+ case 8: opc1 = 62; vassert(mode64); break;
+ default: vassert(0);
+ }
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ }
+ return p;
+}
+
+/* Generate a 32-bit sized load or store. Simplified version of
+ do_load_or_store_machine_word above. */
+static UChar* do_load_or_store_word32 (
+ UChar* p, Bool isLoad,
+ UInt reg, PPCAMode* am, Bool mode64 )
+{
+ if (isLoad) {
+ UInt opc1;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ opc1 = 32;
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ } else /*store*/ {
+ UInt opc1;
+ switch (am->tag) {
+ case Pam_IR:
+ if (mode64) {
+ vassert(0 == (am->Pam.IR.index & 3));
+ }
+ opc1 = 36;
+ p = doAMode_IR(p, opc1, reg, am, mode64);
+ break;
+ case Pam_RR:
+ /* we could handle this case, but we don't expect to ever
+ need to. */
+ vassert(0);
+ default:
+ vassert(0);
+ }
+ }
+ return p;
+}
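+
+/* (Both helpers above use PPC primary opcodes straight from the ISA
+   tables: 32 = lwz and 36 = stw for 32-bit accesses; 58 = ld and
+   62 = std for the 64-bit machine-word cases.) */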
+
/* Move r_src to r_dst */
static UChar* mkMoveReg ( UChar* p, UInt r_dst, UInt r_src )
{
@@ -2926,18 +3257,19 @@
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
- imperative to emit position-independent code.
-
- Note, dispatch should always be NULL since ppc32/64 backends
- use a call-return scheme to get from the dispatcher to generated
- code and back.
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else leave
+ it unchanged.
*/
-Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr* i,
+Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, PPCInstr* i,
Bool mode64,
- void* dispatch_unassisted, void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UChar* p = &buf[0];
- UChar* ptmp = p;
vassert(nbuf >= 32);
if (0) {
@@ -3304,6 +3636,7 @@
getRegUsage_PPCInstr above, %r10 is used as an address temp */
/* jump over the following insns if condition does not hold */
+ UChar* ptmp = NULL;
if (cond.test != Pct_ALWAYS) {
/* jmp fwds if !condition */
/* don't know how many bytes to jump over yet...
@@ -3332,75 +3665,176 @@
goto done;
}
- case Pin_Goto: {
- UInt trc = 0;
- UChar r_ret = 3; /* Put target addr into %r3 */
- PPCCondCode cond = i->Pin.Goto.cond;
- UInt r_dst;
- ULong imm_dst;
+ case Pin_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated
+ with the chainXDirect_PPC and unchainXDirect_PPC below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations
+ can't use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
- vassert(dispatch_unassisted == NULL);
- vassert(dispatch_assisted == NULL);
-
- /* First off, if this is conditional, create a conditional
- jump over the rest of it. */
- if (cond.test != Pct_ALWAYS) {
- /* jmp fwds if !condition */
- /* don't know how many bytes to jump over yet...
- make space for a jump instruction and fill in later. */
- ptmp = p; /* fill in this bit later */
+ /* First off, if this is conditional, create a conditional jump
+ over the rest of it. Or at least, leave a space for it that
+ we will shortly fill in. */
+ UChar* ptmp = NULL;
+ if (i->Pin.XDirect.cond.test != Pct_ALWAYS) {
+ vassert(i->Pin.XDirect.cond.flag != Pcf_NONE);
+ ptmp = p;
p += 4;
- }
-
- // cond succeeds...
-
- /* If a non-boring, set GuestStatePtr appropriately. */
- switch (i->Pin.Goto.jk) {
- case Ijk_ClientReq: trc = VEX_TRC_JMP_CLIENTREQ; break;
- case Ijk_Sys_syscall: trc = VEX_TRC_JMP_SYS_SYSCALL; break;
- case Ijk_Yield: trc = VEX_TRC_JMP_YIELD; break;
- case Ijk_EmWarn: trc = VEX_TRC_JMP_EMWARN; break;
- case Ijk_EmFail: trc = VEX_TRC_JMP_EMFAIL; break;
- case Ijk_MapFail: trc = VEX_TRC_JMP_MAPFAIL; break;
- case Ijk_NoDecode: trc = VEX_TRC_JMP_NODECODE; break;
- case Ijk_TInval: trc = VEX_TRC_JMP_TINVAL; break;
- case Ijk_NoRedir: trc = VEX_TRC_JMP_NOREDIR; break;
- case Ijk_SigTRAP: trc = VEX_TRC_JMP_SIGTRAP; break;
- case Ijk_SigBUS: trc = VEX_TRC_JMP_SIGBUS; break;
- case Ijk_Ret:
- case Ijk_Call:
- case Ijk_Boring:
- break;
- default:
- ppIRJumpKind(i->Pin.Goto.jk);
- vpanic("emit_PPCInstr.Pin_Goto: unknown jump kind");
- }
- if (trc !=0) {
- vassert(trc < 0x10000);
- /* addi r31,0,trc */
- p = mkFormD(p, 14, 31, 0, trc); // p += 4
- }
-
- /* Get the destination address into %r_ret */
- if (i->Pin.Goto.dst->tag == Pri_Imm) {
- imm_dst = i->Pin.Goto.dst->Pri.Imm;
- p = mkLoadImm(p, r_ret, imm_dst, mode64); // p += 4|8|20
} else {
- vassert(i->Pin.Goto.dst->tag == Pri_Reg);
- r_dst = iregNo(i->Pin.Goto.dst->Pri.Reg, mode64);
- p = mkMoveReg(p, r_ret, r_dst); // p += 4
+ vassert(i->Pin.XDirect.cond.flag == Pcf_NONE);
}
-
- /* blr */
- p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 16, 0); // p += 4
+
+ /* Update the guest CIA. */
+ /* imm32/64 r30, dstGA */
+ if (!mode64) vassert(0 == (((ULong)i->Pin.XDirect.dstGA) >> 32));
+ p = mkLoadImm(p, /*r*/30, (ULong)i->Pin.XDirect.dstGA, mode64);
+ /* stw/std r30, amCIA */
+ p = do_load_or_store_machine_word(
+ p, False/*!isLoad*/,
+ /*r*/30, i->Pin.XDirect.amCIA, mode64
+ );
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the number of
+ instructions (32-bit: 4, 64-bit: 7) below. */
+ /* imm32/64-fixed r30, VG_(disp_cp_chain_me_to_{slowEP,fastEP} */
+ void* disp_cp_chain_me
+ = i->Pin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = mkLoadImm_EXACTLY2or5(
+ p, /*r*/30, Ptr_to_ULong(disp_cp_chain_me), mode64);
+ /* mtctr r30 */
+ p = mkFormXFX(p, /*r*/30, 9, 467);
+ /* bctrl */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 1);
+ /* --- END of PATCHABLE BYTES --- */
/* Fix up the conditional jump, if there was one. */
- if (cond.test != Pct_ALWAYS) {
+ if (i->Pin.XDirect.cond.test != Pct_ALWAYS) {
Int delta = p - ptmp;
- vassert(delta >= 12 && delta <= 32);
+ vassert(delta >= 16 && delta <= 64 && 0 == (delta & 3));
/* bc !ct,cf,delta */
- mkFormB(ptmp, invertCondTest(cond.test),
- cond.flag, delta>>2, 0, 0);
+ mkFormB(ptmp, invertCondTest(i->Pin.XDirect.cond.test),
+ i->Pin.XDirect.cond.flag, (delta>>2), 0, 0);
+ }
+ goto done;
+ }
+
+ case Pin_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed --
+ no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That means
+ no XDirects or XIndirs out from no-redir translations.
+ Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* First off, if this is conditional, create a conditional jump
+ over the rest of it. Or at least, leave a space for it that
+ we will shortly fill in. */
+ UChar* ptmp = NULL;
+ if (i->Pin.XIndir.cond.test != Pct_ALWAYS) {
+ vassert(i->Pin.XIndir.cond.flag != Pcf_NONE);
+ ptmp = p;
+ p += 4;
+ } else {
+ vassert(i->Pin.XIndir.cond.flag == Pcf_NONE);
+ }
+
+ /* Update the guest CIA. */
+ /* stw/std r-dstGA, amCIA */
+ p = do_load_or_store_machine_word(
+ p, False/*!isLoad*/,
+ iregNo(i->Pin.XIndir.dstGA, mode64),
+ i->Pin.XIndir.amCIA, mode64
+ );
+
+ /* imm32/64 r30, VG_(disp_cp_xindir) */
+ p = mkLoadImm(p, /*r*/30, (ULong)Ptr_to_ULong(disp_cp_xindir), mode64);
+ /* mtctr r30 */
+ p = mkFormXFX(p, /*r*/30, 9, 467);
+ /* bctr */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0);
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Pin.XIndir.cond.test != Pct_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3));
+ /* bc !ct,cf,delta */
+ mkFormB(ptmp, invertCondTest(i->Pin.XIndir.cond.test),
+ i->Pin.XIndir.cond.flag, (delta>>2), 0, 0);
+ }
+ goto done;
+ }
+
+ case Pin_XAssisted: {
+ /* First off, if this is conditional, create a conditional jump
+ over the rest of it. Or at least, leave a space for it that
+ we will shortly fill in. */
+ UChar* ptmp = NULL;
+ if (i->Pin.XAssisted.cond.test != Pct_ALWAYS) {
+ vassert(i->Pin.XAssisted.cond.flag != Pcf_NONE);
+ ptmp = p;
+ p += 4;
+ } else {
+ vassert(i->Pin.XAssisted.cond.flag == Pcf_NONE);
+ }
+
+ /* Update the guest CIA. */
+ /* stw/std r-dstGA, amCIA */
+ p = do_load_or_store_machine_word(
+ p, False/*!isLoad*/,
+ iregNo(i->Pin.XAssisted.dstGA, mode64),
+ i->Pin.XAssisted.amCIA, mode64
+ );
+
+ /* imm32/64 r31, $magic_number */
+ UInt trcval = 0;
+ switch (i->Pin.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
+ //case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_EmFail: trcval = VEX_TRC_JMP_EMFAIL; break;
+ //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ //case Ijk_Ret:
+ //case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->Pin.XAssisted.jk);
+ vpanic("emit_ARMInstr.Pin_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ p = mkLoadImm(p, /*r*/31, trcval, mode64);
+
+ /* imm32/64 r30, VG_(disp_cp_xassisted) */
+ p = mkLoadImm(p, /*r*/30,
+ (ULong)Ptr_to_ULong(disp_cp_xassisted), mode64);
+ /* mtctr r30 */
+ p = mkFormXFX(p, /*r*/30, 9, 467);
+ /* bctr */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0);
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Pin.XAssisted.cond.test != Pct_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta >= 16 && delta <= 32 && 0 == (delta & 3));
+ /* bc !ct,cf,delta */
+ mkFormB(ptmp, invertCondTest(i->Pin.XAssisted.cond.test),
+ i->Pin.XAssisted.cond.flag, (delta>>2), 0, 0);
}
goto done;
}
@@ -3415,6 +3849,7 @@
cond = i->Pin.CMov.cond;
/* branch (if cond fails) over move instrs */
+ UChar* ptmp = NULL;
if (cond.test != Pct_ALWAYS) {
/* don't know how many bytes to jump over yet...
make space for a jump instruction and fill in later. */
@@ -4433,6 +4868,7 @@
p = mkFormX(p, 63, fr_dst, 0, 10, 72, 0);
goto done;
}
+
case Pin_DfpI64StoD128: {
UInt fr_dstHi = fregNo( i->Pin.DfpI64StoD128.dst_hi );
UInt fr_dstLo = fregNo( i->Pin.DfpI64StoD128.dst_lo );
@@ -4451,6 +4887,87 @@
p = mkFormX(p, 63, fr_dstLo, 0, 11, 72, 0);
goto done;
}
+
+ case Pin_EvCheck: {
+ /* This requires a 32-bit dec/test in both 32- and 64-bit
+ modes. */
+ /* We generate:
+ lwz r30, amCounter
+ addic. r30, r30, -1
+ stw r30, amCounter
+ bge nofail
+ lwz/ld r30, amFailAddr
+ mtctr r30
+ bctr
+ nofail:
+ */
+ UChar* p0 = p;
+ /* lwz r30, amCounter */
+ p = do_load_or_store_word32(p, True/*isLoad*/, /*r*/30,
+ i->Pin.EvCheck.amCounter, mode64);
+ /* addic. r30,r30,-1 */
+ p = emit32(p, 0x37DEFFFF);
+ /* stw r30, amCounter */
+ p = do_load_or_store_word32(p, False/*!isLoad*/, /*r*/30,
+ i->Pin.EvCheck.amCounter, mode64);
+ /* bge nofail */
+ p = emit32(p, 0x40800010);
+ /* lwz/ld r30, amFailAddr */
+ p = do_load_or_store_machine_word(p, True/*isLoad*/, /*r*/30,
+ i->Pin.EvCheck.amFailAddr, mode64);
+ /* mtctr r30 */
+ p = mkFormXFX(p, /*r*/30, 9, 467);
+ /* bctr */
+ p = mkFormXL(p, 19, Pct_ALWAYS, 0, 0, 528, 0);
+ /* nofail: */
+
+ /* Crosscheck */
+ vassert(evCheckSzB_PPC() == (UChar*)p - (UChar*)p0);
+ goto done;
+ }
+
+ case Pin_ProfInc: {
+ /* We generate:
+ (ctrP is unknown now, so use 0x65556555, or 0x6555655565556555
+ in 64-bit mode, in the
+ expectation that a later call to LibVEX_patchProfCtr
+ will be used to fill in the immediate fields once the
+ right value is known.)
+ 32-bit:
+ imm32-exactly r30, 0x65556555
+ lwz r29, 4(r30)
+ addic. r29, r29, 1
+ stw r29, 4(r30)
+ lwz r29, 0(r30)
+ addze r29, r29
+ stw r29, 0(r30)
+ 64-bit:
+ imm64-exactly r30, 0x6555655565556555
+ ld r29, 0(r30)
+ addi r29, r29, 1
+ std r29, 0(r30)
+ */
+ if (mode64) {
+ p = mkLoadImm_EXACTLY2or5(
+ p, /*r*/30, 0x6555655565556555ULL, True/*mode64*/);
+ p = emit32(p, 0xEBBE0000);
+ p = emit32(p, 0x3BBD0001);
+ p = emit32(p, 0xFBBE0000);
+ } else {
+ p = mkLoadImm_EXACTLY2or5(
+ p, /*r*/30, 0x65556555ULL, False/*!mode64*/);
+ p = emit32(p, 0x83BE0004);
+ p = emit32(p, 0x37BD0001);
+ p = emit32(p, 0x93BE0004);
+ p = emit32(p, 0x83BE0000);
+ p = emit32(p, 0x7FBD0194);
+ p = emit32(p, 0x93BE0000);
+ }
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
+
default:
goto bad;
}
@@ -4462,10 +4979,151 @@
/*NOTREACHED*/
done:
- vassert(p - &buf[0] <= 32);
+ vassert(p - &buf[0] <= 64);
return p - &buf[0];
}
+
+/* How big is an event check? See case for Pin_EvCheck in
+ emit_PPCInstr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_PPC ( void )
+{
+ return 28;
+}
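+
+/* (7 insns at 4 bytes each: lwz, addic., stw, bge, lwz/ld, mtctr,
+   bctr -- see the Pin_EvCheck sequence above.) */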
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_PPC ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to,
+ Bool mode64 )
+{
+ /* What we're expecting to see is:
+ imm32/64-fixed r30, disp_cp_chain_me_to_EXPECTED
+ mtctr r30
+ bctrl
+ viz
+ <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5>
+ 7F C9 03 A6
+ 4E 80 04 21
+ */
+ UChar* p = (UChar*)place_to_chain;
+ vassert(0 == (3 & (HWord)p));
+ vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
+ Ptr_to_ULong(disp_cp_chain_me_EXPECTED),
+ mode64));
+ vassert(fetch32(p + (mode64 ? 20 : 8) + 0) == 0x7FC903A6);
+ vassert(fetch32(p + (mode64 ? 20 : 8) + 4) == 0x4E800421);
+ /* And what we want to change it to is:
+ imm32/64-fixed r30, place_to_jump_to
+ mtctr r30
+ bctr
+ viz
+ <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5>
+ 7F C9 03 A6
+ 4E 80 04 20
+ The replacement has the same length as the original.
+ */
+ p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
+ Ptr_to_ULong(place_to_jump_to), mode64);
+ p = emit32(p, 0x7FC903A6);
+ p = emit32(p, 0x4E800420);
+
+ Int len = p - (UChar*)place_to_chain;
+ vassert(len == (mode64 ? 28 : 16)); /* stay sane */
+ VexInvalRange vir = {(HWord)place_to_chain, len};
+ return vir;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_PPC ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me,
+ Bool mode64 )
+{
+ /* What we're expecting to see is:
+ imm32/64-fixed r30, place_to_jump_to_EXPECTED
+ mtctr r30
+ bctr
+ viz
+ <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5>
+ 7F C9 03 A6
+ 4E 80 04 20
+ */
+ UChar* p = (UChar*)place_to_unchain;
+ vassert(0 == (3 & (HWord)p));
+ vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
+ Ptr_to_ULong(place_to_jump_to_EXPECTED),
+ mode64));
+ vassert(fetch32(p + (mode64 ? 20 : 8) + 0) == 0x7FC903A6);
+ vassert(fetch32(p + (mode64 ? 20 : 8) + 4) == 0x4E800420);
+ /* And what we want to change it to is:
+ imm32/64-fixed r30, disp_cp_chain_me
+ mtctr r30
+ bctrl
+ viz
+ <8 or 20 bytes generated by mkLoadImm_EXACTLY2or5>
+ 7F C9 03 A6
+ 4E 80 04 21
+ The replacement has the same length as the original.
+ */
+ p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
+ Ptr_to_ULong(disp_cp_chain_me), mode64);
+ p = emit32(p, 0x7FC903A6);
+ p = emit32(p, 0x4E800421);
+
+ Int len = p - (UChar*)place_to_unchain;
+ vassert(len == (mode64 ? 28 : 16)); /* stay sane */
+ VexInvalRange vir = {(HWord)place_to_unchain, len};
+ return vir;
+}
+
+
+/* Patch the counter address into a profile inc point, as previously
+ created by the Pin_ProfInc case for emit_PPCInstr. */
+VexInvalRange patchProfInc_PPC ( void* place_to_patch,
+ ULong* location_of_counter,
+ Bool mode64 )
+{
+ UChar* p = (UChar*)place_to_patch;
+ vassert(0 == (3 & (HWord)p));
+
+ Int len = 0;
+ if (mode64) {
+ vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
+ 0x6555655565556555ULL, True/*mode64*/));
+ vassert(fetch32(p + 20) == 0xEBBE0000);
+ vassert(fetch32(p + 24) == 0x3BBD0001);
+ vassert(fetch32(p + 28) == 0xFBBE0000);
+ p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
+ Ptr_to_ULong(location_of_counter),
+ True/*mode64*/);
+ len = p - (UChar*)place_to_patch;
+ vassert(len == 20);
+ } else {
+ vassert(isLoadImm_EXACTLY2or5(p, /*r*/30,
+ 0x65556555ULL, False/*!mode64*/));
+ vassert(fetch32(p + 8) == 0x83BE0004);
+ vassert(fetch32(p + 12) == 0x37BD0001);
+ vassert(fetch32(p + 16) == 0x93BE0004);
+ vassert(fetch32(p + 20) == 0x83BE0000);
+ vassert(fetch32(p + 24) == 0x7FBD0194);
+ vassert(fetch32(p + 28) == 0x93BE0000);
+ p = mkLoadImm_EXACTLY2or5(p, /*r*/30,
+ Ptr_to_ULong(location_of_counter),
+ False/*!mode64*/);
+ len = p - (UChar*)place_to_patch;
+ vassert(len == 8);
+ }
+ VexInvalRange vir = {(HWord)place_to_patch, len};
+ return vir;
+}
+
+
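The 0x6555... immediates asserted above are the placeholder that the Pin_ProfInc emitter writes, since the real counter address is unknown at emit time. A hypothetical caller-side sketch of the patch step (profinc_site and invalidate_range are made-up names):

   /* Sketch: install the real counter address, then flush the
      returned range.  'profinc_site' and 'invalidate_range' are
      hypothetical. */
   static ULong counter = 0;
   VexInvalRange vir = patchProfInc_PPC(profinc_site, &counter, mode64);
   invalidate_range((void*)vir.start, vir.len);  /* hypothetical */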
/*---------------------------------------------------------------*/
/*--- end host_ppc_defs.c ---*/
/*---------------------------------------------------------------*/
diff --git a/priv/host_ppc_defs.h b/priv/host_ppc_defs.h
index c09d748..1a1f902 100644
--- a/priv/host_ppc_defs.h
+++ b/priv/host_ppc_defs.h
@@ -454,7 +454,9 @@
Pin_MulL, /* widening multiply */
Pin_Div, /* div */
Pin_Call, /* call to address in register */
- Pin_Goto, /* conditional/unconditional jmp to dst */
+ Pin_XDirect, /* direct transfer to GA */
+ Pin_XIndir, /* indirect transfer to GA */
+ Pin_XAssisted, /* assisted transfer to GA */
Pin_CMov, /* conditional move */
Pin_Load, /* zero-extending load a 8|16|32|64 bit value from mem */
Pin_LoadL, /* load-linked (lwarx/ldarx) 32|64 bit value from mem */
@@ -503,6 +505,8 @@
* immediate value */
Pin_DfpD128toD64, /* DFP 128 to DFP 64 op */
Pin_DfpI64StoD128, /* DFP signed integer to DFP 128 */
+ Pin_EvCheck, /* Event check */
+ Pin_ProfInc /* 64-bit profile counter increment */
}
PPCInstrTag;
@@ -594,13 +598,30 @@
Addr64 target;
UInt argiregs;
} Call;
- /* Pseudo-insn. Goto dst, on given condition (which could be
- Pct_ALWAYS). */
+ /* Update the guest CIA value, then exit requesting to chain
+ to it. May be conditional. Use of Addr64 in order to cope
+ with 64-bit hosts. */
struct {
+ Addr64 dstGA; /* next guest address */
+ PPCAMode* amCIA; /* amode in guest state for CIA */
+ PPCCondCode cond; /* can be ALWAYS */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ PPCAMode* amCIA;
+ PPCCondCode cond; /* can be ALWAYS */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ PPCAMode* amCIA;
+ PPCCondCode cond; /* can be ALWAYS */
IRJumpKind jk;
- PPCCondCode cond;
- PPCRI* dst;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be the bogus Pct_ALWAYS. */
struct {
@@ -854,6 +875,15 @@
HReg dst_lo;
HReg src;
} DfpI64StoD128;
+ struct {
+ PPCAMode* amCounter;
+ PPCAMode* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} Pin;
}
PPCInstr;
@@ -868,7 +898,12 @@
extern PPCInstr* PPCInstr_MulL ( Bool syned, Bool hi32, Bool sz32, HReg, HReg, HReg );
extern PPCInstr* PPCInstr_Div ( Bool extended, Bool syned, Bool sz32, HReg dst, HReg srcL, HReg srcR );
extern PPCInstr* PPCInstr_Call ( PPCCondCode, Addr64, UInt );
-extern PPCInstr* PPCInstr_Goto ( IRJumpKind, PPCCondCode cond, PPCRI* dst );
+extern PPCInstr* PPCInstr_XDirect ( Addr64 dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, Bool toFastEP );
+extern PPCInstr* PPCInstr_XIndir ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond );
+extern PPCInstr* PPCInstr_XAssisted ( HReg dstGA, PPCAMode* amCIA,
+ PPCCondCode cond, IRJumpKind jk );
extern PPCInstr* PPCInstr_CMov ( PPCCondCode, HReg dst, PPCRI* src );
extern PPCInstr* PPCInstr_Load ( UChar sz,
HReg dst, PPCAMode* src, Bool mode64 );
@@ -928,6 +963,9 @@
HReg dst_lo, HReg src_lo);
extern PPCInstr* PPCInstr_DfpI64StoD128 ( PPCFpOp op, HReg dst_hi,
HReg dst_lo, HReg src);
+extern PPCInstr* PPCInstr_EvCheck ( PPCAMode* amCounter,
+ PPCAMode* amFailAddr );
+extern PPCInstr* PPCInstr_ProfInc ( void );
extern void ppPPCInstr(PPCInstr*, Bool mode64);
@@ -937,10 +975,13 @@
extern void getRegUsage_PPCInstr ( HRegUsage*, PPCInstr*, Bool mode64 );
extern void mapRegs_PPCInstr ( HRegRemap*, PPCInstr* , Bool mode64);
extern Bool isMove_PPCInstr ( PPCInstr*, HReg*, HReg* );
-extern Int emit_PPCInstr ( UChar* buf, Int nbuf, PPCInstr*,
+extern Int emit_PPCInstr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, PPCInstr* i,
Bool mode64,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_PPC ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offsetB, Bool mode64 );
@@ -948,9 +989,37 @@
HReg rreg, Int offsetB, Bool mode64 );
extern void getAllocableRegs_PPC ( Int*, HReg**, Bool mode64 );
-extern HInstrArray* iselSB_PPC ( IRSB*, VexArch,
- VexArchInfo*,
- VexAbiInfo* );
+extern HInstrArray* iselSB_PPC ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and
+ host_EvC_COUNTER. */
+extern Int evCheckSzB_PPC ( void );
+
+/* Perform a chaining and unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_PPC ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to,
+ Bool mode64 );
+
+extern VexInvalRange unchainXDirect_PPC ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me,
+ Bool mode64 );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_PPC ( void* place_to_patch,
+ ULong* location_of_counter,
+ Bool mode64 );
+
#endif /* ndef __VEX_HOST_PPC_DEFS_H */
diff --git a/priv/host_ppc_isel.c b/priv/host_ppc_isel.c
index be2b0b3..be10029 100644
--- a/priv/host_ppc_isel.c
+++ b/priv/host_ppc_isel.c
@@ -219,17 +219,20 @@
- A mapping from IRTemp to HReg. This tells the insn selector
which virtual register(s) are associated with each IRTemp
- temporary. This is computed before insn selection starts, and
- does not change. We expect this mapping to map precisely the
- same set of IRTemps as the type mapping does.
+ temporary. This is computed before insn selection starts, and
+ does not change. We expect this mapping to map precisely the
+ same set of IRTemps as the type mapping does.
- - vregmap holds the primary register for the IRTemp.
- - vregmapHI holds the secondary register for the IRTemp,
+ - vregmapLo holds the primary register for the IRTemp.
+ - vregmapMedLo holds the secondary register for the IRTemp,
if any is needed. That's only for Ity_I64 temps
in 32 bit mode or Ity_I128 temps in 64-bit mode.
-
- - The name of the vreg in which we stash a copy of the link reg,
- so helper functions don't kill it.
+ - vregmapMedHi is only for dealing with Ity_I128 temps in
+ 32 bit mode. It holds bits 95:64 (Intel numbering)
+ of the IRTemp.
+ - vregmapHi is also only for dealing with Ity_I128 temps
+ in 32 bit mode. It holds the most significant bits
+ (127:96 in Intel numbering) of the IRTemp.
- The code array, that is, the insns selected so far.
@@ -248,11 +251,20 @@
described in set_FPU_rounding_mode below.
- A VexMiscInfo*, needed for knowing how to generate
- function calls for this target
+ function calls for this target.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any
+ insn in this block. Is set at the start and does not change.
+ This is used for detecting jumps which are definitely
+ forward-edges from this block, and therefore can be made
+ (chained) to the fast entry point of the destination, thereby
+ avoiding the destination's event check.
*/
typedef
struct {
+      /* Constants -- set at the start; they do not change. */
IRTypeEnv* type_env;
// 64-bit mode 32-bit mode
HReg* vregmapLo; // Low 64-bits [63:0] Low 32-bits [31:0]
@@ -261,20 +273,21 @@
HReg* vregmapHi; // unused highest 32-bits [127:96]
Int n_vregmap;
- HReg savedLR;
-
- HInstrArray* code;
-
- Int vreg_ctr;
-
/* 27 Jan 06: Not currently used, but should be */
UInt hwcaps;
Bool mode64;
- IRExpr* previous_rm;
-
VexAbiInfo* vbi;
+
+ Bool chainingAllowed;
+ Addr64 max_ga;
+
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
+
+ IRExpr* previous_rm;
}
ISelEnv;
@@ -4684,18 +4697,61 @@
/* --------- EXIT --------- */
case Ist_Exit: {
- PPCRI* ri_dst;
- PPCCondCode cc;
- IRConstTag tag = stmt->Ist.Exit.dst->tag;
- if (!mode64 && (tag != Ico_U32))
+ IRConst* dst = stmt->Ist.Exit.dst;
+ if (!mode64 && dst->tag != Ico_U32)
vpanic("iselStmt(ppc): Ist_Exit: dst is not a 32-bit value");
- if (mode64 && (tag != Ico_U64))
+ if (mode64 && dst->tag != Ico_U64)
vpanic("iselStmt(ppc64): Ist_Exit: dst is not a 64-bit value");
- ri_dst = iselWordExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env,stmt->Ist.Exit.guard);
- addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
- addInstr(env, PPCInstr_Goto(stmt->Ist.Exit.jk, cc, ri_dst));
- return;
+
+ PPCCondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ PPCAMode* amCIA = PPCAMode_IR(stmt->Ist.Exit.offsIP,
+ hregPPC_GPR31(mode64));
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring
+ || stmt->Ist.Exit.jk == Ijk_Call
+ /* || stmt->Ist.Exit.jk == Ijk_Ret */) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = mode64
+ ? (((Addr64)stmt->Ist.Exit.dst->Ico.U64) > (Addr64)env->max_ga)
+ : (((Addr32)stmt->Ist.Exit.dst->Ico.U32) > (Addr32)env->max_ga);
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, PPCInstr_XDirect(
+ mode64 ? (Addr64)stmt->Ist.Exit.dst->Ico.U64
+ : (Addr64)stmt->Ist.Exit.dst->Ico.U32,
+ amCIA, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ //case Ijk_MapFail:
+ //case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn:
+ case Ijk_NoDecode: case Ijk_SigBUS: case Ijk_SigTRAP:
+ case Ijk_EmFail:
+ {
+ HReg r = iselWordExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, cc,
+ stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
@@ -4710,21 +4766,91 @@
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- PPCCondCode cond;
- PPCRI* ri;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
}
- cond = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
- ri = iselWordExpr_RI(env, next);
- addInstr(env, PPCInstr_RdWrLR(True, env->savedLR));
- addInstr(env, PPCInstr_Goto(jk, cond, ri));
+
+ PPCCondCode always = mk_PPCCondCode( Pct_ALWAYS, Pcf_NONE );
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+      vassert(cdst->tag == (env->mode64 ? Ico_U64 : Ico_U32));
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = env->mode64
+ ? (((Addr64)cdst->Ico.U64) > (Addr64)env->max_ga)
+ : (((Addr32)cdst->Ico.U32) > (Addr32)env->max_ga);
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, PPCInstr_XDirect(
+ env->mode64 ? (Addr64)cdst->Ico.U64
+ : (Addr64)cdst->Ico.U32,
+ amCIA, always, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselWordExpr_R(env, next);
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselWordExpr_R(env, next);
+ PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
+ if (env->chainingAllowed) {
+ addInstr(env, PPCInstr_XIndir(r, amCIA, always));
+ } else {
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, always,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_syscall: case Ijk_ClientReq: case Ijk_NoDecode:
+ case Ijk_EmWarn: case Ijk_SigTRAP: case Ijk_TInval:
+ case Ijk_NoRedir:
+ //case Ijk_Sys_int128:
+ //case Ijk_Yield:
+ {
+ HReg r = iselWordExpr_R(env, next);
+ PPCAMode* amCIA = PPCAMode_IR(offsIP, hregPPC_GPR31(env->mode64));
+ addInstr(env, PPCInstr_XAssisted(r, amCIA, always, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
@@ -4732,20 +4858,29 @@
/*--- Insn selector top-level ---*/
/*---------------------------------------------------------*/
-/* Translate an entire BS to ppc code. */
-HInstrArray* iselSB_PPC ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi )
+/* Translate an entire SB to ppc code. */
+HInstrArray* iselSB_PPC ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
- Int i, j;
- HReg hregLo, hregMedLo, hregMedHi, hregHi;
- ISelEnv* env;
- UInt hwcaps_host = archinfo_host->hwcaps;
- Bool mode64 = False;
- UInt mask32, mask64;
+ Int i, j;
+ HReg hregLo, hregMedLo, hregMedHi, hregHi;
+ ISelEnv* env;
+ UInt hwcaps_host = archinfo_host->hwcaps;
+ Bool mode64 = False;
+ UInt mask32, mask64;
+ PPCAMode *amCounter, *amFailAddr;
+
vassert(arch_host == VexArchPPC32 || arch_host == VexArchPPC64);
mode64 = arch_host == VexArchPPC64;
+ if (!mode64) vassert(max_ga <= 0xFFFFFFFFULL);
/* do some sanity checks */
mask32 = VEX_HWCAPS_PPC32_F | VEX_HWCAPS_PPC32_V
@@ -4783,15 +4918,20 @@
env->n_vregmap = bb->tyenv->types_used;
env->vregmapLo = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
env->vregmapMedLo = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
- if (!mode64) {
+ if (mode64) {
+ env->vregmapMedHi = NULL;
+ env->vregmapHi = NULL;
+ } else {
env->vregmapMedHi = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
env->vregmapHi = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
}
/* and finally ... */
- env->hwcaps = hwcaps_host;
- env->previous_rm = NULL;
- env->vbi = vbi;
+ env->chainingAllowed = chainingAllowed;
+ env->max_ga = max_ga;
+ env->hwcaps = hwcaps_host;
+ env->previous_rm = NULL;
+ env->vbi = vbi;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
@@ -4838,16 +4978,24 @@
}
env->vreg_ctr = j;
- /* Keep a copy of the link reg, so helper functions don't kill it. */
- env->savedLR = newVRegI(env);
- addInstr(env, PPCInstr_RdWrLR(False, env->savedLR));
+ /* The very first instruction must be an event check. */
+ amCounter = PPCAMode_IR(offs_Host_EvC_Counter, hregPPC_GPR31(mode64));
+ amFailAddr = PPCAMode_IR(offs_Host_EvC_FailAddr, hregPPC_GPR31(mode64));
+ addInstr(env, PPCInstr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+      translation is used, by a call to LibVEX_patchProfInc. */
+ if (addProfInc) {
+ addInstr(env, PPCInstr_ProfInc());
+ }
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
- if (bb->stmts[i])
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
diff --git a/priv/host_s390_defs.c b/priv/host_s390_defs.c
index 98183a8..f46a1be 100644
--- a/priv/host_s390_defs.c
+++ b/priv/host_s390_defs.c
@@ -59,6 +59,7 @@
static Bool s390_insn_is_reg_reg_move(const s390_insn *, HReg *src, HReg *dst);
static void s390_insn_map_regs(HRegRemap *, s390_insn *);
static void s390_insn_get_reg_usage(HRegUsage *u, const s390_insn *);
+static UInt s390_tchain_load64_len(void);
/*------------------------------------------------------------*/
@@ -118,7 +119,7 @@
/* Total number of allocable registers (all classes) */
*nregs = 16 /* GPRs */
- 1 /* r0 */
- - 1 /* r12 register holding VG_(dispatch_ctr) */
+ - 1 /* r12 scratch register for translation chaining support */
- 1 /* r13 guest state pointer */
- 1 /* r14 link register */
- 1 /* r15 stack pointer */
@@ -144,12 +145,8 @@
Otherwise, they are available to the allocator */
(*arr)[i++] = mkHReg(10, HRcInt64, False);
(*arr)[i++] = mkHReg(11, HRcInt64, False);
- /* GPR12 is not available because it caches VG_(dispatch_ctr).
- Setting aside a register for the counter gives slightly better
- performance - most of the time. From the 10 tests in "make perf"
- 8 run faster with a max observed speedup of 2.6% for bz2. ffbench
- is the counter example. It runs 1.3% faster without the dedicated
- register. */
+   /* GPR12 is not available because it is used as a scratch register
+      in translation chaining. */
/* GPR13 is not available because it is used as guest state pointer */
/* GPR14 is not available because it is used as link register */
/* GPR15 is not available because it is used as stack pointer */
@@ -183,6 +180,7 @@
return mkHReg(S390_REGNO_GUEST_STATE_POINTER, HRcInt64, False);
}
+
/* Is VALUE within the domain of a 20-bit signed integer. */
static __inline__ Bool
fits_signed_20bit(Int value)
@@ -617,14 +615,6 @@
s390_opnd_RMI_get_reg_usage(u, insn->variant.compare.src2);
break;
- case S390_INSN_BRANCH:
- s390_opnd_RMI_get_reg_usage(u, insn->variant.branch.dst);
- /* The destination address is loaded into S390_REGNO_RETURN_VALUE.
- See s390_insn_branch_emit. */
- addHRegUse(u, HRmWrite,
- mkHReg(S390_REGNO_RETURN_VALUE, HRcInt64, False));
- break;
-
case S390_INSN_HELPER_CALL: {
UInt i;
@@ -718,6 +708,29 @@
case S390_INSN_GADD:
break;
+ case S390_INSN_EVCHECK:
+ s390_amode_get_reg_usage(u, insn->variant.evcheck.counter);
+ s390_amode_get_reg_usage(u, insn->variant.evcheck.fail_addr);
+ break;
+
+ case S390_INSN_PROFINC:
+ /* Does not use any register visible to the register allocator */
+ break;
+
+ case S390_INSN_XDIRECT:
+ s390_amode_get_reg_usage(u, insn->variant.xdirect.guest_IA);
+ break;
+
+ case S390_INSN_XINDIR:
+ addHRegUse(u, HRmRead, insn->variant.xindir.dst);
+ s390_amode_get_reg_usage(u, insn->variant.xindir.guest_IA);
+ break;
+
+ case S390_INSN_XASSISTED:
+ addHRegUse(u, HRmRead, insn->variant.xassisted.dst);
+ s390_amode_get_reg_usage(u, insn->variant.xassisted.guest_IA);
+ break;
+
default:
vpanic("s390_insn_get_reg_usage");
}
@@ -829,11 +842,6 @@
s390_opnd_RMI_map_regs(m, &insn->variant.compare.src2);
break;
- case S390_INSN_BRANCH:
- s390_opnd_RMI_map_regs(m, &insn->variant.branch.dst);
- /* No need to map S390_REGNO_RETURN_VALUE. It's not virtual */
- break;
-
case S390_INSN_HELPER_CALL:
/* s390_insn_helper_call_emit also reads / writes the link register
and stack pointer. But those registers are not visible to the
@@ -923,6 +931,31 @@
case S390_INSN_GADD:
break;
+ case S390_INSN_EVCHECK:
+ s390_amode_map_regs(m, insn->variant.evcheck.counter);
+ s390_amode_map_regs(m, insn->variant.evcheck.fail_addr);
+ break;
+
+ case S390_INSN_PROFINC:
+ /* Does not use any register visible to the register allocator */
+ break;
+
+ case S390_INSN_XDIRECT:
+ s390_amode_map_regs(m, insn->variant.xdirect.guest_IA);
+ break;
+
+ case S390_INSN_XINDIR:
+ s390_amode_map_regs(m, insn->variant.xindir.guest_IA);
+ insn->variant.xindir.dst =
+ lookupHRegRemap(m, insn->variant.xindir.dst);
+ break;
+
+ case S390_INSN_XASSISTED:
+ s390_amode_map_regs(m, insn->variant.xassisted.guest_IA);
+ insn->variant.xassisted.dst =
+ lookupHRegRemap(m, insn->variant.xassisted.dst);
+ break;
+
default:
vpanic("s390_insn_map_regs");
}
@@ -1403,6 +1436,16 @@
static UChar *
+s390_emit_BRCL(UChar *p, UChar r1, ULong i2)
+{
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
+ s390_disasm(ENC2(XMNM, PCREL), S390_XMNM_BRCL, r1, i2);
+
+ return emit_RIL(p, 0xc00400000000ULL, r1, i2);
+}
+
+
+static UChar *
s390_emit_CR(UChar *p, UChar r1, UChar r2)
{
if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM))
@@ -4252,21 +4295,6 @@
s390_insn *
-s390_insn_branch(IRJumpKind kind, s390_cc_t cond, s390_opnd_RMI dst)
-{
- s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
-
- insn->tag = S390_INSN_BRANCH;
- insn->size = 0; /* does not matter */
- insn->variant.branch.kind = kind;
- insn->variant.branch.dst = dst;
- insn->variant.branch.cond = cond;
-
- return insn;
-}
-
-
-s390_insn *
s390_insn_helper_call(s390_cc_t cond, Addr64 target, UInt num_args,
HChar *name)
{
@@ -4489,6 +4517,89 @@
}
+s390_insn *
+s390_insn_xdirect(s390_cc_t cond, Addr64 dst, s390_amode *guest_IA,
+ Bool to_fast_entry)
+{
+ s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+ insn->tag = S390_INSN_XDIRECT;
+ insn->size = 0; /* does not matter */
+
+ insn->variant.xdirect.cond = cond;
+ insn->variant.xdirect.dst = dst;
+ insn->variant.xdirect.guest_IA = guest_IA;
+ insn->variant.xdirect.to_fast_entry = to_fast_entry;
+
+ return insn;
+}
+
+
+s390_insn *
+s390_insn_xindir(s390_cc_t cond, HReg dst, s390_amode *guest_IA)
+{
+ s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+ insn->tag = S390_INSN_XINDIR;
+ insn->size = 0; /* does not matter */
+
+ insn->variant.xindir.cond = cond;
+ insn->variant.xindir.dst = dst;
+ insn->variant.xindir.guest_IA = guest_IA;
+
+ return insn;
+}
+
+
+s390_insn *
+s390_insn_xassisted(s390_cc_t cond, HReg dst, s390_amode *guest_IA,
+ IRJumpKind kind)
+{
+ s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+ insn->tag = S390_INSN_XASSISTED;
+ insn->size = 0; /* does not matter */
+
+ insn->variant.xassisted.cond = cond;
+ insn->variant.xassisted.dst = dst;
+ insn->variant.xassisted.guest_IA = guest_IA;
+ insn->variant.xassisted.kind = kind;
+
+ return insn;
+}
+
+
+s390_insn *
+s390_insn_evcheck(s390_amode *counter, s390_amode *fail_addr)
+{
+ s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+ vassert(counter->tag == S390_AMODE_B12 || counter->tag == S390_AMODE_BX12);
+ vassert(fail_addr->tag == S390_AMODE_B12 ||
+ fail_addr->tag == S390_AMODE_BX12);
+
+ insn->tag = S390_INSN_EVCHECK;
+ insn->size = 0; /* does not matter */
+
+ insn->variant.evcheck.counter = counter;
+ insn->variant.evcheck.fail_addr = fail_addr;
+
+ return insn;
+}
+
+
+s390_insn *
+s390_insn_profinc(void)
+{
+ s390_insn *insn = LibVEX_Alloc(sizeof(s390_insn));
+
+ insn->tag = S390_INSN_PROFINC;
+ insn->size = 0; /* does not matter */
+
+ return insn;
+}
+
+
/*---------------------------------------------------------------*/
/*--- Debug print ---*/
/*---------------------------------------------------------------*/
@@ -4792,11 +4903,6 @@
&insn->variant.compare.src2);
break;
- case S390_INSN_BRANCH:
- s390_sprintf(buf, "if (%C) %J %O", insn->variant.branch.cond,
- insn->variant.branch.kind, &insn->variant.branch.dst);
- return buf; /* avoid printing "size = ..." which is meaningless */
-
case S390_INSN_HELPER_CALL: {
s390_sprintf(buf, "%M if (%C) %s{%I}(%L)", "v-call",
insn->variant.helper_call.cond,
@@ -4924,6 +5030,39 @@
insn->variant.gadd.value);
break;
+ case S390_INSN_EVCHECK:
+ s390_sprintf(buf, "%M counter = %A, fail-addr = %A", "v-evcheck",
+ insn->variant.evcheck.counter,
+ insn->variant.evcheck.fail_addr);
+ return buf; /* avoid printing "size = ..." which is meaningless */
+
+ case S390_INSN_PROFINC:
+ s390_sprintf(buf, "%M", "v-profinc");
+ return buf; /* avoid printing "size = ..." which is meaningless */
+
+ case S390_INSN_XDIRECT:
+ s390_sprintf(buf, "%M if (%C) %A = %I %s", "v-xdirect",
+ insn->variant.xdirect.cond,
+ insn->variant.xdirect.guest_IA,
+ insn->variant.xdirect.dst,
+ insn->variant.xdirect.to_fast_entry ? "fast" : "slow");
+ return buf; /* avoid printing "size = ..." which is meaningless */
+
+ case S390_INSN_XINDIR:
+ s390_sprintf(buf, "%M if (%C) %A = %R", "v-xindir",
+ insn->variant.xindir.cond,
+ insn->variant.xindir.guest_IA,
+ insn->variant.xindir.dst);
+ return buf; /* avoid printing "size = ..." which is meaningless */
+
+ case S390_INSN_XASSISTED:
+ s390_sprintf(buf, "%M if (%C) %J %A = %R", "v-xassisted",
+ insn->variant.xassisted.cond,
+ insn->variant.xassisted.kind,
+ insn->variant.xassisted.guest_IA,
+ insn->variant.xassisted.dst);
+ return buf; /* avoid printing "size = ..." which is meaningless */
+
default: goto fail;
}
@@ -6512,104 +6651,6 @@
static UChar *
-s390_insn_branch_emit(UChar *buf, const s390_insn *insn)
-{
- s390_opnd_RMI dst;
- s390_cc_t cond;
- UInt trc;
- UChar *p, *ptmp = 0; /* avoid compiler warnings */
-
- cond = insn->variant.branch.cond;
- dst = insn->variant.branch.dst;
-
- p = buf;
- trc = 0;
-
- if (cond != S390_CC_ALWAYS) {
- /* So we have something like this
- if (cond) goto X;
- Y: ...
- We convert this into
- if (! cond) goto Y; // BRC insn; 4 bytes
- return_reg = X;
- return to dispatcher
- Y:
- */
- ptmp = p; /* 4 bytes (a BRC insn) to be filled in here */
- p += 4;
- }
-
- /* If a non-boring, set guest-state-pointer appropriately. */
-
- switch (insn->variant.branch.kind) {
- case Ijk_ClientReq: trc = VEX_TRC_JMP_CLIENTREQ; break;
- case Ijk_Sys_syscall: trc = VEX_TRC_JMP_SYS_SYSCALL; break;
- case Ijk_Yield: trc = VEX_TRC_JMP_YIELD; break;
- case Ijk_EmWarn: trc = VEX_TRC_JMP_EMWARN; break;
- case Ijk_EmFail: trc = VEX_TRC_JMP_EMFAIL; break;
- case Ijk_MapFail: trc = VEX_TRC_JMP_MAPFAIL; break;
- case Ijk_NoDecode: trc = VEX_TRC_JMP_NODECODE; break;
- case Ijk_TInval: trc = VEX_TRC_JMP_TINVAL; break;
- case Ijk_NoRedir: trc = VEX_TRC_JMP_NOREDIR; break;
- case Ijk_SigTRAP: trc = VEX_TRC_JMP_SIGTRAP; break;
- case Ijk_Ret: trc = 0; break;
- case Ijk_Call: trc = 0; break;
- case Ijk_Boring: trc = 0; break;
- break;
-
- default:
- vpanic("s390_insn_branch_emit: unknown jump kind");
- }
-
- /* Get the destination address into the return register */
- switch (dst.tag) {
- case S390_OPND_REG:
- p = s390_emit_LGR(p, S390_REGNO_RETURN_VALUE, hregNumber(dst.variant.reg));
- break;
-
- case S390_OPND_AMODE: {
- const s390_amode *am = dst.variant.am;
- UChar b = hregNumber(am->b);
- UChar x = hregNumber(am->x);
- Int d = am->d;
-
- p = s390_emit_LG(p, S390_REGNO_RETURN_VALUE, x, b, DISP20(d));
- break;
- }
-
- case S390_OPND_IMMEDIATE:
- p = s390_emit_load_64imm(p, S390_REGNO_RETURN_VALUE, dst.variant.imm);
- break;
-
- default:
- goto fail;
- }
-
- if (trc != 0) {
- /* Something special. Set guest-state pointer appropriately */
- p = s390_emit_LGHI(p, S390_REGNO_GUEST_STATE_POINTER, trc);
- } else {
- /* Nothing special needs to be done for calls and returns. */
- }
-
- p = s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_LINK_REGISTER);
-
- if (cond != S390_CC_ALWAYS) {
- Int delta = p - ptmp;
-
- delta >>= 1; /* immediate constant is #half-words */
- vassert(delta > 0 && delta < (1 << 16));
- s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
- }
-
- return p;
-
- fail:
- vpanic("s390_insn_branch_emit");
-}
-
-
-static UChar *
s390_insn_helper_call_emit(UChar *buf, const s390_insn *insn)
{
s390_cc_t cond;
@@ -7163,9 +7204,415 @@
}
+/* Define convenience functions needed for translation chaining.
+ Any changes need to be applied to the functions in concert. */
+
+static __inline__ Bool
+s390_insn_is_BRCL(const UChar *p, UChar condition)
+{
+ return p[0] == 0xc0 && p[1] == ((condition << 4) | 0x04);
+}
+
+static __inline__ Bool
+s390_insn_is_BR(const UChar *p, UChar reg)
+{
+ return p[0] == 0x07 && p[1] == (0xF0 | reg); /* BCR 15,reg */
+}
+
+static __inline__ Bool
+s390_insn_is_BASR(const UChar *p, UChar link_reg, UChar other_reg)
+{
+ return p[0] == 0x0D && p[1] == ((link_reg << 4) | other_reg);
+}
+
+/* Load the 64-bit VALUE into REG. Note that this function must NOT
+ optimise the generated code by looking at the value. I.e. using
+ LGHI if value == 0 would be very wrong.
+ fixs390: Do it in a way that works everywhere for now. */
+static UChar *
+s390_tchain_load64(UChar *buf, UChar regno, ULong value)
+{
+ UChar *begin = buf;
+
+ buf = s390_emit_IILL(buf, regno, value & 0xFFFF);
+ value >>= 16;
+ buf = s390_emit_IILH(buf, regno, value & 0xFFFF);
+ value >>= 16;
+ buf = s390_emit_IIHL(buf, regno, value & 0xFFFF);
+ value >>= 16;
+ buf = s390_emit_IIHH(buf, regno, value & 0xFFFF);
+
+ vassert(buf - begin == s390_tchain_load64_len());
+
+ return buf;
+}
+
+/* Return number of bytes generated by s390_tchain_load64 */
+static UInt
+s390_tchain_load64_len(void)
+{
+ return S390_TCHAIN_LOAD64_LEN;
+}
+
+/* Verify that CODE is the code sequence generated by s390_tchain_load64
+ to load VALUE into REGNO. Return pointer to the byte following the
+ insn sequence. */
+static const UChar *
+s390_tchain_verify_load64(const UChar *code, UChar regno, ULong value)
+{
+ UInt regmask = regno << 4;
+ UInt hw;
+
+ /* Check for IILL */
+ hw = value & 0xFFFF;
+ vassert(code[0] == 0xA5);
+ vassert(code[1] == (0x03 | regmask));
+ vassert(code[2] == (hw >> 8));
+ vassert(code[3] == (hw & 0xFF));
+
+ /* Check for IILH */
+ hw = (value >> 16) & 0xFFFF;
+ vassert(code[4] == 0xA5);
+ vassert(code[5] == (0x02 | regmask));
+ vassert(code[6] == (hw >> 8));
+ vassert(code[7] == (hw & 0xFF));
+
+ /* Check for IIHL */
+ hw = (value >> 32) & 0xFFFF;
+ vassert(code[8] == 0xA5);
+ vassert(code[9] == (0x01 | regmask));
+ vassert(code[10] == (hw >> 8));
+ vassert(code[11] == (hw & 0xFF));
+
+ /* Check for IIHH */
+ hw = (value >> 48) & 0xFFFF;
+ vassert(code[12] == 0xA5);
+ vassert(code[13] == (0x00 | regmask));
+ vassert(code[14] == (hw >> 8));
+ vassert(code[15] == (hw & 0xFF));
+
+ return code + s390_tchain_load64_len();
+}
+
+/* CODE points to the code sequence as generated by s390_tchain_load64.
+ Change the loaded value to VALUE. Return pointer to the byte following
+ the patched code sequence. */
+static UChar *
+s390_tchain_patch_load64(UChar *code, ULong imm64)
+{
+ code[3] = imm64 & 0xFF; imm64 >>= 8;
+ code[2] = imm64 & 0xFF; imm64 >>= 8;
+ code[7] = imm64 & 0xFF; imm64 >>= 8;
+ code[6] = imm64 & 0xFF; imm64 >>= 8;
+ code[11] = imm64 & 0xFF; imm64 >>= 8;
+ code[10] = imm64 & 0xFF; imm64 >>= 8;
+ code[15] = imm64 & 0xFF; imm64 >>= 8;
+ code[14] = imm64 & 0xFF; imm64 >>= 8;
+
+ return code + s390_tchain_load64_len();
+}
+
+
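For reference, the byte layout that both s390_tchain_verify_load64 and s390_tchain_patch_load64 rely on is fixed by the four 4-byte insert-immediate insns above (each halfword immediate is stored big-endian):

   /* offset    bytes                     meaning
      [0..3]    A5 (0x03|r<<4) hi lo      IILL: value bits 15:0
      [4..7]    A5 (0x02|r<<4) hi lo      IILH: value bits 31:16
      [8..11]   A5 (0x01|r<<4) hi lo      IIHL: value bits 47:32
      [12..15]  A5 (0x00|r<<4) hi lo      IIHH: value bits 63:48  */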
+/* NB: what goes on here has to be very closely coordinated with the
+ chainXDirect_S390 and unchainXDirect_S390 below. */
+static UChar *
+s390_insn_xdirect_emit(UChar *buf, const s390_insn *insn,
+ void *disp_cp_chain_me_to_slowEP,
+ void *disp_cp_chain_me_to_fastEP)
+{
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations can't
+ use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ UChar *ptmp = buf;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ s390_cc_t cond = insn->variant.xdirect.cond;
+
+ if (cond != S390_CC_ALWAYS) {
+ /* So we have something like this
+ if (cond) do_xdirect;
+ Y: ...
+ We convert this into
+ if (! cond) goto Y; // BRC opcode; 4 bytes
+ do_xdirect;
+ Y:
+ */
+ /* 4 bytes (a BRC insn) to be filled in here */
+ buf += 4;
+ }
+
+ /* Update the guest IA. */
+ buf = s390_emit_load_64imm(buf, R0, insn->variant.xdirect.dst);
+
+ const s390_amode *amode = insn->variant.xdirect.guest_IA;
+ vassert(amode->tag == S390_AMODE_B12 || amode->tag == S390_AMODE_BX12);
+ UInt b = hregNumber(amode->b);
+ UInt x = hregNumber(amode->x); /* 0 for B12 and B20 */
+ UInt d = amode->d;
+
+ buf = s390_emit_STG(buf, R0, x, b, DISP20(d));
+
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the length of the
+ two instructions below. */
+
+ /* Load the chosen entry point into the scratch reg */
+ void *disp_cp_chain_me;
+
+ disp_cp_chain_me =
+ insn->variant.xdirect.to_fast_entry ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+
+ ULong addr = Ptr_to_ULong(disp_cp_chain_me);
+ buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH, addr);
+
+ /* call *tchain_scratch */
+ buf = s390_emit_BASR(buf, 1, S390_REGNO_TCHAIN_SCRATCH);
+
+ /* --- END of PATCHABLE BYTES --- */
+
+ /* Fix up the conditional jump, if there was one. */
+ if (cond != S390_CC_ALWAYS) {
+ Int delta = buf - ptmp;
+
+ delta >>= 1; /* immediate constant is #half-words */
+ vassert(delta > 0 && delta < (1 << 16));
+ s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
+ }
+
+ return buf;
+}
+
+/* Return the number of patchable bytes from an xdirect insn. */
+static UInt
+s390_xdirect_patchable_len(void)
+{
+ return s390_tchain_load64_len() + S390_TCHAIN_CALL_LEN;
+}
+
+
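Assuming S390_TCHAIN_LOAD64_LEN is 16 (four 4-byte IIxx insns, per s390_tchain_load64 above) and S390_TCHAIN_CALL_LEN is 2 (BASR is a 2-byte RR instruction), both constants being defined elsewhere, the patchable region works out as:

   /* Sketch of the arithmetic, under the stated assumptions:
        load64 sequence : 4 insns * 4 bytes = 16
        BASR 1,scratch  : RR format         =  2
        patchable bytes : 16 + 2            = 18  */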
+static UChar *
+s390_insn_xindir_emit(UChar *buf, const s390_insn *insn, void *disp_cp_xindir)
+{
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed --
+ no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That means
+ no XDirects or XIndirs out from no-redir translations.
+ Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ UChar *ptmp = buf;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+   s390_cc_t cond = insn->variant.xindir.cond;
+
+   if (cond != S390_CC_ALWAYS) {
+      /* So we have something like this
+         if (cond) do_xindir;
+         Y: ...
+         We convert this into
+         if (! cond) goto Y;        // BRC opcode; 4 bytes
+         do_xindir;
+         Y:
+      */
+ /* 4 bytes (a BRC insn) to be filled in here */
+ buf += 4;
+ }
+
+   /* Update the guest IA with the address in xindir.dst. */
+ const s390_amode *amode = insn->variant.xindir.guest_IA;
+
+ vassert(amode->tag == S390_AMODE_B12 || amode->tag == S390_AMODE_BX12);
+ UInt b = hregNumber(amode->b);
+ UInt x = hregNumber(amode->x); /* 0 for B12 and B20 */
+ UInt d = amode->d;
+ UInt regno = hregNumber(insn->variant.xindir.dst);
+
+ buf = s390_emit_STG(buf, regno, x, b, DISP20(d));
+
+   /* load tchain_scratch, #disp_cp_xindir */
+   buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH,
+                            Ptr_to_ULong(disp_cp_xindir));
+   /* BR *tchain_scratch */
+   buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
+
+ /* Fix up the conditional jump, if there was one. */
+ if (cond != S390_CC_ALWAYS) {
+ Int delta = buf - ptmp;
+
+ delta >>= 1; /* immediate constant is #half-words */
+ vassert(delta > 0 && delta < (1 << 16));
+ s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
+ }
+
+ return buf;
+}
+
+static UChar *
+s390_insn_xassisted_emit(UChar *buf, const s390_insn *insn,
+ void *disp_cp_xassisted)
+{
+ /* Use ptmp for backpatching conditional jumps. */
+ UChar *ptmp = buf;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+   s390_cc_t cond = insn->variant.xassisted.cond;
+
+   if (cond != S390_CC_ALWAYS) {
+      /* So we have something like this
+         if (cond) do_xassisted;
+         Y: ...
+         We convert this into
+         if (! cond) goto Y;        // BRC opcode; 4 bytes
+         do_xassisted;
+         Y:
+      */
+ /* 4 bytes (a BRC insn) to be filled in here */
+ buf += 4;
+ }
+
+ /* Update the guest IA with the address in xassisted.dst. */
+ const s390_amode *amode = insn->variant.xassisted.guest_IA;
+
+ vassert(amode->tag == S390_AMODE_B12 || amode->tag == S390_AMODE_BX12);
+ UInt b = hregNumber(amode->b);
+ UInt x = hregNumber(amode->x); /* 0 for B12 and B20 */
+ UInt d = amode->d;
+ UInt regno = hregNumber(insn->variant.xassisted.dst);
+
+ buf = s390_emit_STG(buf, regno, x, b, DISP20(d));
+
+ UInt trcval = 0;
+
+ switch (insn->variant.xassisted.kind) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ case Ijk_Ret:
+ case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(insn->variant.xassisted.kind);
+ vpanic("s390_insn_xassisted_emit: unexpected jump kind");
+ }
+
+ vassert(trcval != 0);
+
+ /* guest_state_pointer = trcval */
+ buf = s390_emit_LGHI(buf, S390_REGNO_GUEST_STATE_POINTER, trcval);
+
+   /* load tchain_scratch, #disp_cp_xassisted */
+   buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH,
+                            Ptr_to_ULong(disp_cp_xassisted));
+
+   /* BR *tchain_scratch */
+   buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
+
+ /* Fix up the conditional jump, if there was one. */
+ if (cond != S390_CC_ALWAYS) {
+ Int delta = buf - ptmp;
+
+ delta >>= 1; /* immediate constant is #half-words */
+ vassert(delta > 0 && delta < (1 << 16));
+ s390_emit_BRC(ptmp, s390_cc_invert(cond), delta);
+ }
+
+ return buf;
+}
+
+
+/* Pseudo code:
+
+ guest_state[host_EvC_COUNTER] -= 1;
+ if (guest_state[host_EvC_COUNTER] >= 0) goto nofail;
+ goto guest_state[host_EvC_FAILADDR];
+ nofail: ;
+
+ The dispatch counter is a 32-bit value. */
+static UChar *
+s390_insn_evcheck_emit(UChar *buf, const s390_insn *insn)
+{
+ s390_amode *amode;
+ UInt b, x, d;
+ UChar *code_begin, *code_end;
+
+ code_begin = buf;
+
+ amode = insn->variant.evcheck.counter;
+ vassert(amode->tag == S390_AMODE_B12 || amode->tag == S390_AMODE_BX12);
+ b = hregNumber(amode->b);
+ x = hregNumber(amode->x); /* 0 for B12 and B20 */
+ d = amode->d;
+
+ /* Decrement the dispatch counter in the guest state */
+ /* fixs390: ASI if available */
+ buf = s390_emit_LHI(buf, R0, -1); /* 4 bytes */
+ buf = s390_emit_A(buf, R0, x, b, d); /* 4 bytes */
+ buf = s390_emit_ST(buf, R0, x, b, d); /* 4 bytes */
+
+ /* Jump over the next insn if >= 0 */
+ buf = s390_emit_BRC(buf, S390_CC_HE, (4 + 6 + 2) / 2); /* 4 bytes */
+
+ /* Computed goto to fail_address */
+ amode = insn->variant.evcheck.fail_addr;
+ b = hregNumber(amode->b);
+ x = hregNumber(amode->x); /* 0 for B12 and B20 */
+ d = amode->d;
+ buf = s390_emit_LG(buf, S390_REGNO_TCHAIN_SCRATCH, x, b, DISP20(d)); /* 6 bytes */
+ buf = s390_emit_BCR(buf, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH); /* 2 bytes */
+
+ code_end = buf;
+
+ /* Make sure the size of the generated code is identical to the size
+ returned by evCheckSzB_S390 */
+ vassert(evCheckSzB_S390() == code_end - code_begin);
+
+ return buf;
+}
+
+
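A size cross-check for the sequence just emitted, using the byte counts noted inline above:

   /* LHI(4) + A(4) + ST(4) + BRC(4) + LG(6) + BCR(2) = 24 bytes,
      matching evCheckSzB_S390 below.  The BRC distance of
      (4 + 6 + 2) / 2 = 6 halfwords is measured from the BRC itself,
      so it lands exactly past the final BCR. */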
+static UChar *
+s390_insn_profinc_emit(UChar *buf,
+ const s390_insn *insn __attribute__((unused)))
+{
+ /* Generate a code template to increment a memory location whose
+ address will be known later as an immediate value. This code
+ template will be patched once the memory location is known.
+ For now we do this with address == 0. */
+ buf = s390_tchain_load64(buf, S390_REGNO_TCHAIN_SCRATCH, 0);
+ buf = s390_emit_LGHI(buf, R0, 1);
+ buf = s390_emit_AG( buf, R0, 0, S390_REGNO_TCHAIN_SCRATCH, DISP20(0));
+ buf = s390_emit_STG(buf, R0, 0, S390_REGNO_TCHAIN_SCRATCH, DISP20(0));
+
+ return buf;
+}
+
+
Int
-emit_S390Instr(UChar *buf, Int nbuf, s390_insn *insn, Bool mode64,
- void *dispatch_unassisted, void *dispatch_assisted)
+emit_S390Instr(Bool *is_profinc, UChar *buf, Int nbuf, s390_insn *insn,
+ Bool mode64, void *disp_cp_chain_me_to_slowEP,
+ void *disp_cp_chain_me_to_fastEP, void *disp_cp_xindir,
+ void *disp_cp_xassisted)
{
UChar *end;
@@ -7230,12 +7677,6 @@
end = s390_insn_compare_emit(buf, insn);
break;
- case S390_INSN_BRANCH:
- vassert(dispatch_unassisted == NULL);
- vassert(dispatch_assisted == NULL);
- end = s390_insn_branch_emit(buf, insn);
- break;
-
case S390_INSN_HELPER_CALL:
end = s390_insn_helper_call_emit(buf, insn);
break;
@@ -7288,6 +7729,30 @@
end = s390_insn_gadd_emit(buf, insn);
break;
+ case S390_INSN_PROFINC:
+ end = s390_insn_profinc_emit(buf, insn);
+ /* Tell the caller .. */
+ vassert(*is_profinc == False);
+ *is_profinc = True;
+ break;
+
+ case S390_INSN_EVCHECK:
+ end = s390_insn_evcheck_emit(buf, insn);
+ break;
+
+ case S390_INSN_XDIRECT:
+ end = s390_insn_xdirect_emit(buf, insn, disp_cp_chain_me_to_slowEP,
+ disp_cp_chain_me_to_fastEP);
+ break;
+
+ case S390_INSN_XINDIR:
+ end = s390_insn_xindir_emit(buf, insn, disp_cp_xindir);
+ break;
+
+ case S390_INSN_XASSISTED:
+ end = s390_insn_xassisted_emit(buf, insn, disp_cp_xassisted);
+ break;
+
default:
vpanic("emit_S390Instr");
}
@@ -7298,6 +7763,168 @@
}
+/* Return the number of bytes emitted for an S390_INSN_EVCHECK.
+ See s390_insn_evcheck_emit */
+Int
+evCheckSzB_S390(void)
+{
+ return 24;
+}
+
+
+/* Patch the counter address into CODE_TO_PATCH as previously
+ generated by s390_insn_profinc_emit. */
+VexInvalRange
+patchProfInc_S390(void *code_to_patch, ULong *location_of_counter)
+{
+ vassert(sizeof(ULong *) == 8);
+
+ s390_tchain_verify_load64(code_to_patch, S390_REGNO_TCHAIN_SCRATCH, 0);
+
+ s390_tchain_patch_load64(code_to_patch, Ptr_to_ULong(location_of_counter));
+
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ s390_insn_xdirect_emit code above. */
+VexInvalRange
+chainXDirect_S390(void *place_to_chain,
+ void *disp_cp_chain_me_EXPECTED,
+ void *place_to_jump_to)
+{
+   /* What we're expecting to see @ PLACE_TO_CHAIN is:
+
+      load  tchain_scratch, #disp_cp_chain_me_EXPECTED
+ BASR 1,S390_REGNO_TCHAIN_SCRATCH
+ */
+ const UChar *next;
+ next = s390_tchain_verify_load64(place_to_chain, S390_REGNO_TCHAIN_SCRATCH,
+ Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
+ vassert(s390_insn_is_BASR(next, 1, S390_REGNO_TCHAIN_SCRATCH));
+
+ /* And what we want to change it to is either:
+ (general case):
+
+ load tchain_scratch, #place_to_jump_to
+ BR *tchain_scratch
+
+ ---OR---
+
+ in the case where the displacement is small enough
+
+ BRCL delta where delta is in half-words
+ invalid opcodes
+
+ In both cases the replacement has the same length as the original.
+ To remain sane & verifiable,
+ (1) limit the displacement for the short form to
+ (say) +/- one billion, so as to avoid wraparound
+ off-by-ones
+ (2) even if the short form is applicable, once every (say)
+ 1024 times use the long form anyway, so as to maintain
+ verifiability
+ */
+
+ /* This is the delta we need to put into a BRCL insn. Note, that the
+ offset in BRCL is in half-words. Hence division by 2. */
+ Long delta = (Long)((UChar *)place_to_jump_to - (UChar *)place_to_chain) / 2;
+ Bool shortOK = delta >= -1000*1000*1000 && delta < 1000*1000*1000;
+
+ static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
+ if (shortOK) {
+ shortCTR++; // thread safety bleh
+ if (0 == (shortCTR & 0x3FF)) {
+ shortOK = False;
+ if (0)
+ vex_printf("QQQ chainXDirect_S390: shortCTR = %u, "
+ "using long jmp\n", shortCTR);
+ }
+ }
+
+ /* And make the modifications. */
+ UChar *p = (UChar *)place_to_chain;
+ if (shortOK) {
+ p = s390_emit_BRCL(p, S390_CC_ALWAYS, delta); /* 6 bytes */
+
+ /* Make sure that BRCL fits into the patchable part of an xdirect
+ code sequence */
+ vassert(6 <= s390_xdirect_patchable_len());
+
+ /* Fill remaining bytes with 0x00 (invalid opcode) */
+ Int i;
+ for (i = 0; i < s390_xdirect_patchable_len() - 6; ++i)
+ p[i] = 0x00;
+ } else {
+ /*
+ load tchain_scratch, #place_to_jump_to
+ BR *tchain_scratch
+ */
+ ULong addr = Ptr_to_ULong(place_to_jump_to);
+ p = s390_tchain_load64(p, S390_REGNO_TCHAIN_SCRATCH, addr);
+ s390_emit_BCR(p, S390_CC_ALWAYS, S390_REGNO_TCHAIN_SCRATCH);
+ }
+
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
+
+
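A worked example with made-up addresses shows how the short-form test behaves:

   /* Hypothetical: chain a site at 0x80001000 to 0x80003000. */
   Long delta   = (Long)(0x80003000UL - 0x80001000UL) / 2;  /* 0x1000 */
   Bool shortOK = delta >= -1000*1000*1000
                  && delta < 1000*1000*1000;                /* True */

so the 6-byte BRCL form is normally chosen; every 1024th time, shortCTR forces the long form instead, keeping both code paths exercised.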
+/* NB: what goes on here has to be very closely coordinated with the
+ s390_insn_xdirect_emit code above. */
+VexInvalRange
+unchainXDirect_S390(void *place_to_unchain,
+ void *place_to_jump_to_EXPECTED,
+ void *disp_cp_chain_me)
+{
+ /* What we're expecting to see @ PLACE_TO_UNCHAIN:
+
+ load tchain_scratch, #place_to_jump_to_EXPECTED
+ BR *tchain_scratch
+
+ ---OR---
+ in the case where the displacement falls within 32 bits
+
+ BRCL delta
+ invalid opcodes
+ */
+ UChar *p = place_to_unchain;
+
+ if (s390_insn_is_BRCL(p, S390_CC_ALWAYS)) {
+ /* Looks like the short form */
+ Int num_hw = *(Int *)&p[2];
+      Int delta = 2 * num_hw;
+
+ vassert(p + delta == place_to_jump_to_EXPECTED);
+
+ Int i;
+ for (i = 0; i < s390_xdirect_patchable_len() - 6; ++i)
+ vassert(p[6+i] == 0x00);
+ } else {
+ /* Should be the long form */
+ const UChar *next;
+
+ next = s390_tchain_verify_load64(p, S390_REGNO_TCHAIN_SCRATCH,
+ Ptr_to_ULong(place_to_jump_to_EXPECTED));
+ /* Check for BR *tchain_scratch */
+ vassert(s390_insn_is_BR(next, S390_REGNO_TCHAIN_SCRATCH));
+ }
+
+ /* And what we want to change it to is:
+
+ load tchain_scratch, #disp_cp_chain_me
+ call *tchain_scratch
+ */
+ ULong addr = Ptr_to_ULong(disp_cp_chain_me);
+ p = s390_tchain_load64(p, S390_REGNO_TCHAIN_SCRATCH, addr);
+ s390_emit_BASR(p, 1, S390_REGNO_TCHAIN_SCRATCH);
+
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
+
/*---------------------------------------------------------------*/
/*--- end host_s390_defs.c ---*/
/*---------------------------------------------------------------*/
diff --git a/priv/host_s390_defs.h b/priv/host_s390_defs.h
index 8b75486..ad99c4f 100644
--- a/priv/host_s390_defs.h
+++ b/priv/host_s390_defs.h
@@ -130,7 +130,6 @@
S390_INSN_TEST, /* test operand and set cc */
S390_INSN_CC2BOOL,/* convert condition code to 0/1 */
S390_INSN_COMPARE,
- S390_INSN_BRANCH, /* un/conditional goto */
S390_INSN_HELPER_CALL,
S390_INSN_CAS, /* compare and swap */
S390_INSN_BFP_BINOP, /* Binary floating point 32-bit / 64-bit */
@@ -144,7 +143,13 @@
S390_INSN_BFP128_CONVERT_FROM,
S390_INSN_MFENCE,
S390_INSN_GZERO, /* Assign zero to a guest register */
- S390_INSN_GADD /* Add a value to a guest register */
+ S390_INSN_GADD, /* Add a value to a guest register */
+ /* The following 5 insns are mandated by translation chaining */
+ S390_INSN_XDIRECT, /* direct transfer to guest address */
+ S390_INSN_XINDIR, /* indirect transfer to guest address */
+ S390_INSN_XASSISTED, /* assisted transfer to guest address */
+ S390_INSN_EVCHECK, /* Event check */
+ S390_INSN_PROFINC /* 64-bit profile counter increment */
} s390_insn_tag;
@@ -338,11 +343,6 @@
HReg op3;
HReg old_mem;
} cas;
- struct {
- IRJumpKind kind;
- s390_cc_t cond;
- s390_opnd_RMI dst;
- } branch;
/* Pseudo-insn for representing a helper call.
TARGET is the absolute address of the helper function
NUM_ARGS says how many arguments are being passed.
@@ -407,6 +407,44 @@
UChar delta;
ULong value; /* for debugging only */
} gadd;
+
+ /* The next 5 entries are generic to support translation chaining */
+
+ /* Update the guest IA value, then exit requesting to chain
+ to it. May be conditional. */
+ struct {
+ s390_cc_t cond;
+         Bool to_fast_entry; /* chain to which entry point? */
+ Addr64 dst; /* next guest address */
+ s390_amode *guest_IA;
+ } xdirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ s390_cc_t cond;
+ HReg dst;
+ s390_amode *guest_IA;
+ } xindir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ s390_cc_t cond;
+ IRJumpKind kind;
+ HReg dst;
+ s390_amode *guest_IA;
+ } xassisted;
+ struct {
+      /* fixs390: I don't think these are really needed,
+         as the gsp and the offset are fixed, no? */
+ s390_amode *counter; /* dispatch counter */
+ s390_amode *fail_addr;
+ } evcheck;
+ struct {
+ /* No fields. The address of the counter to increment is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } profinc;
+
} variant;
} s390_insn;
@@ -433,7 +471,6 @@
s390_insn *s390_insn_test(UChar size, s390_opnd_RMI src);
s390_insn *s390_insn_compare(UChar size, HReg dst, s390_opnd_RMI opnd,
Bool signed_comparison);
-s390_insn *s390_insn_branch(IRJumpKind jk, s390_cc_t cond, s390_opnd_RMI dst);
s390_insn *s390_insn_helper_call(s390_cc_t cond, Addr64 target, UInt num_args,
HChar *name);
s390_insn *s390_insn_bfp_triop(UChar size, s390_bfp_triop_t, HReg dst, HReg op2,
@@ -460,6 +497,15 @@
s390_insn *s390_insn_gzero(UChar size, UInt offset);
s390_insn *s390_insn_gadd(UChar size, UInt offset, UChar delta, ULong value);
+/* Five for translation chaining */
+s390_insn *s390_insn_xdirect(s390_cc_t cond, Addr64 dst, s390_amode *guest_IA,
+ Bool to_fast_entry);
+s390_insn *s390_insn_xindir(s390_cc_t cond, HReg dst, s390_amode *guest_IA);
+s390_insn *s390_insn_xassisted(s390_cc_t cond, HReg dst, s390_amode *guest_IA,
+ IRJumpKind kind);
+s390_insn *s390_insn_evcheck(s390_amode *counter, s390_amode *fail_addr);
+s390_insn *s390_insn_profinc(void);
+
const HChar *s390_insn_as_string(const s390_insn *);
/*--------------------------------------------------------*/
@@ -475,13 +521,30 @@
void getRegUsage_S390Instr( HRegUsage *, s390_insn *, Bool );
void mapRegs_S390Instr ( HRegRemap *, s390_insn *, Bool );
Bool isMove_S390Instr ( s390_insn *, HReg *, HReg * );
-Int emit_S390Instr ( UChar *, Int, s390_insn *, Bool,
- void *, void * );
+Int emit_S390Instr ( Bool *, UChar *, Int, s390_insn *, Bool,
+ void *, void *, void *, void *);
void getAllocableRegs_S390( Int *, HReg **, Bool );
void genSpill_S390 ( HInstr **, HInstr **, HReg , Int , Bool );
void genReload_S390 ( HInstr **, HInstr **, HReg , Int , Bool );
s390_insn *directReload_S390 ( s390_insn *, HReg, Short );
-HInstrArray *iselSB_S390 ( IRSB *, VexArch, VexArchInfo *, VexAbiInfo * );
+HInstrArray *iselSB_S390 ( IRSB *, VexArch, VexArchInfo *, VexAbiInfo *,
+ Int, Int, Bool, Bool, Addr64);
+
+/* Return the number of bytes of code needed for an event check */
+Int evCheckSzB_S390(void);
+
+/* Perform a chaining and unchaining of an XDirect jump. */
+VexInvalRange chainXDirect_S390(void *place_to_chain,
+ void *disp_cp_chain_me_EXPECTED,
+ void *place_to_jump_to);
+
+VexInvalRange unchainXDirect_S390(void *place_to_unchain,
+ void *place_to_jump_to_EXPECTED,
+ void *disp_cp_chain_me);
+
+/* Patch the counter location into an existing ProfInc point. */
+VexInvalRange patchProfInc_S390(void *code_to_patch,
+ ULong *location_of_counter);
/* KLUDGE: See detailed comment in host_s390_defs.c. */
extern const VexArchInfo *s390_archinfo_host;
diff --git a/priv/host_s390_isel.c b/priv/host_s390_isel.c
index a2217d4..9400012 100644
--- a/priv/host_s390_isel.c
+++ b/priv/host_s390_isel.c
@@ -69,6 +69,18 @@
- The host subarchitecture we are selecting insns for.
This is set at the start and does not change.
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+ in this block. Is set at the start and does not change. This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
+
- A flag to indicate whether the guest IA has been assigned to.
- Values of certain guest registers which are often assigned constants.
@@ -92,16 +104,19 @@
typedef struct {
IRTypeEnv *type_env;
+ HInstrArray *code;
HReg *vregmap;
HReg *vregmapHI;
UInt n_vregmap;
-
- HInstrArray *code;
+ UInt vreg_ctr;
+ UInt hwcaps;
ULong old_value[NUM_TRACKED_REGS];
- UInt vreg_ctr;
- UInt hwcaps;
+ /* The next two are for translation chaining */
+ Addr64 max_ga;
+ Bool chaining_allowed;
+
Bool first_IA_assignment;
Bool old_value_valid[NUM_TRACKED_REGS];
} ISelEnv;
@@ -2437,17 +2452,56 @@
/* --------- EXIT --------- */
case Ist_Exit: {
- s390_opnd_RMI dst;
s390_cc_t cond;
IRConstTag tag = stmt->Ist.Exit.dst->tag;
if (tag != Ico_U64)
vpanic("s390_isel_stmt: Ist_Exit: dst is not a 64-bit value");
- dst = s390_isel_int_expr_RMI(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ s390_amode *guest_IA = s390_amode_for_guest_state(stmt->Ist.Exit.offsIP);
cond = s390_isel_cc(env, stmt->Ist.Exit.guard);
- addInstr(env, s390_insn_branch(stmt->Ist.Exit.jk, cond, dst));
- return;
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring) {
+ if (env->chaining_allowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool to_fast_entry
+ = ((Addr64)stmt->Ist.Exit.dst->Ico.U64) > env->max_ga;
+ if (0) vex_printf("%s", to_fast_entry ? "Y" : ",");
+ addInstr(env, s390_insn_xdirect(cond, stmt->Ist.Exit.dst->Ico.U64,
+ guest_IA, to_fast_entry));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg dst = s390_isel_int_expr(env,
+ IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, s390_insn_xassisted(cond, dst, guest_IA, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ case Ijk_TInval:
+ case Ijk_Sys_syscall:
+ case Ijk_ClientReq:
+ case Ijk_NoRedir:
+ case Ijk_Yield:
+ case Ijk_SigTRAP: {
+ HReg dst = s390_isel_int_expr(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, s390_insn_xassisted(cond, dst, guest_IA,
+ stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
/* --------- MEM FENCE --------- */
@@ -2484,20 +2538,80 @@
/*---------------------------------------------------------*/
static void
-iselNext(ISelEnv *env, IRExpr *next, IRJumpKind jk)
+iselNext(ISelEnv *env, IRExpr *next, IRJumpKind jk, int offsIP)
{
- s390_opnd_RMI dst;
-
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
- ppIRJumpKind(jk);
- vex_printf("} ");
+ vex_printf("\n-- PUT(%d) = ", offsIP);
ppIRExpr(next);
+ vex_printf("; exit-");
+ ppIRJumpKind(jk);
vex_printf("\n");
}
- dst = s390_isel_int_expr_RMI(env, next);
- addInstr(env, s390_insn_branch(jk, S390_CC_ALWAYS, dst));
+ s390_amode *guest_IA = s390_amode_for_guest_state(offsIP);
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst *cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U64);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ if (env->chaining_allowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool to_fast_entry
+ = ((Addr64)cdst->Ico.U64) > env->max_ga;
+ if (0) vex_printf("%s", to_fast_entry ? "X" : ".");
+ addInstr(env, s390_insn_xdirect(S390_CC_ALWAYS, cdst->Ico.U64,
+ guest_IA, to_fast_entry));
+ } else {
+ /* .. very occasionally .. */
+         /* We can't use chaining, so ask for an assisted transfer,
+            as that's the only alternative that is allowable. */
+ HReg dst = s390_isel_int_expr(env, next);
+ addInstr(env, s390_insn_xassisted(S390_CC_ALWAYS, dst, guest_IA,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring:
+ case Ijk_Ret:
+ case Ijk_Call: {
+ HReg dst = s390_isel_int_expr(env, next);
+ if (env->chaining_allowed) {
+ addInstr(env, s390_insn_xindir(S390_CC_ALWAYS, dst, guest_IA));
+ } else {
+ addInstr(env, s390_insn_xassisted(S390_CC_ALWAYS, dst, guest_IA,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_TInval:
+ case Ijk_Sys_syscall:
+ case Ijk_ClientReq:
+ case Ijk_NoRedir:
+ case Ijk_Yield:
+ case Ijk_SigTRAP: {
+ HReg dst = s390_isel_int_expr(env, next);
+ addInstr(env, s390_insn_xassisted(S390_CC_ALWAYS, dst, guest_IA, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vpanic("iselNext");
}
@@ -2509,7 +2623,9 @@
HInstrArray *
iselSB_S390(IRSB *bb, VexArch arch_host, VexArchInfo *archinfo_host,
- VexAbiInfo *vbi)
+ VexAbiInfo *vbi, Int offset_host_evcheck_counter,
+ Int offset_host_evcheck_fail_addr, Bool chaining_allowed,
+ Bool add_profinc, Addr64 max_ga)
{
UInt i, j;
HReg hreg, hregHI;
@@ -2552,6 +2668,9 @@
/* and finally ... */
env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
+ env->chaining_allowed = chaining_allowed;
+
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
j = 0;
@@ -2595,12 +2714,26 @@
}
env->vreg_ctr = j;
+ /* The very first instruction must be an event check. */
+ s390_amode *counter, *fail_addr;
+ counter = s390_amode_for_guest_state(offset_host_evcheck_counter);
+ fail_addr = s390_amode_for_guest_state(offset_host_evcheck_fail_addr);
+ addInstr(env, s390_insn_evcheck(counter, fail_addr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfInc. */
+ if (add_profinc) {
+ addInstr(env, s390_insn_profinc());
+ }
+
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
if (bb->stmts[i])
s390_isel_stmt(env, bb->stmts[i]);
- iselNext(env, bb->next, bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* Record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
diff --git a/priv/host_x86_defs.c b/priv/host_x86_defs.c
index 25848a3..4471f4d 100644
--- a/priv/host_x86_defs.c
+++ b/priv/host_x86_defs.c
@@ -647,12 +647,33 @@
vassert(regparms >= 0 && regparms <= 3);
return i;
}
-X86Instr* X86Instr_Goto ( IRJumpKind jk, X86CondCode cond, X86RI* dst ) {
- X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
- i->tag = Xin_Goto;
- i->Xin.Goto.cond = cond;
- i->Xin.Goto.dst = dst;
- i->Xin.Goto.jk = jk;
+X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
+ X86CondCode cond, Bool toFastEP ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_XDirect;
+ i->Xin.XDirect.dstGA = dstGA;
+ i->Xin.XDirect.amEIP = amEIP;
+ i->Xin.XDirect.cond = cond;
+ i->Xin.XDirect.toFastEP = toFastEP;
+ return i;
+}
+X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_XIndir;
+ i->Xin.XIndir.dstGA = dstGA;
+ i->Xin.XIndir.amEIP = amEIP;
+ i->Xin.XIndir.cond = cond;
+ return i;
+}
+X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond, IRJumpKind jk ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_XAssisted;
+ i->Xin.XAssisted.dstGA = dstGA;
+ i->Xin.XAssisted.amEIP = amEIP;
+ i->Xin.XAssisted.cond = cond;
+ i->Xin.XAssisted.jk = jk;
return i;
}
X86Instr* X86Instr_CMov32 ( X86CondCode cond, X86RM* src, HReg dst ) {
@@ -797,7 +818,6 @@
i->Xin.FpCmp.dst = dst;
return i;
}
-
X86Instr* X86Instr_SseConst ( UShort con, HReg dst ) {
X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
i->tag = Xin_SseConst;
@@ -886,6 +906,19 @@
vassert(order >= 0 && order <= 0xFF);
return i;
}
+X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
+ X86AMode* amFailAddr ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_EvCheck;
+ i->Xin.EvCheck.amCounter = amCounter;
+ i->Xin.EvCheck.amFailAddr = amFailAddr;
+ return i;
+}
+X86Instr* X86Instr_ProfInc ( void ) {
+ X86Instr* i = LibVEX_Alloc(sizeof(X86Instr));
+ i->tag = Xin_ProfInc;
+ return i;
+}
void ppX86Instr ( X86Instr* i, Bool mode64 ) {
vassert(mode64 == False);
@@ -953,24 +986,36 @@
i->Xin.Call.regparms);
vex_printf("0x%x", i->Xin.Call.target);
break;
- case Xin_Goto:
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
- vex_printf("if (%%eflags.%s) { ",
- showX86CondCode(i->Xin.Goto.cond));
- }
- if (i->Xin.Goto.jk != Ijk_Boring
- && i->Xin.Goto.jk != Ijk_Call
- && i->Xin.Goto.jk != Ijk_Ret) {
- vex_printf("movl $");
- ppIRJumpKind(i->Xin.Goto.jk);
- vex_printf(",%%ebp ; ");
- }
+ case Xin_XDirect:
+ vex_printf("(xDirect) ");
+ vex_printf("if (%%eflags.%s) { ",
+ showX86CondCode(i->Xin.XDirect.cond));
+ vex_printf("movl $0x%x,", i->Xin.XDirect.dstGA);
+ ppX86AMode(i->Xin.XDirect.amEIP);
+ vex_printf("; ");
+ vex_printf("movl $disp_cp_chain_me_to_%sEP,%%edx; call *%%edx }",
+ i->Xin.XDirect.toFastEP ? "fast" : "slow");
+ return;
+ case Xin_XIndir:
+ vex_printf("(xIndir) ");
+ vex_printf("if (%%eflags.%s) { movl ",
+ showX86CondCode(i->Xin.XIndir.cond));
+ ppHRegX86(i->Xin.XIndir.dstGA);
+ vex_printf(",");
+ ppX86AMode(i->Xin.XIndir.amEIP);
+ vex_printf("; movl $disp_indir,%%edx; jmp *%%edx }");
+ return;
+ case Xin_XAssisted:
+ vex_printf("(xAssisted) ");
+ vex_printf("if (%%eflags.%s) { ",
+ showX86CondCode(i->Xin.XAssisted.cond));
vex_printf("movl ");
- ppX86RI(i->Xin.Goto.dst);
- vex_printf(",%%eax ; movl $dispatcher_addr,%%edx ; jmp *%%edx");
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
- vex_printf(" }");
- }
+ ppHRegX86(i->Xin.XAssisted.dstGA);
+ vex_printf(",");
+ ppX86AMode(i->Xin.XAssisted.amEIP);
+ vex_printf("; movl $IRJumpKind_to_TRCVAL(%d),%%ebp",
+ (Int)i->Xin.XAssisted.jk);
+ vex_printf("; movl $disp_assisted,%%edx; jmp *%%edx }");
return;
case Xin_CMov32:
vex_printf("cmov%s ", showX86CondCode(i->Xin.CMov32.cond));
@@ -1152,7 +1197,17 @@
vex_printf(",");
ppHRegX86(i->Xin.SseShuf.dst);
return;
-
+ case Xin_EvCheck:
+ vex_printf("(evCheck) decl ");
+ ppX86AMode(i->Xin.EvCheck.amCounter);
+ vex_printf("; jns nofail; jmp *");
+ ppX86AMode(i->Xin.EvCheck.amFailAddr);
+ vex_printf("; nofail:");
+ return;
+ case Xin_ProfInc:
+ vex_printf("(profInc) addl $1,NotKnownYet; "
+ "adcl $0,NotKnownYet+4");
+ return;
default:
vpanic("ppX86Instr");
}
@@ -1258,16 +1313,21 @@
address temporary, depending on the regparmness: 0==EAX,
1==EDX, 2==ECX, 3==EDI. */
return;
- case Xin_Goto:
- addRegUsage_X86RI(u, i->Xin.Goto.dst);
- addHRegUse(u, HRmWrite, hregX86_EAX()); /* used for next guest addr */
- addHRegUse(u, HRmWrite, hregX86_EDX()); /* used for dispatcher addr */
- if (i->Xin.Goto.jk != Ijk_Boring
- && i->Xin.Goto.jk != Ijk_Call
- && i->Xin.Goto.jk != Ijk_Ret)
- /* note, this is irrelevant since ebp is not actually
- available to the allocator. But still .. */
- addHRegUse(u, HRmWrite, hregX86_EBP());
+ /* XDirect/XIndir/XAssisted are also a bit subtle. They
+ conditionally exit the block. Hence we only need to list (1)
+ the registers that they read, and (2) the registers that they
+ write in the case where the block is not exited. (2) is
+ empty, hence only (1) is relevant here. */
+ case Xin_XDirect:
+ addRegUsage_X86AMode(u, i->Xin.XDirect.amEIP);
+ return;
+ case Xin_XIndir:
+ addHRegUse(u, HRmRead, i->Xin.XIndir.dstGA);
+ addRegUsage_X86AMode(u, i->Xin.XIndir.amEIP);
+ return;
+ case Xin_XAssisted:
+ addHRegUse(u, HRmRead, i->Xin.XAssisted.dstGA);
+ addRegUsage_X86AMode(u, i->Xin.XAssisted.amEIP);
return;
case Xin_CMov32:
addRegUsage_X86RM(u, i->Xin.CMov32.src, HRmRead);
@@ -1410,6 +1470,15 @@
addHRegUse(u, HRmRead, i->Xin.SseShuf.src);
addHRegUse(u, HRmWrite, i->Xin.SseShuf.dst);
return;
+ case Xin_EvCheck:
+ /* We expect both amodes only to mention %ebp, so this is in
+ fact pointless, since %ebp isn't allocatable, but anyway.. */
+ addRegUsage_X86AMode(u, i->Xin.EvCheck.amCounter);
+ addRegUsage_X86AMode(u, i->Xin.EvCheck.amFailAddr);
+ return;
+ case Xin_ProfInc:
+ /* does not use any registers. */
+ return;
default:
ppX86Instr(i, False);
vpanic("getRegUsage_X86Instr");
@@ -1462,8 +1531,16 @@
return;
case Xin_Call:
return;
- case Xin_Goto:
- mapRegs_X86RI(m, i->Xin.Goto.dst);
+ case Xin_XDirect:
+ mapRegs_X86AMode(m, i->Xin.XDirect.amEIP);
+ return;
+ case Xin_XIndir:
+ mapReg(m, &i->Xin.XIndir.dstGA);
+ mapRegs_X86AMode(m, i->Xin.XIndir.amEIP);
+ return;
+ case Xin_XAssisted:
+ mapReg(m, &i->Xin.XAssisted.dstGA);
+ mapRegs_X86AMode(m, i->Xin.XAssisted.amEIP);
return;
case Xin_CMov32:
mapRegs_X86RM(m, i->Xin.CMov32.src);
@@ -1566,6 +1643,16 @@
mapReg(m, &i->Xin.SseShuf.src);
mapReg(m, &i->Xin.SseShuf.dst);
return;
+ case Xin_EvCheck:
+ /* We expect both amodes only to mention %ebp, so this is in
+ fact pointless, since %ebp isn't allocatable, but anyway.. */
+ mapRegs_X86AMode(m, i->Xin.EvCheck.amCounter);
+ mapRegs_X86AMode(m, i->Xin.EvCheck.amFailAddr);
+ return;
+ case Xin_ProfInc:
+ /* does not use any registers. */
+ return;
+
default:
ppX86Instr(i, mode64);
vpanic("mapRegs_X86Instr");
@@ -1986,12 +2073,17 @@
/* Emit an instruction into buf and return the number of bytes used.
Note that buf is not the insn's final place, and therefore it is
- imperative to emit position-independent code. */
+ imperative to emit position-independent code. If the emitted
+ instruction was a profiler inc, set *is_profInc to True, else
+ leave it unchanged. */
-Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr* i,
+Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, X86Instr* i,
Bool mode64,
- void* dispatch_unassisted,
- void* dispatch_assisted )
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted )
{
UInt irno, opc, opc_rr, subopc_imm, opc_imma, opc_cl, opc_imm, subopc;
@@ -2306,110 +2398,153 @@
*p++ = toUChar(0xD0 + irno);
goto done;
- case Xin_Goto: {
- void* dispatch_to_use = NULL;
- vassert(dispatch_unassisted != NULL);
- vassert(dispatch_assisted != NULL);
+ case Xin_XDirect: {
+ /* NB: what goes on here has to be very closely coordinated with the
+ chainXDirect_X86 and unchainXDirect_X86 below. */
+ /* We're generating chain-me requests here, so we need to be
+ sure this is actually allowed -- no-redir translations can't
+ use chain-me's. Hence: */
+ vassert(disp_cp_chain_me_to_slowEP != NULL);
+ vassert(disp_cp_chain_me_to_fastEP != NULL);
/* Use ptmp for backpatching conditional jumps. */
ptmp = NULL;
/* First off, if this is conditional, create a conditional
- jump over the rest of it. */
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ jump over the rest of it. */
+ if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
/* jmp fwds if !condition */
- *p++ = toUChar(0x70 + (0xF & (i->Xin.Goto.cond ^ 1)));
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.XDirect.cond ^ 1)));
ptmp = p; /* fill in this bit later */
*p++ = 0; /* # of bytes to jump over; don't know how many yet. */
}
- /* If a non-boring, set %ebp (the guest state pointer)
- appropriately. Also, decide which dispatcher we need to
- use. */
- dispatch_to_use = dispatch_assisted;
+ /* Update the guest EIP. */
+ /* movl $dstGA, amEIP */
+ *p++ = 0xC7;
+ p = doAMode_M(p, fake(0), i->Xin.XDirect.amEIP);
+ p = emit32(p, i->Xin.XDirect.dstGA);
- /* movl $magic_number, %ebp */
- switch (i->Xin.Goto.jk) {
- case Ijk_ClientReq:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_CLIENTREQ); break;
- case Ijk_Sys_int128:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT128); break;
- case Ijk_Sys_int129:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT129); break;
- case Ijk_Sys_int130:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_INT130); break;
- case Ijk_Yield:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_YIELD); break;
- case Ijk_EmWarn:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_EMWARN); break;
- case Ijk_MapFail:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_MAPFAIL); break;
- case Ijk_NoDecode:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NODECODE); break;
- case Ijk_TInval:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_TINVAL); break;
- case Ijk_NoRedir:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_NOREDIR); break;
- case Ijk_Sys_sysenter:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SYS_SYSENTER); break;
- case Ijk_SigTRAP:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGTRAP); break;
- case Ijk_SigSEGV:
- *p++ = 0xBD;
- p = emit32(p, VEX_TRC_JMP_SIGSEGV); break;
- case Ijk_Ret:
- case Ijk_Call:
- case Ijk_Boring:
- dispatch_to_use = dispatch_unassisted;
- break;
- default:
- ppIRJumpKind(i->Xin.Goto.jk);
- vpanic("emit_X86Instr.Xin_Goto: unknown jump kind");
- }
-
- /* Get the destination address into %eax */
- if (i->Xin.Goto.dst->tag == Xri_Imm) {
- /* movl $immediate, %eax */
- *p++ = 0xB8;
- p = emit32(p, i->Xin.Goto.dst->Xri.Imm.imm32);
- } else {
- vassert(i->Xin.Goto.dst->tag == Xri_Reg);
- /* movl %reg, %eax */
- if (i->Xin.Goto.dst->Xri.Reg.reg != hregX86_EAX()) {
- *p++ = 0x89;
- p = doAMode_R(p, i->Xin.Goto.dst->Xri.Reg.reg, hregX86_EAX());
- }
- }
-
- /* Get the dispatcher address into %edx. This has to happen
- after the load of %eax since %edx might be carrying the value
- destined for %eax immediately prior to this Xin_Goto. */
- vassert(sizeof(UInt) == sizeof(void*));
- vassert(dispatch_to_use != NULL);
- /* movl $imm32, %edx */
+ /* --- FIRST PATCHABLE BYTE follows --- */
+ /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're calling
+ to) backs up the return address, so as to find the address of
+ the first patchable byte. So: don't change the length of the
+ two instructions below. */
+ /* movl $disp_cp_chain_me_to_{slow,fast}EP,%edx; */
*p++ = 0xBA;
- p = emit32(p, (UInt)Ptr_to_ULong(dispatch_to_use));
+ void* disp_cp_chain_me
+ = i->Xin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
+ : disp_cp_chain_me_to_slowEP;
+ p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_chain_me));
+ /* call *%edx */
+ *p++ = 0xFF;
+ *p++ = 0xD2;
+ /* --- END of PATCHABLE BYTES --- */
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Xin.XDirect.cond != Xcc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+ }
+
+ case Xin_XIndir: {
+ /* We're generating transfers that could lead indirectly to a
+ chain-me, so we need to be sure this is actually allowed --
+ no-redir translations are not allowed to reach normal
+ translations without going through the scheduler. That means
+ no XDirects or XIndirs out from no-redir translations.
+ Hence: */
+ vassert(disp_cp_xindir != NULL);
+
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.XIndir.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+ /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Xin.XIndir.dstGA, i->Xin.XIndir.amEIP);
+
+ /* movl $disp_indir, %edx */
+ *p++ = 0xBA;
+ p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xindir));
/* jmp *%edx */
*p++ = 0xFF;
*p++ = 0xE2;
/* Fix up the conditional jump, if there was one. */
- if (i->Xin.Goto.cond != Xcc_ALWAYS) {
+ if (i->Xin.XIndir.cond != Xcc_ALWAYS) {
Int delta = p - ptmp;
- vassert(delta > 0 && delta < 20);
+ vassert(delta > 0 && delta < 40);
+ *ptmp = toUChar(delta-1);
+ }
+ goto done;
+ }
+
+ case Xin_XAssisted: {
+ /* Use ptmp for backpatching conditional jumps. */
+ ptmp = NULL;
+
+ /* First off, if this is conditional, create a conditional
+ jump over the rest of it. */
+ if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
+ /* jmp fwds if !condition */
+ *p++ = toUChar(0x70 + (0xF & (i->Xin.XAssisted.cond ^ 1)));
+ ptmp = p; /* fill in this bit later */
+ *p++ = 0; /* # of bytes to jump over; don't know how many yet. */
+ }
+
+ /* movl dstGA(a reg), amEIP -- copied from Alu32M MOV case */
+ *p++ = 0x89;
+ p = doAMode_M(p, i->Xin.XAssisted.dstGA, i->Xin.XAssisted.amEIP);
+ /* movl $magic_number, %ebp. */
+ UInt trcval = 0;
+ switch (i->Xin.XAssisted.jk) {
+ case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
+ case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
+ case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
+ case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
+ case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
+ case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
+ case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
+ case Ijk_TInval: trcval = VEX_TRC_JMP_TINVAL; break;
+ case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
+ case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
+ case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
+ case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
+ /* We don't expect to see the following being assisted. */
+ case Ijk_Ret:
+ case Ijk_Call:
+ /* fallthrough */
+ default:
+ ppIRJumpKind(i->Xin.XAssisted.jk);
+ vpanic("emit_X86Instr.Xin_XAssisted: unexpected jump kind");
+ }
+ vassert(trcval != 0);
+ *p++ = 0xBD;
+ p = emit32(p, trcval);
+
+ /* movl $disp_cp_xassisted, %edx */
+ *p++ = 0xBA;
+ p = emit32(p, (UInt)Ptr_to_ULong(disp_cp_xassisted));
+ /* jmp *%edx */
+ *p++ = 0xFF;
+ *p++ = 0xE2;
+
+ /* Fix up the conditional jump, if there was one. */
+ if (i->Xin.XAssisted.cond != Xcc_ALWAYS) {
+ Int delta = p - ptmp;
+ vassert(delta > 0 && delta < 40);
*ptmp = toUChar(delta-1);
}
goto done;
@@ -3088,6 +3223,63 @@
*p++ = (UChar)(i->Xin.SseShuf.order);
goto done;
+ case Xin_EvCheck: {
+ /* We generate:
+ (3 bytes) decl 4(%ebp) 4 == offsetof(host_EvC_COUNTER)
+ (2 bytes) jns nofail expected taken
+ (3 bytes) jmp* 0(%ebp) 0 == offsetof(host_EvC_FAILADDR)
+ nofail:
+ */
+ /* This is heavily asserted re instruction lengths. It needs to
+ be. If we get given unexpected forms of .amCounter or
+ .amFailAddr -- basically, anything that's not of the form
+ uimm7(%ebp) -- they are likely to fail. */
+ /* Note also that after the decl we must be very careful not to
+ read the carry flag, else we get a partial flags stall.
+ js/jns avoids that, though. */
+ UChar* p0 = p;
+ /* --- decl 4(%ebp) --- */
+ /* "fake(1)" because there's no register in this encoding;
+ instead the register field is used as a sub opcode. The
+ encoding for "decl r/m32" is FF /1, hence the fake(1). */
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(1), i->Xin.EvCheck.amCounter);
+ vassert(p - p0 == 3);
+ /* --- jns nofail --- */
+ *p++ = 0x79;
+ *p++ = 0x03; /* need to check this 0x03 after the next insn */
+ vassert(p - p0 == 5);
+ /* --- jmp* 0(%ebp) --- */
+ /* The encoding is FF /4. */
+ *p++ = 0xFF;
+ p = doAMode_M(p, fake(4), i->Xin.EvCheck.amFailAddr);
+ vassert(p - p0 == 8); /* also ensures that 0x03 offset above is ok */
+ /* And crosscheck .. */
+ vassert(evCheckSzB_X86() == 8);
+ goto done;
+ }
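
For concreteness, assuming the amodes really are 4(%ebp) and 0(%ebp) as the
comment above supposes, the eight emitted bytes would be:

    /* FF 4D 04   decl 4(%ebp)   (FF /1; modrm 4D = mod 01, reg 001, rm 101)
       79 03      jns  nofail    (skip the 3-byte indirect jump)
       FF 65 00   jmp* 0(%ebp)   (FF /4; modrm 65 = mod 01, reg 100, rm 101)
       nofail:
       which is why evCheckSzB_X86() below must return 8. */
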
+
+ case Xin_ProfInc: {
+ /* We generate addl $1,NotKnownYet
+ adcl $0,NotKnownYet+4
+ in the expectation that a later call to LibVEX_patchProfInc
+ will be used to fill in the immediate fields once the right
+ value is known.
+ 83 05 00 00 00 00 01
+ 83 15 00 00 00 00 00
+ */
+ *p++ = 0x83; *p++ = 0x05;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x01;
+ *p++ = 0x83; *p++ = 0x15;
+ *p++ = 0x00; *p++ = 0x00; *p++ = 0x00; *p++ = 0x00;
+ *p++ = 0x00;
+ /* Tell the caller .. */
+ vassert(!(*is_profInc));
+ *is_profInc = True;
+ goto done;
+ }
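
What the pair computes, written out as C (a sketch; the helper name is
made up):

    /* A 64-bit increment done as two 32-bit memory ops: addl sets the
       carry flag when the low word wraps, and adcl $0 folds that carry
       into the high word. */
    static void profinc_semantics ( ULong* ctr )
    {
       UInt lo = (UInt)*ctr;
       UInt hi = (UInt)(*ctr >> 32);
       lo += 1;
       if (lo == 0) hi += 1;   /* the carry that adcl absorbs */
       *ctr = ((ULong)hi << 32) | lo;
    }
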
+
default:
goto bad;
}
@@ -3104,6 +3296,140 @@
# undef fake
}
+
+/* How big is an event check? See case for Xin_EvCheck in
+ emit_X86Instr just above. That crosschecks what this returns, so
+ we can tell if we're inconsistent. */
+Int evCheckSzB_X86 ( void )
+{
+ return 8;
+}
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange chainXDirect_X86 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ /* What we're expecting to see is:
+ movl $disp_cp_chain_me_EXPECTED, %edx
+ call *%edx
+ viz
+ BA <4 bytes value == disp_cp_chain_me_EXPECTED>
+ FF D2
+ */
+ UChar* p = (UChar*)place_to_chain;
+ vassert(p[0] == 0xBA);
+ vassert(*(UInt*)(&p[1]) == (UInt)Ptr_to_ULong(disp_cp_chain_me_EXPECTED));
+ vassert(p[5] == 0xFF);
+ vassert(p[6] == 0xD2);
+ /* And what we want to change it to is:
+ jmp disp32 where disp32 is relative to the next insn
+ ud2;
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B
+ The replacement has the same length as the original.
+ */
+ /* This is the delta we need to put into a JMP d32 insn. It's
+ relative to the start of the next insn, hence the -5. */
+ Long delta = (Long)((UChar*)place_to_jump_to - (UChar*)p) - (Long)5;
+
+ /* And make the modifications. */
+ p[0] = 0xE9;
+ p[1] = (delta >> 0) & 0xFF;
+ p[2] = (delta >> 8) & 0xFF;
+ p[3] = (delta >> 16) & 0xFF;
+ p[4] = (delta >> 24) & 0xFF;
+ p[5] = 0x0F; p[6] = 0x0B;
+ /* sanity check on the delta -- top 32 are all 0 or all 1 */
+ delta >>= 32;
+ vassert(delta == 0LL || delta == -1LL);
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
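
A worked instance with made-up addresses: if place_to_chain is 0x5000 and
place_to_jump_to is 0x6000, then delta = 0x6000 - (0x5000 + 5) = 0xFFB, and
the seven bytes are rewritten as:

    /* before:  BA xx xx xx xx FF D2    movl $disp_cp_chain_me,%edx
                                        call *%edx
       after:   E9 FB 0F 00 00 0F 0B    jmp  0x6000 ; ud2 */
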
+
+
+/* NB: what goes on here has to be very closely coordinated with the
+ emitInstr case for XDirect, above. */
+VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ /* What we're expecting to see is:
+ jmp d32
+ ud2;
+ viz
+ E9 <4 bytes == disp32>
+ 0F 0B
+ */
+ UChar* p = (UChar*)place_to_unchain;
+ Bool valid = False;
+ if (p[0] == 0xE9
+ && p[5] == 0x0F && p[6] == 0x0B) {
+ /* Check the offset is right. */
+ Int s32 = *(Int*)(&p[1]);
+ if ((UChar*)p + 5 + s32 == (UChar*)place_to_jump_to_EXPECTED) {
+ valid = True;
+ if (0)
+ vex_printf("QQQ unchainXDirect_X86: found valid\n");
+ }
+ }
+ vassert(valid);
+ /* And what we want to change it to is:
+ movl $disp_cp_chain_me, %edx
+ call *%edx
+ viz
+ BA <4 bytes value == disp_cp_chain_me>
+ FF D2
+ So it's the same length (convenient, huh).
+ */
+ p[0] = 0xBA;
+ *(UInt*)(&p[1]) = (UInt)Ptr_to_ULong(disp_cp_chain_me);
+ p[5] = 0xFF;
+ p[6] = 0xD2;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
+
+
+/* Patch the counter address into a profile inc point, as previously
+ created by the Xin_ProfInc case for emit_X86Instr. */
+VexInvalRange patchProfInc_X86 ( void* place_to_patch,
+ ULong* location_of_counter )
+{
+ vassert(sizeof(ULong*) == 4);
+ UChar* p = (UChar*)place_to_patch;
+ vassert(p[0] == 0x83);
+ vassert(p[1] == 0x05);
+ vassert(p[2] == 0x00);
+ vassert(p[3] == 0x00);
+ vassert(p[4] == 0x00);
+ vassert(p[5] == 0x00);
+ vassert(p[6] == 0x01);
+ vassert(p[7] == 0x83);
+ vassert(p[8] == 0x15);
+ vassert(p[9] == 0x00);
+ vassert(p[10] == 0x00);
+ vassert(p[11] == 0x00);
+ vassert(p[12] == 0x00);
+ vassert(p[13] == 0x00);
+ UInt imm32 = (UInt)Ptr_to_ULong(location_of_counter);
+ p[2] = imm32 & 0xFF; imm32 >>= 8;
+ p[3] = imm32 & 0xFF; imm32 >>= 8;
+ p[4] = imm32 & 0xFF; imm32 >>= 8;
+ p[5] = imm32 & 0xFF; imm32 >>= 8;
+ imm32 = 4 + (UInt)Ptr_to_ULong(location_of_counter);
+ p[9] = imm32 & 0xFF; imm32 >>= 8;
+ p[10] = imm32 & 0xFF; imm32 >>= 8;
+ p[11] = imm32 & 0xFF; imm32 >>= 8;
+ p[12] = imm32 & 0xFF; imm32 >>= 8;
+ VexInvalRange vir = {0, 0};
+ return vir;
+}
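
Again with a made-up address: for a counter at 0x08049F00, the two zeroed
imm32 fields become the addresses of the counter's low and high halves
(little-endian):

    /* before:  83 05 00 00 00 00 01    addl $1, 0x00000000
                83 15 00 00 00 00 00    adcl $0, 0x00000000
       after:   83 05 00 9F 04 08 01    addl $1, 0x08049F00
                83 15 04 9F 04 08 00    adcl $0, 0x08049F04 */
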
+
+
/*---------------------------------------------------------------*/
/*--- end host_x86_defs.c ---*/
/*---------------------------------------------------------------*/
diff --git a/priv/host_x86_defs.h b/priv/host_x86_defs.h
index f68a426..544f8df 100644
--- a/priv/host_x86_defs.h
+++ b/priv/host_x86_defs.h
@@ -349,7 +349,9 @@
Xin_Sh3232, /* shldl or shrdl */
Xin_Push, /* push (32-bit?) value on stack */
Xin_Call, /* call to address in register */
- Xin_Goto, /* conditional/unconditional jmp to dst */
+ Xin_XDirect, /* direct transfer to GA */
+ Xin_XIndir, /* indirect transfer to GA */
+ Xin_XAssisted, /* assisted transfer to GA */
Xin_CMov32, /* conditional move */
Xin_LoadEX, /* mov{s,z}{b,w}l from mem to reg */
Xin_Store, /* store 16/8 bit value in memory */
@@ -378,7 +380,9 @@
Xin_Sse64FLo, /* SSE binary, 64F in lowest lane only */
Xin_SseReRg, /* SSE binary general reg-reg, Re, Rg */
Xin_SseCMov, /* SSE conditional move */
- Xin_SseShuf /* SSE2 shuffle (pshufd) */
+ Xin_SseShuf, /* SSE2 shuffle (pshufd) */
+ Xin_EvCheck, /* Event check */
+ Xin_ProfInc /* 64-bit profile counter increment */
}
X86InstrTag;
@@ -444,13 +448,30 @@
Addr32 target;
Int regparms; /* 0 .. 3 */
} Call;
- /* Pseudo-insn. Goto dst, on given condition (which could be
- Xcc_ALWAYS). */
+ /* Update the guest EIP value, then exit requesting to chain
+ to it. May be conditional. Urr, use of Addr32 implicitly
+ assumes that wordsize(guest) == wordsize(host). */
struct {
+ Addr32 dstGA; /* next guest address */
+ X86AMode* amEIP; /* amode in guest state for EIP */
+ X86CondCode cond; /* can be Xcc_ALWAYS */
+ Bool toFastEP; /* chain to the slow or fast point? */
+ } XDirect;
+ /* Boring transfer to a guest address not known at JIT time.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ X86AMode* amEIP;
+ X86CondCode cond; /* can be Xcc_ALWAYS */
+ } XIndir;
+ /* Assisted transfer to a guest address, most general case.
+ Not chainable. May be conditional. */
+ struct {
+ HReg dstGA;
+ X86AMode* amEIP;
+ X86CondCode cond; /* can be Xcc_ALWAYS */
IRJumpKind jk;
- X86CondCode cond;
- X86RI* dst;
- } Goto;
+ } XAssisted;
/* Mov src to dst on the given condition, which may not
be the bogus Xcc_ALWAYS. */
struct {
@@ -615,6 +636,15 @@
HReg src;
HReg dst;
} SseShuf;
+ struct {
+ X86AMode* amCounter;
+ X86AMode* amFailAddr;
+ } EvCheck;
+ struct {
+ /* No fields. The address of the counter to inc is
+ installed later, post-translation, by patching it in,
+ as it is not known at translation time. */
+ } ProfInc;
} Xin;
}
@@ -632,7 +662,12 @@
extern X86Instr* X86Instr_Sh3232 ( X86ShiftOp, UInt amt, HReg src, HReg dst );
extern X86Instr* X86Instr_Push ( X86RMI* );
extern X86Instr* X86Instr_Call ( X86CondCode, Addr32, Int );
-extern X86Instr* X86Instr_Goto ( IRJumpKind, X86CondCode cond, X86RI* dst );
+extern X86Instr* X86Instr_XDirect ( Addr32 dstGA, X86AMode* amEIP,
+ X86CondCode cond, Bool toFastEP );
+extern X86Instr* X86Instr_XIndir ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond );
+extern X86Instr* X86Instr_XAssisted ( HReg dstGA, X86AMode* amEIP,
+ X86CondCode cond, IRJumpKind jk );
extern X86Instr* X86Instr_CMov32 ( X86CondCode, X86RM* src, HReg dst );
extern X86Instr* X86Instr_LoadEX ( UChar szSmall, Bool syned,
X86AMode* src, HReg dst );
@@ -663,6 +698,9 @@
extern X86Instr* X86Instr_SseReRg ( X86SseOp, HReg, HReg );
extern X86Instr* X86Instr_SseCMov ( X86CondCode, HReg src, HReg dst );
extern X86Instr* X86Instr_SseShuf ( Int order, HReg src, HReg dst );
+extern X86Instr* X86Instr_EvCheck ( X86AMode* amCounter,
+ X86AMode* amFailAddr );
+extern X86Instr* X86Instr_ProfInc ( void );
extern void ppX86Instr ( X86Instr*, Bool );
@@ -672,10 +710,13 @@
extern void getRegUsage_X86Instr ( HRegUsage*, X86Instr*, Bool );
extern void mapRegs_X86Instr ( HRegRemap*, X86Instr*, Bool );
extern Bool isMove_X86Instr ( X86Instr*, HReg*, HReg* );
-extern Int emit_X86Instr ( UChar* buf, Int nbuf, X86Instr*,
- Bool,
- void* dispatch_unassisted,
- void* dispatch_assisted );
+extern Int emit_X86Instr ( /*MB_MOD*/Bool* is_profInc,
+ UChar* buf, Int nbuf, X86Instr* i,
+ Bool mode64,
+ void* disp_cp_chain_me_to_slowEP,
+ void* disp_cp_chain_me_to_fastEP,
+ void* disp_cp_xindir,
+ void* disp_cp_xassisted );
extern void genSpill_X86 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
HReg rreg, Int offset, Bool );
@@ -685,9 +726,36 @@
extern X86Instr* directReload_X86 ( X86Instr* i,
HReg vreg, Short spill_off );
extern void getAllocableRegs_X86 ( Int*, HReg** );
-extern HInstrArray* iselSB_X86 ( IRSB*, VexArch,
- VexArchInfo*,
- VexAbiInfo* );
+extern HInstrArray* iselSB_X86 ( IRSB*,
+ VexArch,
+ VexArchInfo*,
+ VexAbiInfo*,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga );
+
+/* How big is an event check? This is kind of a kludge because it
+ depends on the offsets of host_EvC_FAILADDR and host_EvC_COUNTER,
+ and so assumes that they are both < 128, and so can use the short
+ offset encoding. This is all checked with assertions, so in the
+ worst case we will merely assert at startup. */
+extern Int evCheckSzB_X86 ( void );
+
+/* Perform chaining or unchaining of an XDirect jump. */
+extern VexInvalRange chainXDirect_X86 ( void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
+
+extern VexInvalRange unchainXDirect_X86 ( void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
+
+/* Patch the counter location into an existing ProfInc point. */
+extern VexInvalRange patchProfInc_X86 ( void* place_to_patch,
+ ULong* location_of_counter );
+
#endif /* ndef __VEX_HOST_X86_DEFS_H */
diff --git a/priv/host_x86_isel.c b/priv/host_x86_isel.c
index 81896b3..2dd14ce 100644
--- a/priv/host_x86_isel.c
+++ b/priv/host_x86_isel.c
@@ -154,21 +154,38 @@
- The host subarchitecture we are selecting insns for.
This is set at the start and does not change.
- Note, this is all host-independent. */
+ - A Bool for indicating whether we may generate chain-me
+ instructions for control flow transfers, or whether we must use
+ XAssisted.
+
+ - The maximum guest address of any guest insn in this block.
+ Actually, the address of the highest-addressed byte from any insn
+ in this block. Is set at the start and does not change. This is
+ used for detecting jumps which are definitely forward-edges from
+ this block, and therefore can be made (chained) to the fast entry
+ point of the destination, thereby avoiding the destination's
+ event check.
+
+ Note, this is all (well, mostly) host-independent.
+*/
typedef
struct {
+ /* Constant -- set at the start; they do not change. */
IRTypeEnv* type_env;
HReg* vregmap;
HReg* vregmapHI;
Int n_vregmap;
- HInstrArray* code;
-
- Int vreg_ctr;
-
UInt hwcaps;
+
+ Bool chainingAllowed;
+ Addr64 max_ga;
+
+ /* These are modified as we go along. */
+ HInstrArray* code;
+ Int vreg_ctr;
}
ISelEnv;
@@ -4038,14 +4055,48 @@
/* --------- EXIT --------- */
case Ist_Exit: {
- X86RI* dst;
- X86CondCode cc;
if (stmt->Ist.Exit.dst->tag != Ico_U32)
- vpanic("isel_x86: Ist_Exit: dst is not a 32-bit value");
- dst = iselIntExpr_RI(env, IRExpr_Const(stmt->Ist.Exit.dst));
- cc = iselCondCode(env,stmt->Ist.Exit.guard);
- addInstr(env, X86Instr_Goto(stmt->Ist.Exit.jk, cc, dst));
- return;
+ vpanic("iselStmt(x86): Ist_Exit: dst is not a 32-bit value");
+
+ X86CondCode cc = iselCondCode(env, stmt->Ist.Exit.guard);
+ X86AMode* amEIP = X86AMode_IR(stmt->Ist.Exit.offsIP,
+ hregX86_EBP());
+
+ /* Case: boring transfer to known address */
+ if (stmt->Ist.Exit.jk == Ijk_Boring) {
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr32)stmt->Ist.Exit.dst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "Y" : ",");
+ addInstr(env, X86Instr_XDirect(stmt->Ist.Exit.dst->Ico.U32,
+ amEIP, cc, toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, X86Instr_XAssisted(r, amEIP, cc, Ijk_Boring));
+ }
+ return;
+ }
+
+ /* Case: assisted transfer to arbitrary address */
+ switch (stmt->Ist.Exit.jk) {
+ case Ijk_MapFail:
+ case Ijk_SigSEGV: case Ijk_TInval: case Ijk_EmWarn: {
+ HReg r = iselIntExpr_R(env, IRExpr_Const(stmt->Ist.Exit.dst));
+ addInstr(env, X86Instr_XAssisted(r, amEIP, cc, stmt->Ist.Exit.jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Do we ever expect to see any other kind? */
+ goto stmt_fail;
}
default: break;
@@ -4060,18 +4111,82 @@
/*--- ISEL: Basic block terminators (Nexts) ---*/
/*---------------------------------------------------------*/
-static void iselNext ( ISelEnv* env, IRExpr* next, IRJumpKind jk )
+static void iselNext ( ISelEnv* env,
+ IRExpr* next, IRJumpKind jk, Int offsIP )
{
- X86RI* ri;
if (vex_traceflags & VEX_TRACE_VCODE) {
- vex_printf("\n-- goto {");
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
ppIRJumpKind(jk);
- vex_printf("} ");
- ppIRExpr(next);
- vex_printf("\n");
+ vex_printf( "\n");
}
- ri = iselIntExpr_RI(env, next);
- addInstr(env, X86Instr_Goto(jk, Xcc_ALWAYS,ri));
+
+ /* Case: boring transfer to known address */
+ if (next->tag == Iex_Const) {
+ IRConst* cdst = next->Iex.Const.con;
+ vassert(cdst->tag == Ico_U32);
+ if (jk == Ijk_Boring || jk == Ijk_Call) {
+ /* Boring transfer to known address */
+ X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
+ if (env->chainingAllowed) {
+ /* .. almost always true .. */
+ /* Skip the event check at the dst if this is a forwards
+ edge. */
+ Bool toFastEP
+ = ((Addr64)cdst->Ico.U32) > env->max_ga;
+ if (0) vex_printf("%s", toFastEP ? "X" : ".");
+ addInstr(env, X86Instr_XDirect(cdst->Ico.U32,
+ amEIP, Xcc_ALWAYS,
+ toFastEP));
+ } else {
+ /* .. very occasionally .. */
+ /* We can't use chaining, so ask for an assisted transfer,
+ as that's the only alternative that is allowable. */
+ HReg r = iselIntExpr_R(env, next);
+ addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ }
+
+ /* Case: call/return (==boring) transfer to any address */
+ switch (jk) {
+ case Ijk_Boring: case Ijk_Ret: case Ijk_Call: {
+ HReg r = iselIntExpr_R(env, next);
+ X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
+ if (env->chainingAllowed) {
+ addInstr(env, X86Instr_XIndir(r, amEIP, Xcc_ALWAYS));
+ } else {
+ addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS,
+ Ijk_Boring));
+ }
+ return;
+ }
+ default:
+ break;
+ }
+
+ /* Case: some other kind of transfer to any address */
+ switch (jk) {
+ case Ijk_Sys_int128: case Ijk_ClientReq: case Ijk_NoRedir:
+ case Ijk_Yield: case Ijk_SigTRAP: case Ijk_TInval: {
+ HReg r = iselIntExpr_R(env, next);
+ X86AMode* amEIP = X86AMode_IR(offsIP, hregX86_EBP());
+ addInstr(env, X86Instr_XAssisted(r, amEIP, Xcc_ALWAYS, jk));
+ return;
+ }
+ default:
+ break;
+ }
+
+ vex_printf( "\n-- PUT(%d) = ", offsIP);
+ ppIRExpr( next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(jk);
+ vex_printf( "\n");
+ vassert(0); // are we expecting any other kind?
}
@@ -4081,14 +4196,21 @@
/* Translate an entire SB to x86 code. */
-HInstrArray* iselSB_X86 ( IRSB* bb, VexArch arch_host,
- VexArchInfo* archinfo_host,
- VexAbiInfo* vbi/*UNUSED*/ )
+HInstrArray* iselSB_X86 ( IRSB* bb,
+ VexArch arch_host,
+ VexArchInfo* archinfo_host,
+ VexAbiInfo* vbi/*UNUSED*/,
+ Int offs_Host_EvC_Counter,
+ Int offs_Host_EvC_FailAddr,
+ Bool chainingAllowed,
+ Bool addProfInc,
+ Addr64 max_ga )
{
Int i, j;
HReg hreg, hregHI;
ISelEnv* env;
UInt hwcaps_host = archinfo_host->hwcaps;
+ X86AMode *amCounter, *amFailAddr;
/* sanity ... */
vassert(arch_host == VexArchX86);
@@ -4097,6 +4219,8 @@
| VEX_HWCAPS_X86_SSE2
| VEX_HWCAPS_X86_SSE3
| VEX_HWCAPS_X86_LZCNT)));
+ vassert(sizeof(max_ga) == 8);
+ vassert((max_ga >> 32) == 0);
/* Make up an initial environment to use. */
env = LibVEX_Alloc(sizeof(ISelEnv));
@@ -4115,7 +4239,9 @@
env->vregmapHI = LibVEX_Alloc(env->n_vregmap * sizeof(HReg));
/* and finally ... */
- env->hwcaps = hwcaps_host;
+ env->chainingAllowed = chainingAllowed;
+ env->hwcaps = hwcaps_host;
+ env->max_ga = max_ga;
/* For each IR temporary, allocate a suitably-kinded virtual
register. */
@@ -4140,11 +4266,24 @@
}
env->vreg_ctr = j;
+ /* The very first instruction must be an event check. */
+ amCounter = X86AMode_IR(offs_Host_EvC_Counter, hregX86_EBP());
+ amFailAddr = X86AMode_IR(offs_Host_EvC_FailAddr, hregX86_EBP());
+ addInstr(env, X86Instr_EvCheck(amCounter, amFailAddr));
+
+ /* Possibly a block counter increment (for profiling). At this
+ point we don't know the address of the counter, so just pretend
+ it is zero. It will have to be patched later, but before this
+ translation is used, by a call to LibVEX_patchProfInc. */
+ if (addProfInc) {
+ addInstr(env, X86Instr_ProfInc());
+ }
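
So every x86 translation now begins with a fixed-shape prologue. A sketch of
the resulting layout (offsets are illustrative, and assume the 8-byte event
check and 14-byte profinc shown earlier):

    /*  +0:  event check (8 bytes)            <- slow entry point
        +8:  addl/adcl counter increment      <- only if addProfInc
        ...: code for the block's statements
        ...: final XDirect/XIndir/XAssisted exit
       The fast entry point, which forwards-edge chaining targets, is
       presumably just past the event check. */
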
+
/* Ok, finally we can iterate over the statements. */
for (i = 0; i < bb->stmts_used; i++)
- iselStmt(env,bb->stmts[i]);
+ iselStmt(env, bb->stmts[i]);
- iselNext(env,bb->next,bb->jumpkind);
+ iselNext(env, bb->next, bb->jumpkind, bb->offsIP);
/* record the number of vregs we used. */
env->code->n_vregs = env->vreg_ctr;
diff --git a/priv/ir_defs.c b/priv/ir_defs.c
index ae0d090..d38acd7 100644
--- a/priv/ir_defs.c
+++ b/priv/ir_defs.c
@@ -1253,10 +1253,11 @@
case Ist_Exit:
vex_printf( "if (" );
ppIRExpr(s->Ist.Exit.guard);
- vex_printf( ") goto {");
- ppIRJumpKind(s->Ist.Exit.jk);
- vex_printf("} ");
+ vex_printf( ") { PUT(%d) = ", s->Ist.Exit.offsIP);
ppIRConst(s->Ist.Exit.dst);
+ vex_printf("; exit-");
+ ppIRJumpKind(s->Ist.Exit.jk);
+ vex_printf(" } ");
break;
default:
vpanic("ppIRStmt");
@@ -1291,10 +1292,10 @@
ppIRStmt(bb->stmts[i]);
vex_printf( "\n");
}
- vex_printf( " goto {");
- ppIRJumpKind(bb->jumpkind);
- vex_printf( "} ");
+ vex_printf( " PUT(%d) = ", bb->offsIP );
ppIRExpr( bb->next );
+ vex_printf( "; exit-");
+ ppIRJumpKind(bb->jumpkind);
vex_printf( "\n}\n");
}
@@ -1725,12 +1726,14 @@
s->Ist.MBE.event = event;
return s;
}
-IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst ) {
- IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
- s->tag = Ist_Exit;
- s->Ist.Exit.guard = guard;
- s->Ist.Exit.jk = jk;
- s->Ist.Exit.dst = dst;
+IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
+ Int offsIP ) {
+ IRStmt* s = LibVEX_Alloc(sizeof(IRStmt));
+ s->tag = Ist_Exit;
+ s->Ist.Exit.guard = guard;
+ s->Ist.Exit.jk = jk;
+ s->Ist.Exit.dst = dst;
+ s->Ist.Exit.offsIP = offsIP;
return s;
}
@@ -1758,6 +1761,7 @@
bb->stmts = LibVEX_Alloc(bb->stmts_size * sizeof(IRStmt*));
bb->next = NULL;
bb->jumpkind = Ijk_Boring;
+ bb->offsIP = 0;
return bb;
}
@@ -1948,7 +1952,8 @@
case Ist_Exit:
return IRStmt_Exit(deepCopyIRExpr(s->Ist.Exit.guard),
s->Ist.Exit.jk,
- deepCopyIRConst(s->Ist.Exit.dst));
+ deepCopyIRConst(s->Ist.Exit.dst),
+ s->Ist.Exit.offsIP);
default:
vpanic("deepCopyIRStmt");
}
@@ -1975,7 +1980,7 @@
sts2 = LibVEX_Alloc(bb2->stmts_used * sizeof(IRStmt*));
for (i = 0; i < bb2->stmts_used; i++)
sts2[i] = deepCopyIRStmt(bb->stmts[i]);
- bb2->stmts = sts2;
+ bb2->stmts = sts2;
return bb2;
}
@@ -1985,6 +1990,7 @@
bb2->tyenv = deepCopyIRTypeEnv(bb->tyenv);
bb2->next = deepCopyIRExpr(bb->next);
bb2->jumpkind = bb->jumpkind;
+ bb2->offsIP = bb->offsIP;
return bb2;
}
@@ -3508,6 +3514,9 @@
sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: bad dst");
if (typeOfIRConst(stmt->Ist.Exit.dst) != gWordTy)
sanityCheckFail(bb,stmt,"IRStmt.Exit.dst: not :: guest word type");
+ /* because it would intersect with host_EvC_* */
+ if (stmt->Ist.Exit.offsIP < 16)
+ sanityCheckFail(bb,stmt,"IRStmt.Exit.offsIP: too low");
break;
default:
vpanic("tcStmt");
@@ -3634,6 +3643,10 @@
tcStmt( bb, bb->stmts[i], guest_word_size );
if (typeOfIRExpr(bb->tyenv,bb->next) != guest_word_size)
sanityCheckFail(bb, NULL, "bb->next field has wrong type");
+ /* because it would intersect with host_EvC_* */
+ if (bb->offsIP < 16)
+ sanityCheckFail(bb, NULL, "bb->offsIP: too low");
+
}
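
The magic constant 16 in both checks reflects the assumed front of every
guest state; field names follow the Xin_EvCheck comments earlier in this
patch, and exact widths vary with the guest word size:

    /* offset 0:   host_EvC_FAILADDR  (4 or 8 bytes)
       offset 4/8: host_EvC_COUNTER   (4 bytes)
       ...         padding up to offset 16
       offset 16:  first real guest register
       An offsIP below 16 would overlap the event-check fields that the
       generated prologue reads and writes. */
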
/*---------------------------------------------------------------*/
diff --git a/priv/ir_opt.c b/priv/ir_opt.c
index 118249a..5bf44fd 100644
--- a/priv/ir_opt.c
+++ b/priv/ir_opt.c
@@ -467,7 +467,8 @@
case Ist_Exit:
e1 = flatten_Expr(bb, st->Ist.Exit.guard);
addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
- st->Ist.Exit.dst));
+ st->Ist.Exit.dst,
+ st->Ist.Exit.offsIP));
break;
default:
vex_printf("\n");
@@ -489,6 +490,7 @@
flatten_Stmt( out, in->stmts[i] );
out->next = flatten_Expr( out, in->next );
out->jumpkind = in->jumpkind;
+ out->offsIP = in->offsIP;
return out;
}
@@ -815,6 +817,14 @@
UInt key = 0; /* keep gcc -O happy */
HashHW* env = newHHW();
+
+ /* Initialise the running env with the fact that the final exit
+ writes the IP (or whatever it claims to write; we don't
+ care). */
+ key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
+ addToHHW(env, (HWord)key, 0);
+
+ /* And now scan backwards through the statements. */
for (i = bb->stmts_used-1; i >= 0; i--) {
st = bb->stmts[i];
@@ -823,13 +833,32 @@
/* Deal with conditional exits. */
if (st->tag == Ist_Exit) {
- /* Since control may not get beyond this point, we must empty
- out the set, since we can no longer claim that the next
- event for any part of the guest state is definitely a
- write. */
- vassert(isIRAtom(st->Ist.Exit.guard));
+ //Bool re_add;
+ /* Need to throw out from the env, any part of it which
+ doesn't overlap with the guest state written by this exit.
+ Since the exit only writes one section, it's simplest to
+ do this: (1) check whether env contains a write that
+ completely overlaps the write done by this exit; (2) empty
+ out env; and (3) if (1) was true, add the write done by
+ this exit.
+
+ To make (1) a bit simpler, merely search for a write that
+ exactly matches the one done by this exit. That's safe
+ because it will fail as often or more often than a full
+ overlap check, and failure to find an overlapping write in
+ env is the safe case (we just nuke env if that
+ happens). Note that only (2) is currently done; (1) and (3)
+ are left disabled below, which is merely conservative. */
+ //vassert(isIRAtom(st->Ist.Exit.guard));
+ /* (1) */
+ //key = mk_key_GetPut(st->Ist.Exit.offsIP,
+ // typeOfIRConst(st->Ist.Exit.dst));
+ //re_add = lookupHHW(env, NULL, key);
+ /* (2) */
for (j = 0; j < env->used; j++)
env->inuse[j] = False;
+ /* (3) */
+ //if (0 && re_add)
+ // addToHHW(env, (HWord)key, 0);
continue;
}
@@ -926,10 +955,24 @@
assumed to compute different values. After all the accesses may happen
at different times and the guest state / memory can have changed in
the meantime. */
+
+/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
+ wrapper that deals with the common tags-don't-match case, and a
+ slower out of line general case. Saves a few insns. */
+
+__attribute__((noinline))
+static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
+
+inline
static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
if (e1->tag != e2->tag) return False;
+ return sameIRExprs_aux2(env, e1, e2);
+}
+__attribute__((noinline))
+static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
+{
if (num_nodes_visited++ > NODE_LIMIT) return False;
switch (e1->tag) {
@@ -996,6 +1039,7 @@
return False;
}
+inline
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
{
Bool same;
@@ -2217,7 +2261,8 @@
vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
}
}
- return IRStmt_Exit(fcond, st->Ist.Exit.jk, st->Ist.Exit.dst);
+ return IRStmt_Exit(fcond, st->Ist.Exit.jk,
+ st->Ist.Exit.dst, st->Ist.Exit.offsIP);
}
default:
@@ -2294,6 +2339,7 @@
out->next = subst_Expr( env, in->next );
out->jumpkind = in->jumpkind;
+ out->offsIP = in->offsIP;
return out;
}
@@ -2519,6 +2565,8 @@
= IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
bb->jumpkind
= bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
+ bb->offsIP
+ = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
for (i = i_unconditional_exit; i < bb->stmts_used; i++)
bb->stmts[i] = IRStmt_NoOp();
}
@@ -4604,7 +4652,8 @@
return IRStmt_Exit(
atbSubst_Expr(env, st->Ist.Exit.guard),
st->Ist.Exit.jk,
- st->Ist.Exit.dst
+ st->Ist.Exit.dst,
+ st->Ist.Exit.offsIP
);
case Ist_IMark:
return IRStmt_IMark(st->Ist.IMark.addr,
@@ -4649,7 +4698,7 @@
}
}
-/* notstatic */ void ado_treebuild_BB ( IRSB* bb )
+/* notstatic */ Addr64 ado_treebuild_BB ( IRSB* bb )
{
Int i, j, k, m;
Bool stmtPuts, stmtStores, invalidateMe;
@@ -4657,19 +4706,37 @@
IRStmt* st2;
ATmpInfo env[A_NENV];
+ Bool max_ga_known = False;
+ Addr64 max_ga = 0;
+
Int n_tmps = bb->tyenv->types_used;
UShort* uses = LibVEX_Alloc(n_tmps * sizeof(UShort));
/* Phase 1. Scan forwards in bb, counting use occurrences of each
- temp. Also count occurrences in the bb->next field. */
+ temp. Also count occurrences in the bb->next field. Take the
+ opportunity to also find the maximum guest address in the block,
+ since that will be needed later for deciding when we can safely
+ elide event checks. */
for (i = 0; i < n_tmps; i++)
uses[i] = 0;
for (i = 0; i < bb->stmts_used; i++) {
st = bb->stmts[i];
- if (st->tag == Ist_NoOp)
- continue;
+ switch (st->tag) {
+ case Ist_NoOp:
+ continue;
+ case Ist_IMark: {
+ Int len = st->Ist.IMark.len;
+ Addr64 mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
+ max_ga_known = True;
+ if (mga > max_ga)
+ max_ga = mga;
+ break;
+ }
+ default:
+ break;
+ }
aoccCount_Stmt( uses, st );
}
aoccCount_Expr(uses, bb->next );
@@ -4842,6 +4909,8 @@
by definition dead? */
bb->next = atbSubst_Expr(env, bb->next);
bb->stmts_used = j;
+
+ return max_ga_known ? max_ga : ~(Addr64)0;
}
diff --git a/priv/ir_opt.h b/priv/ir_opt.h
index 9390a1c..ded1c2d 100644
--- a/priv/ir_opt.h
+++ b/priv/ir_opt.h
@@ -60,9 +60,11 @@
void do_deadcode_BB ( IRSB* bb );
/* The tree-builder. Make (approximately) maximal safe trees. bb is
- destructively modified. */
+ destructively modified. Returns (unrelatedly, but useful later on)
+ the guest address of the highest addressed byte from any insn in
+ this block, or Addr64_MAX if unknown (can that ever happen?) */
extern
-void ado_treebuild_BB ( IRSB* bb );
+Addr64 ado_treebuild_BB ( IRSB* bb );
#endif /* ndef __VEX_IR_OPT_H */
diff --git a/priv/main_main.c b/priv/main_main.c
index c8777fe..cf6e2f5 100644
--- a/priv/main_main.c
+++ b/priv/main_main.c
@@ -155,6 +155,17 @@
vassert(VEX_HOST_WORDSIZE == sizeof(void*));
vassert(VEX_HOST_WORDSIZE == sizeof(HWord));
+ /* These take a lot of space, so make sure we don't have
+ any unnoticed size regressions. */
+ if (VEX_HOST_WORDSIZE == 4) {
+ vassert(sizeof(IRExpr) == 24);
+ vassert(sizeof(IRStmt) == 20 /* x86 */
+ || sizeof(IRStmt) == 24 /* arm */);
+ } else {
+ vassert(sizeof(IRExpr) == 48);
+ vassert(sizeof(IRStmt) == 40);
+ }
+
/* Really start up .. */
vex_debuglevel = debuglevel;
vex_valgrind_support = valgrind_support;
@@ -183,9 +194,11 @@
HInstr* (*directReload) ( HInstr*, HReg, Short );
void (*ppInstr) ( HInstr*, Bool );
void (*ppReg) ( HReg );
- HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*,
- VexAbiInfo* );
- Int (*emit) ( UChar*, Int, HInstr*, Bool, void*, void* );
+ HInstrArray* (*iselSB) ( IRSB*, VexArch, VexArchInfo*, VexAbiInfo*,
+ Int, Int, Bool, Bool, Addr64 );
+ Int (*emit) ( /*MB_MOD*/Bool*,
+ UChar*, Int, HInstr*, Bool,
+ void*, void*, void*, void* );
IRExpr* (*specHelper) ( HChar*, IRExpr**, IRStmt**, Int );
Bool (*preciseMemExnsFn) ( Int, Int );
@@ -197,11 +210,13 @@
HInstrArray* vcode;
HInstrArray* rcode;
Int i, j, k, out_used, guest_sizeB;
- Int offB_TISTART, offB_TILEN;
- UChar insn_bytes[48];
+ Int offB_TISTART, offB_TILEN, offB_GUEST_IP, szB_GUEST_IP;
+ Int offB_HOST_EvC_COUNTER, offB_HOST_EvC_FAILADDR;
+ UChar insn_bytes[64];
IRType guest_word_type;
IRType host_word_type;
- Bool mode64;
+ Bool mode64, chainingAllowed;
+ Addr64 max_ga;
guest_layout = NULL;
available_real_regs = NULL;
@@ -223,12 +238,27 @@
host_word_type = Ity_INVALID;
offB_TISTART = 0;
offB_TILEN = 0;
+ offB_GUEST_IP = 0;
+ szB_GUEST_IP = 0;
+ offB_HOST_EvC_COUNTER = 0;
+ offB_HOST_EvC_FAILADDR = 0;
mode64 = False;
+ chainingAllowed = False;
vex_traceflags = vta->traceflags;
vassert(vex_initdone);
- vassert(vta->needs_self_check != NULL);
+ vassert(vta->needs_self_check != NULL);
+ vassert(vta->disp_cp_xassisted != NULL);
+ /* Both the chainers and the indir are either NULL or non-NULL. */
+ if (vta->disp_cp_chain_me_to_slowEP != NULL) {
+ vassert(vta->disp_cp_chain_me_to_fastEP != NULL);
+ vassert(vta->disp_cp_xindir != NULL);
+ chainingAllowed = True;
+ } else {
+ vassert(vta->disp_cp_chain_me_to_fastEP == NULL);
+ vassert(vta->disp_cp_xindir == NULL);
+ }
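
The client-side contract this enforces, sketched with names extrapolated
from the VG_(disp_cp_chain_me_to_{slowEP,fastEP}) mentioned in the XDirect
comments (illustrative, not part of this patch): supply all three
chaining/indirection entry points, or none.

    /* Chaining enabled: all three non-NULL.  For no-redir translations
       leave them NULL; only disp_cp_xassisted, which is always
       required, will be used. */
    vta.disp_cp_chain_me_to_slowEP = VG_(disp_cp_chain_me_to_slowEP);
    vta.disp_cp_chain_me_to_fastEP = VG_(disp_cp_chain_me_to_fastEP);
    vta.disp_cp_xindir             = VG_(disp_cp_xindir);
    vta.disp_cp_xassisted          = VG_(disp_cp_xassisted);
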
vexSetAllocModeTEMP_and_clear();
vexAllocSanityCheck();
@@ -254,14 +284,12 @@
ppInstr = (void(*)(HInstr*, Bool)) ppX86Instr;
ppReg = (void(*)(HReg)) ppHRegX86;
iselSB = iselSB_X86;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_X86Instr;
host_is_bigendian = False;
host_word_type = Ity_I32;
vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_host.hwcaps));
- /* jump-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted != NULL);
- vassert(vta->dispatch_assisted != NULL);
break;
case VexArchAMD64:
@@ -279,14 +307,12 @@
ppInstr = (void(*)(HInstr*, Bool)) ppAMD64Instr;
ppReg = (void(*)(HReg)) ppHRegAMD64;
iselSB = iselSB_AMD64;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_AMD64Instr;
host_is_bigendian = False;
host_word_type = Ity_I64;
vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_host.hwcaps));
- /* jump-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted != NULL);
- vassert(vta->dispatch_assisted != NULL);
break;
case VexArchPPC32:
@@ -301,14 +327,12 @@
ppInstr = (void(*)(HInstr*,Bool)) ppPPCInstr;
ppReg = (void(*)(HReg)) ppHRegPPC;
iselSB = iselSB_PPC;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_PPCInstr;
host_is_bigendian = True;
host_word_type = Ity_I32;
vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_host.hwcaps));
- /* return-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted == NULL);
- vassert(vta->dispatch_assisted == NULL);
break;
case VexArchPPC64:
@@ -323,14 +347,12 @@
ppInstr = (void(*)(HInstr*, Bool)) ppPPCInstr;
ppReg = (void(*)(HReg)) ppHRegPPC;
iselSB = iselSB_PPC;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_PPCInstr;
host_is_bigendian = True;
host_word_type = Ity_I64;
vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_host.hwcaps));
- /* return-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted == NULL);
- vassert(vta->dispatch_assisted == NULL);
break;
case VexArchS390X:
@@ -345,14 +367,11 @@
ppInstr = (void(*)(HInstr*, Bool)) ppS390Instr;
ppReg = (void(*)(HReg)) ppHRegS390;
iselSB = iselSB_S390;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
- emit_S390Instr;
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*)) emit_S390Instr;
host_is_bigendian = True;
host_word_type = Ity_I64;
vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_host.hwcaps));
- /* return-to-dispatcher scheme */
- vassert(vta->dispatch_unassisted == NULL);
- vassert(vta->dispatch_assisted == NULL);
break;
case VexArchARM:
@@ -367,14 +386,12 @@
ppInstr = (void(*)(HInstr*, Bool)) ppARMInstr;
ppReg = (void(*)(HReg)) ppHRegARM;
iselSB = iselSB_ARM;
- emit = (Int(*)(UChar*,Int,HInstr*,Bool,void*,void*))
+ emit = (Int(*)(Bool*,UChar*,Int,HInstr*,Bool,
+ void*,void*,void*,void*))
emit_ARMInstr;
host_is_bigendian = False;
host_word_type = Ity_I32;
vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_host.hwcaps));
- vassert(vta->dispatch_unassisted == NULL);
- vassert(vta->dispatch_assisted == NULL);
- /* return-to-dispatcher scheme */
break;
default:
@@ -385,14 +402,18 @@
switch (vta->arch_guest) {
case VexArchX86:
- preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns;
- disInstrFn = disInstr_X86;
- specHelper = guest_x86_spechelper;
- guest_sizeB = sizeof(VexGuestX86State);
- guest_word_type = Ity_I32;
- guest_layout = &x86guest_layout;
- offB_TISTART = offsetof(VexGuestX86State,guest_TISTART);
- offB_TILEN = offsetof(VexGuestX86State,guest_TILEN);
+ preciseMemExnsFn = guest_x86_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_X86;
+ specHelper = guest_x86_spechelper;
+ guest_sizeB = sizeof(VexGuestX86State);
+ guest_word_type = Ity_I32;
+ guest_layout = &x86guest_layout;
+ offB_TISTART = offsetof(VexGuestX86State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestX86State,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestX86State,guest_EIP);
+ szB_GUEST_IP = sizeof( ((VexGuestX86State*)0)->guest_EIP );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestX86State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestX86State,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchX86, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestX86State) % 16);
vassert(sizeof( ((VexGuestX86State*)0)->guest_TISTART) == 4);
@@ -401,14 +422,18 @@
break;
case VexArchAMD64:
- preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns;
- disInstrFn = disInstr_AMD64;
- specHelper = guest_amd64_spechelper;
- guest_sizeB = sizeof(VexGuestAMD64State);
- guest_word_type = Ity_I64;
- guest_layout = &amd64guest_layout;
- offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART);
- offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN);
+ preciseMemExnsFn = guest_amd64_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_AMD64;
+ specHelper = guest_amd64_spechelper;
+ guest_sizeB = sizeof(VexGuestAMD64State);
+ guest_word_type = Ity_I64;
+ guest_layout = &amd64guest_layout;
+ offB_TISTART = offsetof(VexGuestAMD64State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestAMD64State,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestAMD64State,guest_RIP);
+ szB_GUEST_IP = sizeof( ((VexGuestAMD64State*)0)->guest_RIP );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestAMD64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestAMD64State,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchAMD64, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestAMD64State) % 16);
vassert(sizeof( ((VexGuestAMD64State*)0)->guest_TISTART ) == 8);
@@ -417,14 +442,18 @@
break;
case VexArchPPC32:
- preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
- disInstrFn = disInstr_PPC;
- specHelper = guest_ppc32_spechelper;
- guest_sizeB = sizeof(VexGuestPPC32State);
- guest_word_type = Ity_I32;
- guest_layout = &ppc32Guest_layout;
- offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART);
- offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN);
+ preciseMemExnsFn = guest_ppc32_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_PPC;
+ specHelper = guest_ppc32_spechelper;
+ guest_sizeB = sizeof(VexGuestPPC32State);
+ guest_word_type = Ity_I32;
+ guest_layout = &ppc32Guest_layout;
+ offB_TISTART = offsetof(VexGuestPPC32State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestPPC32State,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestPPC32State,guest_CIA);
+ szB_GUEST_IP = sizeof( ((VexGuestPPC32State*)0)->guest_CIA );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC32State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC32State,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchPPC32, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestPPC32State) % 16);
vassert(sizeof( ((VexGuestPPC32State*)0)->guest_TISTART ) == 4);
@@ -433,14 +462,18 @@
break;
case VexArchPPC64:
- preciseMemExnsFn = guest_ppc64_state_requires_precise_mem_exns;
- disInstrFn = disInstr_PPC;
- specHelper = guest_ppc64_spechelper;
- guest_sizeB = sizeof(VexGuestPPC64State);
- guest_word_type = Ity_I64;
- guest_layout = &ppc64Guest_layout;
- offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART);
- offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN);
+ preciseMemExnsFn = guest_ppc64_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_PPC;
+ specHelper = guest_ppc64_spechelper;
+ guest_sizeB = sizeof(VexGuestPPC64State);
+ guest_word_type = Ity_I64;
+ guest_layout = &ppc64Guest_layout;
+ offB_TISTART = offsetof(VexGuestPPC64State,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestPPC64State,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestPPC64State,guest_CIA);
+ szB_GUEST_IP = sizeof( ((VexGuestPPC64State*)0)->guest_CIA );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestPPC64State,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestPPC64State,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchPPC64, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestPPC64State) % 16);
vassert(sizeof( ((VexGuestPPC64State*)0)->guest_TISTART ) == 8);
@@ -458,6 +491,10 @@
guest_layout = &s390xGuest_layout;
offB_TISTART = offsetof(VexGuestS390XState,guest_TISTART);
offB_TILEN = offsetof(VexGuestS390XState,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestS390XState,guest_IA);
+ szB_GUEST_IP = sizeof( ((VexGuestS390XState*)0)->guest_IA);
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestS390XState,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestS390XState,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchS390X, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestS390XState) % 16);
vassert(sizeof( ((VexGuestS390XState*)0)->guest_TISTART ) == 8);
@@ -466,14 +503,18 @@
break;
case VexArchARM:
- preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
- disInstrFn = disInstr_ARM;
- specHelper = guest_arm_spechelper;
- guest_sizeB = sizeof(VexGuestARMState);
- guest_word_type = Ity_I32;
- guest_layout = &armGuest_layout;
- offB_TISTART = offsetof(VexGuestARMState,guest_TISTART);
- offB_TILEN = offsetof(VexGuestARMState,guest_TILEN);
+ preciseMemExnsFn = guest_arm_state_requires_precise_mem_exns;
+ disInstrFn = disInstr_ARM;
+ specHelper = guest_arm_spechelper;
+ guest_sizeB = sizeof(VexGuestARMState);
+ guest_word_type = Ity_I32;
+ guest_layout = &armGuest_layout;
+ offB_TISTART = offsetof(VexGuestARMState,guest_TISTART);
+ offB_TILEN = offsetof(VexGuestARMState,guest_TILEN);
+ offB_GUEST_IP = offsetof(VexGuestARMState,guest_R15T);
+ szB_GUEST_IP = sizeof( ((VexGuestARMState*)0)->guest_R15T );
+ offB_HOST_EvC_COUNTER = offsetof(VexGuestARMState,host_EvC_COUNTER);
+ offB_HOST_EvC_FAILADDR = offsetof(VexGuestARMState,host_EvC_FAILADDR);
vassert(are_valid_hwcaps(VexArchARM, vta->archinfo_guest.hwcaps));
vassert(0 == sizeof(VexGuestARMState) % 16);
vassert(sizeof( ((VexGuestARMState*)0)->guest_TISTART) == 4);
@@ -489,6 +530,7 @@
VexTranslateResult res;
res.status = VexTransOK;
res.n_sc_extents = 0;
+ res.offs_profInc = -1;
/* yet more sanity checks ... */
if (vta->arch_guest == vta->arch_host) {
@@ -520,7 +562,9 @@
vta->needs_self_check,
vta->preamble_function,
offB_TISTART,
- offB_TILEN );
+ offB_TILEN,
+ offB_GUEST_IP,
+ szB_GUEST_IP );
vexAllocSanityCheck();
@@ -627,7 +671,7 @@
/* Turn it into virtual-registerised code. Build trees -- this
also throws away any dead bindings. */
- ado_treebuild_BB( irsb );
+ max_ga = ado_treebuild_BB( irsb );
if (vta->finaltidy) {
irsb = vta->finaltidy(irsb);
@@ -655,8 +699,19 @@
" Instruction selection "
"------------------------\n");
- vcode = iselSB ( irsb, vta->arch_host, &vta->archinfo_host,
- &vta->abiinfo_both );
+   /* No guest has its IP field at offset zero.  If this assertion
+      fails, some transformation pass has failed to update or copy
+      irsb->offsIP properly. */
+ vassert(irsb->offsIP >= 16);
+
+ vcode = iselSB ( irsb, vta->arch_host,
+ &vta->archinfo_host,
+ &vta->abiinfo_both,
+ offB_HOST_EvC_COUNTER,
+ offB_HOST_EvC_FAILADDR,
+ chainingAllowed,
+ vta->addProfInc,
+ max_ga );
vexAllocSanityCheck();
@@ -710,13 +765,19 @@
out_used = 0; /* tracks along the host_bytes array */
for (i = 0; i < rcode->arr_used; i++) {
- if (vex_traceflags & VEX_TRACE_ASM) {
- ppInstr(rcode->arr[i], mode64);
+ HInstr* hi = rcode->arr[i];
+ Bool hi_isProfInc = False;
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) {
+ ppInstr(hi, mode64);
vex_printf("\n");
}
- j = (*emit)( insn_bytes, sizeof insn_bytes, rcode->arr[i], mode64,
- vta->dispatch_unassisted, vta->dispatch_assisted );
- if (vex_traceflags & VEX_TRACE_ASM) {
+ j = emit( &hi_isProfInc,
+ insn_bytes, sizeof insn_bytes, hi, mode64,
+ vta->disp_cp_chain_me_to_slowEP,
+ vta->disp_cp_chain_me_to_fastEP,
+ vta->disp_cp_xindir,
+ vta->disp_cp_xassisted );
+ if (UNLIKELY(vex_traceflags & VEX_TRACE_ASM)) {
for (k = 0; k < j; k++)
if (insn_bytes[k] < 16)
vex_printf("0%x ", (UInt)insn_bytes[k]);
@@ -724,15 +785,23 @@
vex_printf("%x ", (UInt)insn_bytes[k]);
vex_printf("\n\n");
}
- if (out_used + j > vta->host_bytes_size) {
+ if (UNLIKELY(out_used + j > vta->host_bytes_size)) {
vexSetAllocModeTEMP_and_clear();
vex_traceflags = 0;
res.status = VexTransOutputFull;
return res;
}
- for (k = 0; k < j; k++) {
- vta->host_bytes[out_used] = insn_bytes[k];
- out_used++;
+ if (UNLIKELY(hi_isProfInc)) {
+ vassert(vta->addProfInc); /* else where did it come from? */
+ vassert(res.offs_profInc == -1); /* there can be only one (tm) */
+ vassert(out_used >= 0);
+ res.offs_profInc = out_used;
+ }
+ { UChar* dst = &vta->host_bytes[out_used];
+ for (k = 0; k < j; k++) {
+ dst[k] = insn_bytes[k];
+ }
+ out_used += j;
}
vassert(out_used <= vta->host_bytes_size);
}
@@ -748,6 +817,127 @@
}
+/* --------- Chain/Unchain XDirects. --------- */
+
+VexInvalRange LibVEX_Chain ( VexArch arch_host,
+ void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to )
+{
+ VexInvalRange (*chainXDirect)(void*, void*, void*) = NULL;
+ switch (arch_host) {
+ case VexArchX86:
+ chainXDirect = chainXDirect_X86; break;
+ case VexArchAMD64:
+ chainXDirect = chainXDirect_AMD64; break;
+ case VexArchARM:
+ chainXDirect = chainXDirect_ARM; break;
+ case VexArchS390X:
+ chainXDirect = chainXDirect_S390; break;
+ case VexArchPPC32:
+ return chainXDirect_PPC(place_to_chain,
+ disp_cp_chain_me_EXPECTED,
+ place_to_jump_to, False/*!mode64*/);
+ case VexArchPPC64:
+ return chainXDirect_PPC(place_to_chain,
+ disp_cp_chain_me_EXPECTED,
+ place_to_jump_to, True/*mode64*/);
+ default:
+ vassert(0);
+ }
+ vassert(chainXDirect);
+ VexInvalRange vir
+ = chainXDirect(place_to_chain, disp_cp_chain_me_EXPECTED,
+ place_to_jump_to);
+ return vir;
+}
+
+VexInvalRange LibVEX_UnChain ( VexArch arch_host,
+ void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me )
+{
+ VexInvalRange (*unchainXDirect)(void*, void*, void*) = NULL;
+ switch (arch_host) {
+ case VexArchX86:
+ unchainXDirect = unchainXDirect_X86; break;
+ case VexArchAMD64:
+ unchainXDirect = unchainXDirect_AMD64; break;
+ case VexArchARM:
+ unchainXDirect = unchainXDirect_ARM; break;
+ case VexArchS390X:
+ unchainXDirect = unchainXDirect_S390; break;
+ case VexArchPPC32:
+ return unchainXDirect_PPC(place_to_unchain,
+ place_to_jump_to_EXPECTED,
+ disp_cp_chain_me, False/*!mode64*/);
+ case VexArchPPC64:
+ return unchainXDirect_PPC(place_to_unchain,
+ place_to_jump_to_EXPECTED,
+ disp_cp_chain_me, True/*mode64*/);
+ default:
+ vassert(0);
+ }
+ vassert(unchainXDirect);
+ VexInvalRange vir
+ = unchainXDirect(place_to_unchain, place_to_jump_to_EXPECTED,
+ disp_cp_chain_me);
+ return vir;
+}
+
+Int LibVEX_evCheckSzB ( VexArch arch_host )
+{
+ static Int cached = 0; /* DO NOT MAKE NON-STATIC */
+ if (UNLIKELY(cached == 0)) {
+ switch (arch_host) {
+ case VexArchX86:
+ cached = evCheckSzB_X86(); break;
+ case VexArchAMD64:
+ cached = evCheckSzB_AMD64(); break;
+ case VexArchARM:
+ cached = evCheckSzB_ARM(); break;
+ case VexArchS390X:
+ cached = evCheckSzB_S390(); break;
+ case VexArchPPC32:
+ case VexArchPPC64:
+ cached = evCheckSzB_PPC(); break;
+ default:
+ vassert(0);
+ }
+ }
+ return cached;
+}
+
+VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host,
+ void* place_to_patch,
+ ULong* location_of_counter )
+{
+ VexInvalRange (*patchProfInc)(void*,ULong*) = NULL;
+ switch (arch_host) {
+ case VexArchX86:
+ patchProfInc = patchProfInc_X86; break;
+ case VexArchAMD64:
+ patchProfInc = patchProfInc_AMD64; break;
+ case VexArchARM:
+ patchProfInc = patchProfInc_ARM; break;
+ case VexArchS390X:
+ patchProfInc = patchProfInc_S390; break;
+ case VexArchPPC32:
+ return patchProfInc_PPC(place_to_patch,
+ location_of_counter, False/*!mode64*/);
+ case VexArchPPC64:
+ return patchProfInc_PPC(place_to_patch,
+ location_of_counter, True/*mode64*/);
+ default:
+ vassert(0);
+ }
+ vassert(patchProfInc);
+ VexInvalRange vir
+ = patchProfInc(place_to_patch, location_of_counter);
+ return vir;
+}
+
+
/* --------- Emulation warnings. --------- */
HChar* LibVEX_EmWarn_string ( VexEmWarn ew )
diff --git a/pub/libvex.h b/pub/libvex.h
index 7b81598..0466801 100644
--- a/pub/libvex.h
+++ b/pub/libvex.h
@@ -486,6 +486,9 @@
VexTransAccessFail, VexTransOutputFull } status;
/* The number of extents that have a self-check (0 to 3) */
UInt n_sc_extents;
+ /* Offset in generated code of the profile inc, or -1 if
+ none. Needed for later patching. */
+ Int offs_profInc;
}
VexTranslateResult;
@@ -583,6 +586,10 @@
/* IN: debug: trace vex activity at various points */
Int traceflags;
+ /* IN: profiling: add a 64 bit profiler counter increment to the
+ translation? */
+ Bool addProfInc;
+
/* IN: address of the dispatcher entry points. Describes the
places where generated code should jump to at the end of each
bb.
@@ -615,9 +622,13 @@
The aim is to get back and forth between translations and the
dispatcher without creating memory traffic to store return
addresses.
+
+ FIXME: update this comment
*/
- void* dispatch_unassisted;
- void* dispatch_assisted;
+ void* disp_cp_chain_me_to_slowEP;
+ void* disp_cp_chain_me_to_fastEP;
+ void* disp_cp_xindir;
+ void* disp_cp_xassisted;
}
VexTranslateArgs;
@@ -635,7 +646,60 @@
would not be the result. Therefore chase_into_ok should disallow
following into #2. That will force the caller to eventually
request a new translation starting at #2, at which point Vex will
- correctly observe the make-a-self-check flag. */
+ correctly observe the make-a-self-check flag.
+
+ FIXME: is this still up to date? */
+
+
+/*-------------------------------------------------------*/
+/*--- Patch existing translations ---*/
+/*-------------------------------------------------------*/
+
+/* Indicates a host address range for which callers of the functions
+   below must request I-D cache syncing after the call.  A ::len of
+   zero could in principle mean either zero bytes or the whole address
+   space; here it always means zero bytes. */
+typedef
+ struct {
+ HWord start;
+ HWord len;
+ }
+ VexInvalRange;
+
+/* Chain an XDirect jump located at place_to_chain so it jumps to
+ place_to_jump_to. It is expected (and checked) that this site
+ currently contains a call to the dispatcher specified by
+ disp_cp_chain_me_EXPECTED. */
+extern
+VexInvalRange LibVEX_Chain ( VexArch arch_host,
+ void* place_to_chain,
+ void* disp_cp_chain_me_EXPECTED,
+ void* place_to_jump_to );
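[Editor's note: a minimal client-side sketch of servicing a chain request. The helper invalidate_icache and the call site are assumptions for illustration, not part of this API.]

#include "libvex.h"

/* Assumed client helper: force I-D coherence over [start, start+len). */
extern void invalidate_icache ( void* start, HWord len );

static void handle_chain_request ( VexArch arch_host,
                                   void* place_to_chain,
                                   void* disp_cp_chain_me,
                                   void* dest_translation )
{
   VexInvalRange vir
      = LibVEX_Chain( arch_host, place_to_chain,
                      disp_cp_chain_me, dest_translation );
   /* vir.len == 0 means "nothing to sync", never "everything". */
   if (vir.len > 0)
      invalidate_icache( (void*)vir.start, vir.len );
}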
+
+/* Undo an XDirect jump located at place_to_unchain, so it is
+ converted back into a call to disp_cp_chain_me. It is expected
+ (and checked) that this site currently contains a jump directly to
+ the address specified by place_to_jump_to_EXPECTED. */
+extern
+VexInvalRange LibVEX_UnChain ( VexArch arch_host,
+ void* place_to_unchain,
+ void* place_to_jump_to_EXPECTED,
+ void* disp_cp_chain_me );
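[Editor's note: unchaining is the symmetric operation, used for example before the destination translation is discarded; same assumptions as the sketch above.]

static void handle_unchain_request ( VexArch arch_host,
                                     void* place_to_unchain,
                                     void* dest_translation,
                                     void* disp_cp_chain_me )
{
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, place_to_unchain,
                        dest_translation, disp_cp_chain_me );
   if (vir.len > 0)
      invalidate_icache( (void*)vir.start, vir.len );
}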
+
+/* Returns a constant -- the size of the event check that is put at
+ the start of every translation. This makes it possible to
+ calculate the fast entry point address if the slow entry point
+ address is known (the usual case), or vice versa. */
+extern
+Int LibVEX_evCheckSzB ( VexArch arch_host );
+
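[Editor's note: a sketch of the address arithmetic the comment above enables, assuming the client records the slow entry point, i.e. the start address of a translation.]

static void* fast_EP_from_slow_EP ( VexArch arch_host, void* slowEP )
{
   /* The event check occupies a fixed number of bytes at the start
      of every translation for a given host architecture. */
   Int evc_szB = LibVEX_evCheckSzB( arch_host );
   return (void*)((HWord)slowEP + evc_szB);
}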
+
+/* Patch the counter location into an existing ProfInc point. The
+ specified point is checked to make sure it is plausible. */
+extern
+VexInvalRange LibVEX_PatchProfInc ( VexArch arch_host,
+ void* place_to_patch,
+ ULong* location_of_counter );
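[Editor's note: a sketch tying this to VexTranslateResult.offs_profInc above. After a translation produced with addProfInc set, the client can locate and patch the increment; the counter variable and the sync helper are the same assumptions as in the earlier sketches.]

static ULong prof_counter; /* assumed per-translation counter storage */

static void patch_prof_inc ( VexArch arch_host,
                             UChar* host_bytes,
                             VexTranslateResult res )
{
   if (res.offs_profInc == -1)
      return; /* translation contains no profiler increment */
   VexInvalRange vir
      = LibVEX_PatchProfInc( arch_host,
                             &host_bytes[res.offs_profInc],
                             &prof_counter );
   if (vir.len > 0)
      invalidate_icache( (void*)vir.start, vir.len );
}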
/*-------------------------------------------------------*/
diff --git a/pub/libvex_guest_amd64.h b/pub/libvex_guest_amd64.h
index 564f6a0..7d5d354 100644
--- a/pub/libvex_guest_amd64.h
+++ b/pub/libvex_guest_amd64.h
@@ -52,34 +52,39 @@
typedef
struct {
- /* 0 */ ULong guest_RAX;
- /* 8 */ ULong guest_RCX;
- /* 16 */ ULong guest_RDX;
- /* 24 */ ULong guest_RBX;
- /* 32 */ ULong guest_RSP;
- /* 40 */ ULong guest_RBP;
- /* 48 */ ULong guest_RSI;
- /* 56 */ ULong guest_RDI;
- /* 64 */ ULong guest_R8;
- /* 72 */ ULong guest_R9;
- /* 80 */ ULong guest_R10;
- /* 88 */ ULong guest_R11;
- /* 96 */ ULong guest_R12;
- /* 104 */ ULong guest_R13;
- /* 112 */ ULong guest_R14;
- /* 120 */ ULong guest_R15;
+      /* Event check fail addr, counter, and padding to make RAX
+         16-aligned. */
+ /* 0 */ ULong host_EvC_FAILADDR;
+ /* 8 */ UInt host_EvC_COUNTER;
+ /* 12 */ UInt pad0;
+ /* 16 */ ULong guest_RAX;
+ /* 24 */ ULong guest_RCX;
+ /* 32 */ ULong guest_RDX;
+ /* 40 */ ULong guest_RBX;
+ /* 48 */ ULong guest_RSP;
+ /* 56 */ ULong guest_RBP;
+ /* 64 */ ULong guest_RSI;
+ /* 72 */ ULong guest_RDI;
+ /* 80 */ ULong guest_R8;
+ /* 88 */ ULong guest_R9;
+ /* 96 */ ULong guest_R10;
+ /* 104 */ ULong guest_R11;
+ /* 112 */ ULong guest_R12;
+ /* 120 */ ULong guest_R13;
+ /* 128 */ ULong guest_R14;
+ /* 136 */ ULong guest_R15;
/* 4-word thunk used to calculate O S Z A C P flags. */
- /* 128 */ ULong guest_CC_OP;
- /* 136 */ ULong guest_CC_DEP1;
- /* 144 */ ULong guest_CC_DEP2;
- /* 152 */ ULong guest_CC_NDEP;
+ /* 144 */ ULong guest_CC_OP;
+ /* 152 */ ULong guest_CC_DEP1;
+ /* 160 */ ULong guest_CC_DEP2;
+ /* 168 */ ULong guest_CC_NDEP;
/* The D flag is stored here, encoded as either -1 or +1 */
- /* 160 */ ULong guest_DFLAG;
- /* 168 */ ULong guest_RIP;
+ /* 176 */ ULong guest_DFLAG;
+ /* 184 */ ULong guest_RIP;
/* Bit 18 (AC) of eflags stored here, as either 0 or 1. */
/* ... */ ULong guest_ACFLAG;
/* Bit 21 (ID) of eflags stored here, as either 0 or 1. */
- /* 176 */ ULong guest_IDFLAG;
+ /* 192 */ ULong guest_IDFLAG;
/* Probably a lot more stuff too.
D,ID flags
16 128-bit SSE registers
@@ -89,14 +94,14 @@
/* HACK to make tls on amd64-linux work. %fs only ever seems to
hold zero, and so guest_FS_ZERO holds the 64-bit offset
associated with a %fs value of zero. */
- /* 184 */ ULong guest_FS_ZERO;
+ /* 200 */ ULong guest_FS_ZERO;
/* XMM registers. Note that these must be allocated
consecutively in order that the SSE4.2 PCMP{E,I}STR{I,M}
helpers can treat them as an array. XMM16 is a fake reg used
as an intermediary in handling aforementioned insns. */
- /* 192 */ULong guest_SSEROUND;
- /* 200 */U128 guest_XMM0;
+ /* 208 */ULong guest_SSEROUND;
+ /* 216 */U128 guest_XMM0;
U128 guest_XMM1;
U128 guest_XMM2;
U128 guest_XMM3;
@@ -118,14 +123,14 @@
/* Note. Setting guest_FTOP to be ULong messes up the
delicately-balanced PutI/GetI optimisation machinery.
Therefore best to leave it as a UInt. */
- /* 456 */UInt guest_FTOP;
+ UInt guest_FTOP;
ULong guest_FPREG[8];
- /* 528 */ UChar guest_FPTAG[8];
- /* 536 */ ULong guest_FPROUND;
- /* 544 */ ULong guest_FC3210;
+ UChar guest_FPTAG[8];
+ ULong guest_FPROUND;
+ ULong guest_FC3210;
/* Emulation warnings */
- /* 552 */ UInt guest_EMWARN;
+ UInt guest_EMWARN;
/* Translation-invalidation area description. Not used on amd64
(there is no invalidate-icache insn), but needed so as to
@@ -161,7 +166,7 @@
ULong guest_IP_AT_SYSCALL;
   /* Padding to make it have a 16-aligned size */
- ULong padding;
+ ULong pad1;
}
VexGuestAMD64State;
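[Editor's note: the offsets in the comments above can be checked mechanically. A standalone sketch, mirroring the vasserts that LibVEX_Translate already applies to the guest state.]

#include <assert.h>
#include <stddef.h>
#include "libvex_guest_amd64.h"

static void check_amd64_layout ( void )
{
   assert( offsetof(VexGuestAMD64State, host_EvC_FAILADDR) ==  0 );
   assert( offsetof(VexGuestAMD64State, host_EvC_COUNTER)  ==  8 );
   assert( offsetof(VexGuestAMD64State, guest_RAX)         == 16 );
   assert( sizeof(VexGuestAMD64State) % 16 == 0 );
}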
diff --git a/pub/libvex_guest_arm.h b/pub/libvex_guest_arm.h
index b6a6a4f..19be179 100644
--- a/pub/libvex_guest_arm.h
+++ b/pub/libvex_guest_arm.h
@@ -42,6 +42,9 @@
typedef
struct {
/* 0 */
+ /* Event check fail addr and counter. */
+ UInt host_EvC_FAILADDR; /* 0 */
+ UInt host_EvC_COUNTER; /* 4 */
UInt guest_R0;
UInt guest_R1;
UInt guest_R2;
@@ -69,7 +72,7 @@
/* 4-word thunk used to calculate N(sign) Z(zero) C(carry,
unsigned overflow) and V(signed overflow) flags. */
- /* 64 */
+ /* 72 */
UInt guest_CC_OP;
UInt guest_CC_DEP1;
UInt guest_CC_DEP2;
@@ -108,11 +111,11 @@
program counter at the last syscall insn (int 0x80/81/82,
sysenter, syscall, svc). Used when backing up to restart a
syscall that has been interrupted by a signal. */
- /* 116 */
+ /* 124 */
UInt guest_IP_AT_SYSCALL;
/* VFP state. D0 .. D15 must be 8-aligned. */
- /* 120 -- I guess there's 4 bytes of padding just prior to this? */
+ /* 128 */
ULong guest_D0;
ULong guest_D1;
ULong guest_D2;
@@ -193,8 +196,6 @@
   /* Padding to make it have a 16-aligned size */
UInt padding1;
- UInt padding2;
- UInt padding3;
}
VexGuestARMState;
diff --git a/pub/libvex_guest_ppc32.h b/pub/libvex_guest_ppc32.h
index d848029..99bec3c 100644
--- a/pub/libvex_guest_ppc32.h
+++ b/pub/libvex_guest_ppc32.h
@@ -48,6 +48,12 @@
typedef
struct {
+ /* Event check fail addr and counter. */
+ /* 0 */ UInt host_EvC_FAILADDR;
+ /* 4 */ UInt host_EvC_COUNTER;
+ /* 8 */ UInt pad3;
+ /* 12 */ UInt pad4;
+ /* Add 16 to all the numbers below. Sigh. */
/* General Purpose Registers */
/* 0 */ UInt guest_GPR0;
/* 4 */ UInt guest_GPR1;
diff --git a/pub/libvex_guest_ppc64.h b/pub/libvex_guest_ppc64.h
index e086c02..c3c0292 100644
--- a/pub/libvex_guest_ppc64.h
+++ b/pub/libvex_guest_ppc64.h
@@ -86,6 +86,12 @@
typedef
struct {
+      /* Event check fail addr, counter, and padding to make GPR0
+         16-aligned. */
+ /* 0 */ ULong host_EvC_FAILADDR;
+ /* 8 */ UInt host_EvC_COUNTER;
+ /* 12 */ UInt pad0;
+ /* Add 16 to all of the offsets below .. */
/* General Purpose Registers */
/* 0 */ ULong guest_GPR0;
/* 8 */ ULong guest_GPR1;
diff --git a/pub/libvex_guest_s390x.h b/pub/libvex_guest_s390x.h
index 3bbeaf2..84d8bdc 100644
--- a/pub/libvex_guest_s390x.h
+++ b/pub/libvex_guest_s390x.h
@@ -144,10 +144,14 @@
/* Emulation warnings; see comments in libvex_emwarn.h */
/* 416 */ UInt guest_EMWARN;
+ /* For translation chaining */
+ /* 420 */ UInt host_EvC_COUNTER;
+ /* 424 */ ULong host_EvC_FAILADDR;
+
/*------------------------------------------------------------*/
/*--- Force alignment to 16 bytes ---*/
/*------------------------------------------------------------*/
- /* 420 */ UChar padding[12];
+ /* No padding needed */
/* 432 */ /* This is the size of the guest state */
} VexGuestS390XState;
diff --git a/pub/libvex_guest_x86.h b/pub/libvex_guest_x86.h
index 80ee423..e0b1b76 100644
--- a/pub/libvex_guest_x86.h
+++ b/pub/libvex_guest_x86.h
@@ -141,40 +141,43 @@
*/
typedef
struct {
- UInt guest_EAX; /* 0 */
+ /* Event check fail addr and counter. */
+ UInt host_EvC_FAILADDR; /* 0 */
+ UInt host_EvC_COUNTER; /* 4 */
+ UInt guest_EAX; /* 8 */
UInt guest_ECX;
UInt guest_EDX;
UInt guest_EBX;
UInt guest_ESP;
UInt guest_EBP;
UInt guest_ESI;
- UInt guest_EDI; /* 28 */
+ UInt guest_EDI; /* 36 */
/* 4-word thunk used to calculate O S Z A C P flags. */
- UInt guest_CC_OP; /* 32 */
+ UInt guest_CC_OP; /* 40 */
UInt guest_CC_DEP1;
UInt guest_CC_DEP2;
- UInt guest_CC_NDEP; /* 44 */
+ UInt guest_CC_NDEP; /* 52 */
/* The D flag is stored here, encoded as either -1 or +1 */
- UInt guest_DFLAG; /* 48 */
+ UInt guest_DFLAG; /* 56 */
/* Bit 21 (ID) of eflags stored here, as either 0 or 1. */
- UInt guest_IDFLAG; /* 52 */
+ UInt guest_IDFLAG; /* 60 */
/* Bit 18 (AC) of eflags stored here, as either 0 or 1. */
- UInt guest_ACFLAG; /* 56 */
+ UInt guest_ACFLAG; /* 64 */
/* EIP */
- UInt guest_EIP; /* 60 */
+ UInt guest_EIP; /* 68 */
/* FPU */
- ULong guest_FPREG[8]; /* 64 */
- UChar guest_FPTAG[8]; /* 128 */
- UInt guest_FPROUND; /* 136 */
- UInt guest_FC3210; /* 140 */
- UInt guest_FTOP; /* 144 */
+ ULong guest_FPREG[8]; /* 72 */
+ UChar guest_FPTAG[8]; /* 136 */
+ UInt guest_FPROUND; /* 144 */
+ UInt guest_FC3210; /* 148 */
+ UInt guest_FTOP; /* 152 */
/* SSE */
- UInt guest_SSEROUND; /* 148 */
- U128 guest_XMM0; /* 152 */
+ UInt guest_SSEROUND; /* 156 */
+ U128 guest_XMM0; /* 160 */
U128 guest_XMM1;
U128 guest_XMM2;
U128 guest_XMM3;
@@ -220,8 +223,6 @@
   /* Padding to make it have a 16-aligned size */
UInt padding1;
- UInt padding2;
- UInt padding3;
}
VexGuestX86State;
diff --git a/pub/libvex_ir.h b/pub/libvex_ir.h
index 2081c8c..1491fe5 100644
--- a/pub/libvex_ir.h
+++ b/pub/libvex_ir.h
@@ -1712,8 +1712,9 @@
guest to restart a syscall that has been interrupted by a signal.
*/
typedef
- enum {
- Ijk_Boring=0x16000, /* not interesting; just goto next */
+ enum {
+ Ijk_INVALID=0x16000,
+ Ijk_Boring, /* not interesting; just goto next */
Ijk_Call, /* guest is doing a call */
Ijk_Ret, /* guest is doing a return */
Ijk_ClientReq, /* do guest client req before continuing */
@@ -2194,11 +2195,15 @@
/* Conditional exit from the middle of an IRSB.
ppIRStmt output: if (<guard>) goto {<jk>} <dst>
eg. if (t69) goto {Boring} 0x4000AAA:I32
+ If <guard> is true, the guest state is also updated by
+ PUT-ing <dst> at <offsIP>. This is done because a
+ taken exit must update the guest program counter.
*/
struct {
IRExpr* guard; /* Conditional expression */
IRJumpKind jk; /* Jump kind */
IRConst* dst; /* Jump target (constant only) */
+ Int offsIP; /* Guest state offset for IP */
} Exit;
} Ist;
}
@@ -2218,7 +2223,11 @@
IRExpr* addr, IRExpr* storedata );
extern IRStmt* IRStmt_Dirty ( IRDirty* details );
extern IRStmt* IRStmt_MBE ( IRMBusEvent event );
-extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst );
+extern IRStmt* IRStmt_Exit ( IRExpr* guard, IRJumpKind jk, IRConst* dst,
+ Int offsIP );
+// TEMP HACK
+#define IRStmt_Exit3(__guard,__jk,__dst) IRStmt_Exit(__guard,__jk,__dst,0)
+
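[Editor's note: a sketch of building an exit under the new four-argument form, for an amd64 guest; the guard temp is a placeholder supplied by the caller.]

#include <stddef.h>
#include "libvex_ir.h"
#include "libvex_guest_amd64.h"

/* Exit to 0x4000AAA when the (I1-typed) guard temp is true; the taken
   path also PUTs the target into the guest RIP slot, per the Exit
   semantics described above. */
static IRStmt* mk_example_exit ( IRTemp guard )
{
   return IRStmt_Exit( IRExpr_RdTmp(guard),
                       Ijk_Boring,
                       IRConst_U64( 0x4000AAAULL ),
                       offsetof(VexGuestAMD64State, guest_RIP) );
}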
/* Deep-copy an IRStmt. */
extern IRStmt* deepCopyIRStmt ( IRStmt* );
@@ -2263,6 +2272,8 @@
executes all the way to the end, without a side exit
- An indication of any special actions (JumpKind) needed
for this final jump.
+ - Offset of the IP field in the guest state. This will be
+ updated before the final jump is done.
"IRSB" stands for "IR Super Block".
*/
@@ -2274,6 +2285,7 @@
Int stmts_used;
IRExpr* next;
IRJumpKind jumpkind;
+ Int offsIP;
}
IRSB;
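[Editor's note: because iselSB now asserts irsb->offsIP >= 16 (see main_main.c above), any pass that rebuilds an IRSB must carry the field across. A sketch of the tail of such a pass, with statement and type-env handling elided.]

static IRSB* example_pass ( IRSB* sb_in )
{
   IRSB* sb_out = emptyIRSB();
   /* ... copy/transform sb_in->tyenv and sb_in->stmts here ... */
   sb_out->next     = deepCopyIRExpr(sb_in->next);
   sb_out->jumpkind = sb_in->jumpkind;
   sb_out->offsIP   = sb_in->offsIP;  /* the new field; easy to forget */
   return sb_out;
}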
diff --git a/pub/libvex_s390x_common.h b/pub/libvex_s390x_common.h
index 95efef6..6d8ef5c 100644
--- a/pub/libvex_s390x_common.h
+++ b/pub/libvex_s390x_common.h
@@ -1,3 +1,4 @@
+/* -*- mode: C; c-basic-offset: 3; -*- */
/*--------------------------------------------------------------------*/
/*--- Common defs for s390x libvex_s390x_common.h ---*/
@@ -27,8 +28,6 @@
The GNU General Public License is contained in the file COPYING.
*/
-/* -*- mode: C; c-basic-offset: 3; -*- */
-
#ifndef __LIBVEX_PUB_S390X_H
#define __LIBVEX_PUB_S390X_H
@@ -42,7 +41,7 @@
/*--------------------------------------------------------------*/
#define S390_REGNO_RETURN_VALUE 2
-#define S390_REGNO_DISPATCH_CTR 12 /* Holds VG_(dispatch_ctr) */
+#define S390_REGNO_TCHAIN_SCRATCH 12
#define S390_REGNO_GUEST_STATE_POINTER 13
#define S390_REGNO_LINK_REGISTER 14
#define S390_REGNO_STACK_POINTER 15
@@ -52,7 +51,7 @@
/*--- Offsets in the stack frame allocated by the dispatcher ---*/
/*--------------------------------------------------------------*/
-/* Where the profiling dispatcher saves the r2 contents. */
+/* Where the dispatcher saves the r2 contents. */
#define S390_OFFSET_SAVED_R2 160+96
/* Where client's FPC register is saved. */
@@ -88,6 +87,12 @@
/* Number of double words needed to store all facility bits. */
#define S390_NUM_FACILITY_DW 2
+/* The length, in bytes, of the code emitted by s390_tchain_load64 */
+#define S390_TCHAIN_LOAD64_LEN 16
+
+/* The length, in bytes, of the call insn (BASR) used in translation
+   chaining */
+#define S390_TCHAIN_CALL_LEN 2
+
#endif /* __LIBVEX_PUB_S390X_H */
/*--------------------------------------------------------------------*/
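[Editor's note: if, as the constant names suggest, an s390x chain site consists of a 64-bit address load followed by the BASR call, the two lengths together bound the region that LibVEX_Chain rewrites. This layout is an assumption, not something stated in the header.]

/* Assumed total size of an s390x translation-chaining patch site. */
enum { S390_TCHAIN_SITE_LEN
          = S390_TCHAIN_LOAD64_LEN + S390_TCHAIN_CALL_LEN };  /* 16+2 */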
diff --git a/pub/libvex_trc_values.h b/pub/libvex_trc_values.h
index b882d1d..cf69444 100644
--- a/pub/libvex_trc_values.h
+++ b/pub/libvex_trc_values.h
@@ -80,6 +80,9 @@
#define VEX_TRC_JMP_SYS_SYSENTER 79 /* do syscall before continuing */
+#define VEX_TRC_JMP_BORING 95 /* return to sched, but just
+ keep going; no special action */
+
#endif /* ndef __LIBVEX_TRC_VALUES_H */
/*---------------------------------------------------------------*/
diff --git a/switchback/switchback.c b/switchback/switchback.c
index 1cf98ef..990c7d3 100644
--- a/switchback/switchback.c
+++ b/switchback/switchback.c
@@ -867,6 +867,7 @@
vta.do_self_check = False;
vta.traceflags = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
vta.dispatch = NULL;
+ vta.addProfInc = False;
tres = LibVEX_Translate ( &vta );
diff --git a/test_main.c b/test_main.c
index cb79408..f443580 100644
--- a/test_main.c
+++ b/test_main.c
@@ -107,7 +107,7 @@
VexTranslateArgs vta;
if (argc != 2) {
- fprintf(stderr, "usage: vex file.org\n");
+ fprintf(stderr, "usage: vex file.orig\n");
exit(1);
}
f = fopen(argv[1], "r");
@@ -176,8 +176,10 @@
vai_ppc32.ppc_cache_line_szB = 128;
LibVEX_default_VexAbiInfo(&vbi);
+ vbi.guest_stack_redzone_size = 128;
/* ----- Set up args for LibVEX_Translate ----- */
+
#if 0 /* ppc32 -> ppc32 */
vta.arch_guest = VexArchPPC32;
vta.archinfo_guest = vai_ppc32;
@@ -196,6 +198,7 @@
vta.arch_host = VexArchX86;
vta.archinfo_host = vai_x86;
#endif
+
vta.abiinfo_both = vbi;
vta.guest_bytes = origbuf;
vta.guest_bytes_addr = (Addr64)orig_addr;
@@ -205,7 +208,8 @@
vta.host_bytes = transbuf;
vta.host_bytes_size = N_TRANSBUF;
vta.host_bytes_used = &trans_used;
-#if 0 /* no instrumentation */
+
+#if 1 /* no instrumentation */
vta.instrument1 = NULL;
vta.instrument2 = NULL;
#endif
@@ -213,19 +217,19 @@
vta.instrument1 = ac_instrument;
vta.instrument2 = NULL;
#endif
-#if 1 /* memcheck */
+#if 0 /* memcheck */
vta.instrument1 = mc_instrument;
vta.instrument2 = NULL;
#endif
vta.needs_self_check = needs_self_check;
vta.preamble_function = NULL;
vta.traceflags = TEST_FLAGS;
-#if 1 /* x86, amd64 hosts */
- vta.dispatch_unassisted = (void*)0x12345678;
- vta.dispatch_assisted = (void*)0x12345678;
-#else /* ppc32, ppc64 hosts */
- vta.dispatch = NULL;
-#endif
+ vta.addProfInc = False;
+
+ vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
+ vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
+ vta.disp_cp_xindir = (void*)0x1234567A;
+ vta.disp_cp_xassisted = (void*)0x1234567B;
vta.finaltidy = NULL;
diff --git a/test_main.h b/test_main.h
index 0005fd6..0c537a0 100644
--- a/test_main.h
+++ b/test_main.h
@@ -2,15 +2,15 @@
/* Copy this file (test_main.h.in) to test_main.h, and edit */
/* DEBUG RUN, ON V */
-#if 0
+#if 1
#define TEST_VSUPPORT True
#define TEST_N_ITERS 1
#define TEST_N_BBS 1
-#define TEST_FLAGS (1<<7) /* |(1<<2)|(1<<1) */
+#define TEST_FLAGS ((1<<7)|(0<<6)|(1<<3)|(0<<2)|(0<<1)|(0<<0))
#endif
/* CHECKING RUN, ON V */
-#if 1
+#if 0
#define TEST_VSUPPORT True
#define TEST_N_ITERS 1
#define TEST_N_BBS 100000