Support for FXSAVE/FXRSTOR (Tom Hughes). Fixes #71180.
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@2183 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/addrcheck/ac_main.c b/addrcheck/ac_main.c
index 74acac0..044f917 100644
--- a/addrcheck/ac_main.c
+++ b/addrcheck/ac_main.c
@@ -907,7 +907,7 @@
return;
}
- if (size == 16 || size == 10 || size == 28 || size == 108) {
+ if (size == 16 || size == 10 || size == 28 || size == 108 || size == 512) {
PROF_EVENT(94);
ac_fpu_ACCESS_check_SLOWLY ( addr, size, isWrite );
return;
@@ -1055,8 +1055,8 @@
helper = (Addr)ac_fpu_WRITE_check;
goto do_Access_ARG3;
do_Access_ARG3:
- sk_assert(u_in->size == 4
- || u_in->size == 8 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 8
+ || u_in->size == 16 || u_in->size == 512);
sk_assert(u_in->tag3 == TempReg);
t_addr = u_in->val3;
t_size = newTemp(cb);
diff --git a/cachegrind/cg_main.c b/cachegrind/cg_main.c
index 0cbeaab..6264abf 100644
--- a/cachegrind/cg_main.c
+++ b/cachegrind/cg_main.c
@@ -544,7 +544,7 @@
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
t_read = u_in->val3;
is_FPU_R = True;
break;
@@ -577,7 +577,7 @@
break;
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
t_write = u_in->val3;
is_FPU_W = True;
break;
@@ -798,11 +798,16 @@
case SSE2a_MemRd:
case SSE2a1_MemRd:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
t_read = u_in->val3;
t_read_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_read_addr);
- data_size = u_in->size;
+ /* 512 B data-sized instructions will be done inaccurately
+ * but they're very rare and this avoids errors from
+ * hitting more than two cache lines in the simulation. */
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
VG_(copy_UInstr)(cb, u_in);
break;
@@ -856,14 +861,19 @@
break;
case SSE2a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 16 || u_in->size == 512);
/* fall through */
case SSE3a_MemWr:
- sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 8 || u_in->size == 16 || u_in->size == 512);
t_write = u_in->val3;
t_write_addr = newTemp(cb);
uInstr2(cb, MOV, 4, TempReg, u_in->val3, TempReg, t_write_addr);
- data_size = u_in->size;
+ /* 512 B data-sized instructions will be done inaccurately
+ * but they're very rare and this avoids errors from
+ * hitting more than two cache lines in the simulation. */
+ data_size = ( u_in->size <= MIN_LINE_SIZE
+ ? u_in->size
+ : MIN_LINE_SIZE);
VG_(copy_UInstr)(cb, u_in);
break;
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index d83862d..7be34b1 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -4075,7 +4075,7 @@
case SSE2a_MemWr:
case SSE2a_MemRd:
- vg_assert(u->size == 4 || u->size == 16);
+ vg_assert(u->size == 4 || u->size == 16 || u->size == 512);
vg_assert(u->tag1 == Lit16);
vg_assert(u->tag2 == Lit16);
vg_assert(u->tag3 == RealReg);
diff --git a/coregrind/vg_to_ucode.c b/coregrind/vg_to_ucode.c
index 35d1911..e7c48de 100644
--- a/coregrind/vg_to_ucode.c
+++ b/coregrind/vg_to_ucode.c
@@ -3545,8 +3545,27 @@
if (VG_(have_ssestate)) {
UChar* insn = (UChar*)eip;
+ /* FXSAVE/FXRSTOR m512byte -- load/store the 512-byte FPU/MMX/SSE state area. */
+ if (insn[0] == 0x0F && insn[1] == 0xAE
+ && (!epartIsReg(insn[2]))
+ && (gregOfRM(insn[2]) == 1 || gregOfRM(insn[2]) == 0) ) {
+ Bool store = gregOfRM(insn[2]) == 0;
+ vg_assert(sz == 4);
+ pair = disAMode ( cb, sorb, eip+2, dis?dis_buf:NULL );
+ t1 = LOW24(pair);
+ eip += 2+HI8(pair);
+ uInstr3(cb, store ? SSE2a_MemWr : SSE2a_MemRd, 512,
+ Lit16, (((UShort)insn[0]) << 8) | (UShort)insn[1],
+ Lit16, (UShort)insn[2],
+ TempReg, t1 );
+ if (dis)
+ VG_(printf)("fx%s %s\n", store ? "save" : "rstor", dis_buf );
+ goto decode_success;
+ }
+
/* STMXCSR/LDMXCSR m32 -- load/store the MXCSR register. */
if (insn[0] == 0x0F && insn[1] == 0xAE
+ && (!epartIsReg(insn[2]))
&& (gregOfRM(insn[2]) == 3 || gregOfRM(insn[2]) == 2) ) {
Bool store = gregOfRM(insn[2]) == 3;
vg_assert(sz == 4);
diff --git a/coregrind/vg_translate.c b/coregrind/vg_translate.c
index 408a707..0a231c1 100644
--- a/coregrind/vg_translate.c
+++ b/coregrind/vg_translate.c
@@ -414,7 +414,8 @@
# define SZ42 (u->size == 4 || u->size == 2)
# define SZ48 (u->size == 4 || u->size == 8)
# define SZ416 (u->size == 4 || u->size == 16)
-# define SZsse (u->size == 4 || u->size == 8 || u->size == 16)
+# define SZsse2 (u->size == 4 || u->size == 16 || u->size == 512)
+# define SZsse3 (u->size == 4 || u->size == 8 || u->size == 16)
# define SZi (u->size == 4 || u->size == 2 || u->size == 1)
# define SZf ( u->size == 4 || u->size == 8 || u->size == 2 \
|| u->size == 10 || u->size == 28 || u->size == 108)
@@ -563,22 +564,22 @@
case MMX2_ERegWr: return LIT0 && SZ4 && CC0 && Ls1 && TR2 && N3 && XOTHER;
/* Fields checked: lit32 size flags_r/w tag1 tag2 tag3 (rest) */
- case SSE2a_MemWr: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE2a_MemRd: return LIT0 && SZ416 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3a_MemWr: return LIT0 && SZsse && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3a_MemRd: return LIT0 && SZsse && CCa && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3a1_MemRd: return LIT8 && SZ16 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
- case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
- case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
- case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
+ case SSE2a_MemWr: return LIT0 && SZsse2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2a_MemRd: return LIT0 && SZsse2 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE2a1_MemRd: return LIT0 && SZ416 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3a_MemWr: return LIT0 && SZsse3 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3a_MemRd: return LIT0 && SZsse3 && CCa && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3e_RegRd: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3e_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3a1_MemRd: return LIT8 && SZ16 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3g_RegWr: return LIT0 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3g1_RegWr: return LIT8 && SZ4 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3e1_RegRd: return LIT8 && SZ2 && CC0 && Ls1 && Ls2 && TR3 && XOTHER;
+ case SSE3: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
+ case SSE4: return LIT0 && SZ0 && CCa && Ls1 && Ls2 && N3 && XOTHER;
+ case SSE5: return LIT0 && SZ0 && CC0 && Ls1 && Ls2 && Ls3 && XOTHER;
case SSE3ag_MemRd_RegWr:
- return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
+ return SZ48 && CC0 && TR1 && TR2 && N3 && XOTHER;
default:
if (VG_(needs).extended_UCode)
return SK_(sane_XUInstr)(beforeRA, beforeLiveness, u);
@@ -602,7 +603,8 @@
# undef SZ42
# undef SZ48
# undef SZ416
-# undef SZsse
+# undef SZsse2
+# undef SZsse3
# undef SZi
# undef SZf
# undef SZ4m
diff --git a/include/vg_skin.h.base b/include/vg_skin.h.base
index 4ef7a63..52fc2c8 100644
--- a/include/vg_skin.h.base
+++ b/include/vg_skin.h.base
@@ -960,7 +960,7 @@
/* word 3 */
UShort val3; /* third operand */
UChar opcode; /* opcode */
- UChar size; /* data transfer size */
+ UShort size; /* data transfer size */
/* word 4 */
FlagSet flags_r; /* :: FlagSet */
diff --git a/memcheck/mac_needs.c b/memcheck/mac_needs.c
index 315847f..a3ab823 100644
--- a/memcheck/mac_needs.c
+++ b/memcheck/mac_needs.c
@@ -732,19 +732,19 @@
81 fpu_read aligned 4
82 fpu_read aligned 8
83 fpu_read 2
- 84 fpu_read 10/28/108
+ 84 fpu_read 10/28/108/512
M 85 fpu_write
M 86 fpu_write aligned 4
M 87 fpu_write aligned 8
M 88 fpu_write 2
-M 89 fpu_write 10/28/108
+M 89 fpu_write 10/28/108/512
90 fpu_access
91 fpu_access aligned 4
92 fpu_access aligned 8
93 fpu_access 2
- 94 fpu_access 10/28/108
+ 94 fpu_access 10/28/108/512
100 fpu_access_check_SLOWLY
101 fpu_access_check_SLOWLY(byte loop)
diff --git a/memcheck/mc_main.c b/memcheck/mc_main.c
index 7ee467c..3cc44de 100644
--- a/memcheck/mc_main.c
+++ b/memcheck/mc_main.c
@@ -1190,7 +1190,7 @@
}
if (size == 16 /*SSE*/
- || size == 10 || size == 28 || size == 108) {
+ || size == 10 || size == 28 || size == 108 || size == 512) {
PROF_EVENT(84);
mc_fpu_read_check_SLOWLY ( addr, size );
return;
@@ -1273,7 +1273,7 @@
}
if (size == 16 /*SSE*/
- || size == 10 || size == 28 || size == 108) {
+ || size == 10 || size == 28 || size == 108 || size == 512) {
PROF_EVENT(89);
mc_fpu_write_check_SLOWLY ( addr, size );
return;
diff --git a/memcheck/mc_translate.c b/memcheck/mc_translate.c
index 1b3599b..d21bb86 100644
--- a/memcheck/mc_translate.c
+++ b/memcheck/mc_translate.c
@@ -1112,8 +1112,8 @@
Bool is_load;
Int t_size;
- sk_assert(u_in->size == 4
- || u_in->size == 8 || u_in->size == 16);
+ sk_assert(u_in->size == 4 || u_in->size == 8
+ || u_in->size == 16 || u_in->size == 512);
t_size = INVALID_TEMPREG;
is_load = u_in->opcode==SSE2a_MemRd