Implement a few SSE instructions, enough to run bzip2 when compiled
with the Intel C compiler (7.1.009) with vectorisation for SSE engaged
(-xK).
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@1651 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/coregrind/vg_from_ucode.c b/coregrind/vg_from_ucode.c
index 2236a47..9b8fb84 100644
--- a/coregrind/vg_from_ucode.c
+++ b/coregrind/vg_from_ucode.c
@@ -1419,6 +1419,86 @@
nameIReg(4,ireg) );
}
+static void emit_SSE2a ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ third_byte &= 0x38; /* mask out mod and rm fields */
+ emit_amode_regmem_reg ( ireg, third_byte >> 3 );
+ if (dis)
+ VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte, (UInt)third_byte,
+ nameIReg(4,ireg) );
+}
+
+static void emit_SSE3a ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ UChar fourth_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ VG_(emitB) ( third_byte );
+ fourth_byte &= 0x38; /* mask out mod and rm fields */
+ emit_amode_regmem_reg ( ireg, fourth_byte >> 3 );
+ if (dis)
+ VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte,
+ (UInt)third_byte, (UInt)fourth_byte,
+ nameIReg(4,ireg) );
+}
+
+static void emit_SSE3g ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ UChar fourth_byte,
+ Int ireg )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ VG_(emitB) ( third_byte );
+ fourth_byte &= 0x38; /* mask out mod and rm fields */
+ fourth_byte |= 0xC0; /* set top two bits: mod = 11b */
+ fourth_byte |= (ireg & 7); /* patch in our ireg */
+ VG_(emitB) ( fourth_byte );
+ if (dis)
+ VG_(printf)("\n\t\tsse-reg-to-xmmreg--0x%x:0x%x:0x%x:0x%x-(%s)\n",
+ (UInt)first_byte, (UInt)second_byte,
+ (UInt)third_byte, (UInt)fourth_byte,
+ nameIReg(4,ireg) );
+}
+
+static void emit_SSE4 ( FlagSet uses_sflags,
+ FlagSet sets_sflags,
+ UChar first_byte,
+ UChar second_byte,
+ UChar third_byte,
+ UChar fourth_byte )
+{
+ VG_(new_emit)(True, uses_sflags, sets_sflags);
+ VG_(emitB) ( first_byte );
+ VG_(emitB) ( second_byte );
+ VG_(emitB) ( third_byte );
+ VG_(emitB) ( fourth_byte );
+ if (dis)
+ VG_(printf)("\n\t\tsse-0x%x:0x%x:0x%x:0x%x\n",
+ (UInt)first_byte, (UInt)second_byte,
+ (UInt)third_byte, (UInt)fourth_byte );
+}
+
static void emit_MMX2_reg_to_mmxreg ( FlagSet uses_sflags,
FlagSet sets_sflags,
UChar first_byte,
@@ -3593,6 +3673,78 @@
u->val2 & 0xFF );
break;
+ case SSE2a_MemWr:
+ case SSE2a_MemRd:
+ vg_assert(u->size == 4 || u->size == 16);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE2a ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ u->val2 & 0xFF,
+ u->val3 );
+ break;
+
+ case SSE3a_MemWr:
+ case SSE3a_MemRd:
+ vg_assert(u->size == 4 || u->size == 16);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE3a ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ (u->val2 >> 8) & 0xFF,
+ u->val2 & 0xFF,
+ u->val3 );
+ break;
+
+ case SSE3g_RegRd:
+ vg_assert(u->size == 4);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == RealReg);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE3g ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ (u->val2 >> 8) & 0xFF,
+ u->val2 & 0xFF,
+ u->val3 );
+ break;
+
+ case SSE4:
+ vg_assert(u->size == 0);
+ vg_assert(u->tag1 == Lit16);
+ vg_assert(u->tag2 == Lit16);
+ vg_assert(u->tag3 == NoValue);
+ vg_assert(!anyFlagUse(u));
+ if (!(*sselive)) {
+ emit_get_sse_state();
+ *sselive = True;
+ }
+ emit_SSE4 ( u->flags_r, u->flags_w,
+ (u->val1 >> 8) & 0xFF,
+ u->val1 & 0xFF,
+ (u->val2 >> 8) & 0xFF,
+ u->val2 & 0xFF );
+ break;
+
default:
if (VG_(needs).extended_UCode) {
if (*sselive) {