Build system and hwcaps fixes pertaining to #305728, which added
support for AVX2, BMI1, BMI2 and FMA instructions.
(Jakub Jelinek, jakub@redhat.com)
git-svn-id: svn://svn.valgrind.org/valgrind/trunk@13340 a5019735-40e9-0310-863c-91ae7b9d1cf9
diff --git a/Makefile.vex.am b/Makefile.vex.am
index ddb5374..419a580 100644
--- a/Makefile.vex.am
+++ b/Makefile.vex.am
@@ -46,6 +46,8 @@
priv/host_generic_regs.h \
priv/host_generic_simd64.h \
priv/host_generic_simd128.h \
+ priv/host_generic_simd256.h \
+ priv/host_generic_maddf.h \
priv/host_x86_defs.h \
priv/host_amd64_defs.h \
priv/host_ppc_defs.h \
@@ -117,6 +119,8 @@
priv/host_generic_regs.c \
priv/host_generic_simd64.c \
priv/host_generic_simd128.c \
+ priv/host_generic_simd256.c \
+ priv/host_generic_maddf.c \
priv/host_generic_reg_alloc2.c \
priv/host_x86_defs.c \
priv/host_x86_isel.c \
diff --git a/configure.in b/configure.in
index 9aadfa2..747ccd4 100644
--- a/configure.in
+++ b/configure.in
@@ -1909,6 +1909,77 @@
AM_CONDITIONAL(BUILD_AVX_TESTS, test x$ac_have_as_avx = xyes)
+# Does the x86/amd64 assembler understand AVX2 instructions?
+# Note: this does not define a C-level symbol; it only sets the
+# automake conditional BUILD_AVX2_TESTS, used in test Makefile.am's.
+AC_MSG_CHECKING([if x86/amd64 assembler speaks AVX2])
+
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
+ do { long long int x;
+ __asm__ __volatile__(
+ "vpsravd (%%rsp), %%ymm8, %%ymm7" : : : "xmm7", "xmm8" );
+ __asm__ __volatile__(
+ "vpaddb %%ymm6,%%ymm7,%%ymm8" : : : "xmm6","xmm7","xmm8"); }
+ while (0)
+]])], [
+ac_have_as_avx2=yes
+AC_MSG_RESULT([yes])
+], [
+ac_have_as_avx2=no
+AC_MSG_RESULT([no])
+])
+
+AM_CONDITIONAL(BUILD_AVX2_TESTS, test x$ac_have_as_avx2 = xyes)
+
+
+# Does the x86/amd64 assembler understand BMI1 (andn) and BMI2 (mulx)?
+# Note: this does not define a C-level symbol; it only sets the
+# automake conditional BUILD_BMI_TESTS, used in test Makefile.am's.
+AC_MSG_CHECKING([if x86/amd64 assembler speaks BMI1 and BMI2])
+
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
+ do { unsigned int h, l;
+ __asm__ __volatile__(
+ "andn %2, %1, %0" : "=r" (h) : "r" (0x1234567), "r" (0x7654321) );
+ __asm__ __volatile__(
+ "movl %2, %%edx; mulx %3, %1, %0" : "=r" (h), "=r" (l) : "g" (0x1234567), "g" (0x7654321) : "edx" ); }
+ while (0)
+]])], [
+ac_have_as_bmi=yes
+AC_MSG_RESULT([yes])
+], [
+ac_have_as_bmi=no
+AC_MSG_RESULT([no])
+])
+
+AM_CONDITIONAL(BUILD_BMI_TESTS, test x$ac_have_as_bmi = xyes)
+
+
+# Does the x86/amd64 assembler understand FMA instructions?
+# Note: this does not define a C-level symbol; it only sets the
+# automake conditional BUILD_FMA_TESTS, used in test Makefile.am's.
+AC_MSG_CHECKING([if x86/amd64 assembler speaks FMA])
+
+AC_COMPILE_IFELSE([AC_LANG_PROGRAM([[]], [[
+ do { unsigned int h, l;
+ __asm__ __volatile__(
+ "vfmadd132ps (%%rsp), %%ymm8, %%ymm7" : : : "xmm7", "xmm8" );
+ __asm__ __volatile__(
+ "vfnmsub231sd (%%rsp), %%xmm8, %%xmm7" : : : "xmm7", "xmm8" );
+ __asm__ __volatile__(
+ "vfmsubadd213pd (%%rsp), %%xmm8, %%xmm7" : : : "xmm7", "xmm8" ); }
+ while (0)
+]])], [
+ac_have_as_fma=yes
+AC_MSG_RESULT([yes])
+], [
+ac_have_as_fma=no
+AC_MSG_RESULT([no])
+])
+
+AM_CONDITIONAL(BUILD_FMA_TESTS, test x$ac_have_as_fma = xyes)
+
+
# does the x86/amd64 assembler understand MOVBE?
# Note, this doesn't generate a C-level symbol. It generates a
# automake-level symbol (BUILD_MOVBE_TESTS), used in test Makefile.am's
diff --git a/coregrind/m_machine.c b/coregrind/m_machine.c
index dce7ee6..7ba19c9 100644
--- a/coregrind/m_machine.c
+++ b/coregrind/m_machine.c
@@ -767,9 +767,9 @@
#elif defined(VGA_amd64)
{ Bool have_sse3, have_cx8, have_cx16;
- Bool have_lzcnt, have_avx /*, have_fma*/;
+ Bool have_lzcnt, have_avx, have_bmi, have_avx2;
Bool have_rdtscp;
- UInt eax, ebx, ecx, edx, max_extended;
+ UInt eax, ebx, ecx, edx, max_basic, max_extended;
HChar vstr[13];
vstr[0] = 0;
@@ -778,7 +778,8 @@
return False;
VG_(cpuid)(0, 0, &eax, &ebx, &ecx, &edx);
- if (eax < 1)
+ max_basic = eax;
+ if (max_basic < 1)
/* we can't ask for cpuid(x) for x > 0. Give up. */
return False;
@@ -835,13 +836,13 @@
/* on amd64 we tolerate older cpus, which don't have cmpxchg16b */
have_cx16 = (ecx & (1<<13)) != 0; /* True => have cmpxchg16b */
- /* Figure out if this is an AMD that can do LZCNT. */
+ /* Figure out if this CPU can do LZCNT. */
have_lzcnt = False;
- if (0 == VG_(strcmp)(vstr, "AuthenticAMD")
- && max_extended >= 0x80000001) {
+ if (max_extended >= 0x80000001) {
VG_(cpuid)(0x80000001, 0, &eax, &ebx, &ecx, &edx);
have_lzcnt = (ecx & (1<<5)) != 0; /* True => have LZCNT */
}
+
/* Can we do RDTSCP? */
have_rdtscp = False;
if (max_extended >= 0x80000001) {
@@ -849,11 +850,22 @@
have_rdtscp = (edx & (1<<27)) != 0; /* True => have RDTSVCP */
}
+ /* Check for BMI1 and AVX2. */
+ have_bmi = False;
+ have_avx2 = False;
+ if (max_basic >= 7) {
+ VG_(cpuid)(7, 0, &eax, &ebx, &ecx, &edx);
+ have_bmi = (ebx & (1<<3)) != 0; /* True => have BMI1 */
+ have_avx2 = have_avx && ((ebx & (1<<5)) != 0); /* True => have AVX2 */
+ }
+
va = VexArchAMD64;
vai.hwcaps = (have_sse3 ? VEX_HWCAPS_AMD64_SSE3 : 0)
| (have_cx16 ? VEX_HWCAPS_AMD64_CX16 : 0)
| (have_lzcnt ? VEX_HWCAPS_AMD64_LZCNT : 0)
| (have_avx ? VEX_HWCAPS_AMD64_AVX : 0)
+ | (have_bmi ? VEX_HWCAPS_AMD64_BMI : 0)
+ | (have_avx2 ? VEX_HWCAPS_AMD64_AVX2 : 0)
| (have_rdtscp ? VEX_HWCAPS_AMD64_RDTSCP : 0);
VG_(machine_get_cache_info)(&vai);