Implement SSE4 instructions: PCMPGTQ PMAXUD PMINUD PMAXSB PMINSB PMULLD
I believe this covers everything that gcc-4.4 and gcc-4.5 will generate
with "-O3 -msse4.2". Note, this commit changes the set of IR ops and so
requires a from-scratch rebuild of the tree.
git-svn-id: svn://svn.valgrind.org/vex/trunk@1984 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host_generic_simd128.h b/priv/host_generic_simd128.h
new file mode 100644
index 0000000..125514a
--- /dev/null
+++ b/priv/host_generic_simd128.h
@@ -0,0 +1,79 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin host_generic_simd128.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+ This file is part of Valgrind, a dynamic binary instrumentation
+ framework.
+
+ Copyright (C) 2010-2010 OpenWorks GbR
+ info@open-works.net
+
+ This program is free software; you can redistribute it and/or
+ modify it under the terms of the GNU General Public License as
+ published by the Free Software Foundation; either version 2 of the
+ License, or (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+ 02110-1301, USA.
+
+ The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
+ where the instruction selectors cannot generate code in-line.
+ These are purely back-end entities and cannot be seen/referenced
+ as clean helper functions from IR.
+
+ These will get called from generated code and therefore should be
+ well behaved -- no floating point or mmx insns, just straight
+ integer code.
+
+ Each function implements the correspondingly-named IR primop.
+*/
+
+#ifndef __VEX_HOST_GENERIC_SIMD128_H
+#define __VEX_HOST_GENERIC_SIMD128_H
+
+/* Must come first: V128 below is built from UChar/UShort/UInt/ULong. */
+#include "libvex_basictypes.h"
+
+/* A union for doing 128-bit primitives conveniently. It is not
+ public and so not placed in pub/. Each member views the same
+ 128 bits as an array of equally sized lanes. */
+typedef
+ union {
+ UChar w8[16];
+ UShort w16[8];
+ UInt w32[4];
+ ULong w64[2];
+ }
+ V128;
+
+/* DO NOT MAKE THESE INTO REGPARM FNS! THIS WILL BREAK CALLING
+ SEQUENCES GENERATED BY host-x86/isel.c. */
+/* First V128* is the OUT slot; the other two are presumably the operands. */
+extern void h_generic_calc_Mul32x4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max32Sx4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min32Sx4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max32Ux4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min32Ux4 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max16Ux8 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min16Ux8 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min8Sx16 ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+
+
+#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
+
+/*---------------------------------------------------------------*/
+/*--- end host_generic_simd128.h ---*/
+/*---------------------------------------------------------------*/