Implement SSE4 instructions: PCMPGTQ PMAXUD PMINUD PMAXSB PMINSB PMULLD
I believe this covers everything that gcc-4.4 and gcc-4.5 will generate
with "-O3 -msse4.2".  Note, this commit changes the set of IR ops and so
requires a from-scratch rebuild of the tree.



git-svn-id: svn://svn.valgrind.org/vex/trunk@1984 8f6e269a-dfd6-0310-a8e1-e2731360e62c
diff --git a/priv/host_generic_simd128.h b/priv/host_generic_simd128.h
new file mode 100644
index 0000000..125514a
--- /dev/null
+++ b/priv/host_generic_simd128.h
@@ -0,0 +1,79 @@
+
+/*---------------------------------------------------------------*/
+/*--- begin                             host_generic_simd128.h ---*/
+/*---------------------------------------------------------------*/
+
+/*
+   This file is part of Valgrind, a dynamic binary instrumentation
+   framework.
+
+   Copyright (C) 2010-2010 OpenWorks GbR
+      info@open-works.net
+
+   This program is free software; you can redistribute it and/or
+   modify it under the terms of the GNU General Public License as
+   published by the Free Software Foundation; either version 2 of the
+   License, or (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful, but
+   WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+   General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
+   02110-1301, USA.
+
+   The GNU General Public License is contained in the file COPYING.
+*/
+
+/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
+   where the instruction selectors cannot generate code in-line.
+   These are purely back-end entities and cannot be seen/referenced
+   as clean helper functions from IR.
+
+   These will get called from generated code and therefore should be
+   well behaved -- no floating point or mmx insns, just straight
+   integer code.
+
+   Each function implements the correspondingly-named IR primop.
+*/
+
+#ifndef __VEX_HOST_GENERIC_SIMD128_H
+#define __VEX_HOST_GENERIC_SIMD128_H
+
+/* A union for doing 128-bit primitives conveniently.  It is not
+   public and so not placed in pub/. */
+typedef
+   union {
+      UChar  w8[16];
+      UShort w16[8];
+      UInt   w32[4];
+      ULong  w64[2];
+   }
+   V128;
+
+
+#include "libvex_basictypes.h"
+
+/* DO NOT MAKE THESE INTO REGPARM FNS!  THIS WILL BREAK CALLING
+   SEQUENCES GENERATED BY host-x86/isel.c. */
+
+extern void h_generic_calc_Mul32x4    ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min32Sx4   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min32Ux4   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min16Ux8   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Max8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_Min8Sx16   ( /*OUT*/V128*, V128*, V128* );
+extern void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128*, V128*, V128* );
+
+
+#endif /* ndef __VEX_HOST_GENERIC_SIMD128_H */
+
+/*---------------------------------------------------------------*/
+/*--- end                              host_generic_simd128.h ---*/
+/*---------------------------------------------------------------*/