blob: 8ed516609dfbf791affa18044a4de1589c93329c [file] [log] [blame]
/*---------------------------------------------------------------*/
/*--- begin host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/
/*
This file is part of Valgrind, a dynamic binary instrumentation
framework.
Copyright (C) 2010-2010 OpenWorks GbR
info@open-works.net
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.
The GNU General Public License is contained in the file COPYING.
*/
/* Generic helper functions for doing 128-bit SIMD arithmetic in cases
where the instruction selectors cannot generate code in-line.
These are purely back-end entities and cannot be seen/referenced
from IR. */
#include "libvex_basictypes.h"
#include "host_generic_simd128.h"
/* Primitive helpers always take args of the real type (signed vs
unsigned) but return an unsigned result, so there's no conversion
weirdness when stuffing results back in the V128 union fields,
which are all unsigned. */
/* Multiply two signed 32-bit values and return the low 32 bits of the
   product as an unsigned value.

   The multiplication is carried out on the operands converted to
   UInt.  Multiplying in Int would be undefined behaviour whenever the
   mathematical product does not fit in an Int; unsigned arithmetic
   wraps modulo 2^32 instead, and the low 32 bits of a two's-complement
   product are the same for signed and unsigned operands. */
static inline UInt mul32 ( Int xx, Int yy )
{
   return ((UInt)xx) * ((UInt)yy);
}
/* Signed 32-bit maximum.  The operands are compared as Ints; the
   chosen one is handed to toUInt to form the unsigned result. */
static inline UInt max32S ( Int xx, Int yy )
{
   Int larger = yy;
   if (xx > yy)
      larger = xx;
   return toUInt(larger);
}
/* Signed 32-bit minimum.  The operands are compared as Ints; the
   chosen one is handed to toUInt to form the unsigned result. */
static inline UInt min32S ( Int xx, Int yy )
{
   Int smaller = yy;
   if (xx < yy)
      smaller = xx;
   return toUInt(smaller);
}
/* Unsigned 32-bit maximum: picks xx when it is strictly greater than
   yy, otherwise yy, and returns it via toUInt. */
static inline UInt max32U ( UInt xx, UInt yy )
{
   UInt larger = yy;
   if (xx > yy)
      larger = xx;
   return toUInt(larger);
}
/* Unsigned 32-bit minimum: picks xx when it is strictly less than yy,
   otherwise yy, and returns it via toUInt. */
static inline UInt min32U ( UInt xx, UInt yy )
{
   UInt smaller = yy;
   if (xx < yy)
      smaller = xx;
   return toUInt(smaller);
}
/* Unsigned 16-bit maximum: picks xx when it is strictly greater than
   yy, otherwise yy, and returns it via toUShort. */
static inline UShort max16U ( UShort xx, UShort yy )
{
   UShort larger = yy;
   if (xx > yy)
      larger = xx;
   return toUShort(larger);
}
/* Unsigned 16-bit minimum: picks xx when it is strictly less than yy,
   otherwise yy, and returns it via toUShort. */
static inline UShort min16U ( UShort xx, UShort yy )
{
   UShort smaller = yy;
   if (xx < yy)
      smaller = xx;
   return toUShort(smaller);
}
/* 8-bit maximum on operands of type Char.  The comparison is done on
   the Char values; the chosen one is handed to toUChar to form the
   unsigned result. */
static inline UChar max8S ( Char xx, Char yy )
{
   Char larger = yy;
   if (xx > yy)
      larger = xx;
   return toUChar(larger);
}
/* 8-bit minimum on operands of type Char.  The comparison is done on
   the Char values; the chosen one is handed to toUChar to form the
   unsigned result. */
static inline UChar min8S ( Char xx, Char yy )
{
   Char smaller = yy;
   if (xx < yy)
      smaller = xx;
   return toUChar(smaller);
}
/* Signed 64-bit greater-than test producing a lane mask: all ones
   when xx > yy, all zeroes otherwise. */
static inline ULong cmpGT64S ( Long xx, Long yy )
{
   if (xx > yy)
      return 0xFFFFFFFFFFFFFFFFULL;
   return 0ULL;
}
/* Lane-wise multiply over the four 32-bit lanes (w32[0..3]) of argL
   and argR, using mul32 for each lane.  Results are stored into the
   corresponding lanes of res, lowest index first. */
void h_generic_calc_Mul32x4 ( /*OUT*/V128* res,
                              V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 4; lane++) {
      res->w32[lane] = mul32(argL->w32[lane], argR->w32[lane]);
   }
}
/* Lane-wise maximum over the four 32-bit lanes (w32[0..3]) of argL
   and argR, using max32S (which compares its operands as Ints) for
   each lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Max32Sx4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 4; lane++) {
      res->w32[lane] = max32S(argL->w32[lane], argR->w32[lane]);
   }
}
/* Lane-wise minimum over the four 32-bit lanes (w32[0..3]) of argL
   and argR, using min32S (which compares its operands as Ints) for
   each lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Min32Sx4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 4; lane++) {
      res->w32[lane] = min32S(argL->w32[lane], argR->w32[lane]);
   }
}
/* Lane-wise maximum over the four 32-bit lanes (w32[0..3]) of argL
   and argR, using max32U (which compares its operands as UInts) for
   each lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Max32Ux4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 4; lane++) {
      res->w32[lane] = max32U(argL->w32[lane], argR->w32[lane]);
   }
}
/* Lane-wise minimum over the four 32-bit lanes (w32[0..3]) of argL
   and argR, using min32U (which compares its operands as UInts) for
   each lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Min32Ux4 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 4; lane++) {
      res->w32[lane] = min32U(argL->w32[lane], argR->w32[lane]);
   }
}
/* Lane-wise maximum over the eight 16-bit lanes (w16[0..7]) of argL
   and argR, using max16U (which compares its operands as UShorts) for
   each lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Max16Ux8 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 8; lane++) {
      res->w16[lane] = max16U(argL->w16[lane], argR->w16[lane]);
   }
}
/* Lane-wise minimum over the eight 16-bit lanes (w16[0..7]) of argL
   and argR, using min16U (which compares its operands as UShorts) for
   each lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Min16Ux8 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 8; lane++) {
      res->w16[lane] = min16U(argL->w16[lane], argR->w16[lane]);
   }
}
/* Lane-wise maximum over the sixteen 8-bit lanes (w8[0..15]) of argL
   and argR, using max8S (which takes its operands as Chars) for each
   lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Max8Sx16 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 16; lane++) {
      res->w8[lane] = max8S(argL->w8[lane], argR->w8[lane]);
   }
}
/* Lane-wise minimum over the sixteen 8-bit lanes (w8[0..15]) of argL
   and argR, using min8S (which takes its operands as Chars) for each
   lane.  Results are stored into the corresponding lanes of res,
   lowest index first. */
void h_generic_calc_Min8Sx16 ( /*OUT*/V128* res,
                               V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 16; lane++) {
      res->w8[lane] = min8S(argL->w8[lane], argR->w8[lane]);
   }
}
/* Lane-wise greater-than comparison over the two 64-bit lanes
   (w64[0..1]) of argL and argR, using cmpGT64S (which compares its
   operands as Longs).  Each lane of res receives all ones when the
   argL lane is greater than the argR lane, and all zeroes otherwise.
   Lanes are written lowest index first. */
void h_generic_calc_CmpGT64Sx2 ( /*OUT*/V128* res,
                                 V128* argL, V128* argR )
{
   Int lane;
   for (lane = 0; lane < 2; lane++) {
      res->w64[lane] = cmpGT64S(argL->w64[lane], argR->w64[lane]);
   }
}
/*---------------------------------------------------------------*/
/*--- end host_generic_simd128.c ---*/
/*---------------------------------------------------------------*/