/*---------------------------------------------------------------*/
/*--- begin host_generic_simd64.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

/* Generic helper functions for doing 64-bit SIMD arithmetic in cases
   where the instruction selectors cannot generate code in-line.
   These are purely back-end entities and cannot be seen/referenced
   from IR.  There are also helpers for 32-bit arithmetic in here. */

#include "libvex_basictypes.h"
#include "main_util.h"               // LIKELY, UNLIKELY
#include "host_generic_simd64.h"


/* Tuple/select functions for 32x2 vectors. */

static inline ULong mk32x2 ( UInt w1, UInt w0 ) {
   return (((ULong)w1) << 32) | ((ULong)w0);
}

static inline UInt sel32x2_1 ( ULong w64 ) {
   return 0xFFFFFFFF & toUInt(w64 >> 32);
}
static inline UInt sel32x2_0 ( ULong w64 ) {
   return 0xFFFFFFFF & toUInt(w64);
}

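/* Illustrative self-test sketch (hypothetical helper, not part of the
   original file, hence guarded out): packing with mk32x2 and
   re-selecting the lanes must round-trip. */
#if 0
static void example_mk32x2 ( void )
{
   ULong v = mk32x2(0xDEADBEEFU, 0xCAFEBABEU);
   vassert(sel32x2_1(v) == 0xDEADBEEFU);
   vassert(sel32x2_0(v) == 0xCAFEBABEU);
}
#endif
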
/* Tuple/select functions for 16x4 vectors.  gcc is pretty hopeless
   with 64-bit shifts so we give it a hand. */

static inline ULong mk16x4 ( UShort w3, UShort w2,
                             UShort w1, UShort w0 ) {
   UInt hi32 = (((UInt)w3) << 16) | ((UInt)w2);
   UInt lo32 = (((UInt)w1) << 16) | ((UInt)w0);
   return mk32x2(hi32, lo32);
}

static inline UShort sel16x4_3 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(0xFFFF & (hi32 >> 16));
}
static inline UShort sel16x4_2 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUShort(0xFFFF & hi32);
}
static inline UShort sel16x4_1 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUShort(0xFFFF & (lo32 >> 16));
}
static inline UShort sel16x4_0 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUShort(0xFFFF & lo32);
}

/* Tuple/select functions for 8x8 vectors. */

static inline ULong mk8x8 ( UChar w7, UChar w6,
                            UChar w5, UChar w4,
                            UChar w3, UChar w2,
                            UChar w1, UChar w0 ) {
   UInt hi32 = (((UInt)w7) << 24) | (((UInt)w6) << 16)
               | (((UInt)w5) << 8) | (((UInt)w4) << 0);
   UInt lo32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
               | (((UInt)w1) << 8) | (((UInt)w0) << 0);
   return mk32x2(hi32, lo32);
}

static inline UChar sel8x8_7 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 24));
}
static inline UChar sel8x8_6 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 16));
}
static inline UChar sel8x8_5 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 8));
}
static inline UChar sel8x8_4 ( ULong w64 ) {
   UInt hi32 = toUInt(w64 >> 32);
   return toUChar(0xFF & (hi32 >> 0));
}
static inline UChar sel8x8_3 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 24));
}
static inline UChar sel8x8_2 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 16));
}
static inline UChar sel8x8_1 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 8));
}
static inline UChar sel8x8_0 ( ULong w64 ) {
   UInt lo32 = (UInt)w64;
   return toUChar(0xFF & (lo32 >> 0));
}

static inline UChar index8x8 ( ULong w64, UChar ix ) {
   ix &= 7;
   return toUChar((w64 >> (8*ix)) & 0xFF);
}

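/* Illustrative sketch (hypothetical, guarded out): index8x8 treats the
   64-bit value as a little-endian array of 8 bytes, so index 0 is the
   least significant byte, and only the low 3 bits of the index are
   used. */
#if 0
static void example_index8x8 ( void )
{
   ULong v = 0x0706050403020100ULL;   /* byte i holds the value i */
   vassert(index8x8(v, 0) == 0x00);
   vassert(index8x8(v, 7) == 0x07);
   vassert(index8x8(v, 9) == 0x01);   /* 9 & 7 == 1 */
}
#endif
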
/* Scalar helpers. */

static inline Int qadd32S ( Int xx, Int yy )
{
   Long t = ((Long)xx) + ((Long)yy);
   const Long loLim = -0x80000000LL;
   const Long hiLim =  0x7FFFFFFFLL;
   if (t < loLim) t = loLim;
   if (t > hiLim) t = hiLim;
   return (Int)t;
}

static inline Short qadd16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) + ((Int)yy);
   if (t < -32768) t = -32768;
   if (t > 32767)  t = 32767;
   return (Short)t;
}

static inline Char qadd8S ( Char xx, Char yy )
{
   Int t = ((Int)xx) + ((Int)yy);
   if (t < -128) t = -128;
   if (t > 127)  t = 127;
   return (Char)t;
}

static inline UShort qadd16U ( UShort xx, UShort yy )
{
   UInt t = ((UInt)xx) + ((UInt)yy);
   if (t > 0xFFFF) t = 0xFFFF;
   return (UShort)t;
}

static inline UChar qadd8U ( UChar xx, UChar yy )
{
   UInt t = ((UInt)xx) + ((UInt)yy);
   if (t > 0xFF) t = 0xFF;
   return (UChar)t;
}

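/* Worked example (illustrative, guarded out): saturating adds clamp to
   the lane's representable range instead of wrapping around. */
#if 0
static void example_qadd ( void )
{
   vassert(qadd16S(30000, 10000)   == 32767);   /* clamps, not -25536 */
   vassert(qadd16S(-30000, -10000) == -32768);
   vassert(qadd8U(200, 100)        == 255);     /* clamps, not 44 */
}
#endif
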
static inline Int qsub32S ( Int xx, Int yy )
{
   Long t = ((Long)xx) - ((Long)yy);
   const Long loLim = -0x80000000LL;
   const Long hiLim =  0x7FFFFFFFLL;
   if (t < loLim) t = loLim;
   if (t > hiLim) t = hiLim;
   return (Int)t;
}

static inline Short qsub16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < -32768) t = -32768;
   if (t > 32767)  t = 32767;
   return (Short)t;
}

static inline Char qsub8S ( Char xx, Char yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < -128) t = -128;
   if (t > 127)  t = 127;
   return (Char)t;
}

static inline UShort qsub16U ( UShort xx, UShort yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < 0)      t = 0;
   if (t > 0xFFFF) t = 0xFFFF;
   return (UShort)t;
}

static inline UChar qsub8U ( UChar xx, UChar yy )
{
   Int t = ((Int)xx) - ((Int)yy);
   if (t < 0)    t = 0;
   if (t > 0xFF) t = 0xFF;
   return (UChar)t;
}

static inline Short mul16 ( Short xx, Short yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   return (Short)t;
}

static inline Int mul32 ( Int xx, Int yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   return (Int)t;
}

static inline Short mulhi16S ( Short xx, Short yy )
{
   Int t = ((Int)xx) * ((Int)yy);
   t >>=/*s*/ 16;
   return (Short)t;
}

static inline UShort mulhi16U ( UShort xx, UShort yy )
{
   UInt t = ((UInt)xx) * ((UInt)yy);
   t >>=/*u*/ 16;
   return (UShort)t;
}

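/* Worked example (illustrative, guarded out): the mulhi helpers return
   the high 16 bits of the full 32-bit product. */
#if 0
static void example_mulhi ( void )
{
   /* 0x4000 * 0x4000 == 0x10000000; the high half is 0x1000. */
   vassert(mulhi16S(0x4000, 0x4000) == 0x1000);
   vassert(mulhi16U(0xFFFF, 0xFFFF) == 0xFFFE);  /* 0xFFFE0001 >> 16 */
}
#endif
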
static inline UInt cmpeq32 ( UInt xx, UInt yy )
{
   return xx==yy ? 0xFFFFFFFF : 0;
}

static inline UShort cmpeq16 ( UShort xx, UShort yy )
{
   return toUShort(xx==yy ? 0xFFFF : 0);
}

static inline UChar cmpeq8 ( UChar xx, UChar yy )
{
   return toUChar(xx==yy ? 0xFF : 0);
}

static inline UInt cmpgt32S ( Int xx, Int yy )
{
   return xx>yy ? 0xFFFFFFFF : 0;
}

static inline UShort cmpgt16S ( Short xx, Short yy )
{
   return toUShort(xx>yy ? 0xFFFF : 0);
}

static inline UChar cmpgt8S ( Char xx, Char yy )
{
   return toUChar(xx>yy ? 0xFF : 0);
}

static inline UInt cmpnez32 ( UInt xx )
{
   return xx==0 ? 0 : 0xFFFFFFFF;
}

static inline UShort cmpnez16 ( UShort xx )
{
   return toUShort(xx==0 ? 0 : 0xFFFF);
}

static inline UChar cmpnez8 ( UChar xx )
{
   return toUChar(xx==0 ? 0 : 0xFF);
}

static inline Short qnarrow32Sto16S ( UInt xx0 )
{
   Int xx = (Int)xx0;
   if (xx < -32768) xx = -32768;
   if (xx > 32767)  xx = 32767;
   return (Short)xx;
}

static inline Char qnarrow16Sto8S ( UShort xx0 )
{
   Short xx = (Short)xx0;
   if (xx < -128) xx = -128;
   if (xx > 127)  xx = 127;
   return (Char)xx;
}

static inline UChar qnarrow16Sto8U ( UShort xx0 )
{
   Short xx = (Short)xx0;
   if (xx < 0)   xx = 0;
   if (xx > 255) xx = 255;
   return (UChar)xx;
}

static inline UShort narrow32to16 ( UInt xx )
{
   return (UShort)xx;
}

static inline UChar narrow16to8 ( UShort xx )
{
   return (UChar)xx;
}

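/* Worked example (illustrative, guarded out): the qnarrow helpers
   saturate while narrowing, and the signed-to-unsigned variant clamps
   negative inputs to zero; the plain narrow helpers just truncate. */
#if 0
static void example_narrowing ( void )
{
   vassert(qnarrow32Sto16S(0x00012345) == 0x7FFF);  /* too big: clamp */
   vassert(qnarrow16Sto8U(0xFF80) == 0);            /* -128: clamp to 0 */
   vassert(narrow32to16(0x00012345) == 0x2345);     /* plain truncation */
}
#endif
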
/* shifts: we don't care about out-of-range ones, since
   that is dealt with at a higher level. */

static inline UChar shl8 ( UChar v, UInt n )
{
   return toUChar(v << n);
}

static inline UChar sar8 ( UChar v, UInt n )
{
   return toUChar(((Char)v) >> n);
}

static inline UShort shl16 ( UShort v, UInt n )
{
   return toUShort(v << n);
}

static inline UShort shr16 ( UShort v, UInt n )
{
   return toUShort((((UShort)v) >> n));
}

static inline UShort sar16 ( UShort v, UInt n )
{
   return toUShort(((Short)v) >> n);
}

static inline UInt shl32 ( UInt v, UInt n )
{
   return v << n;
}

static inline UInt shr32 ( UInt v, UInt n )
{
   return (((UInt)v) >> n);
}

static inline UInt sar32 ( UInt v, UInt n )
{
   return ((Int)v) >> n;
}

static inline UChar avg8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi + yyi + 1) >> 1;
   return (UChar)r;
}

static inline UShort avg16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi + yyi + 1) >> 1;
   return (UShort)r;
}

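/* Worked example (illustrative, guarded out): the averaging helpers
   compute (x + y + 1) >> 1, i.e. halves round upwards, matching the
   rounding rule of x86 PAVGB/PAVGW. */
#if 0
static void example_avg ( void )
{
   vassert(avg8U(1, 2) == 2);      /* (1+2+1)>>1 */
   vassert(avg8U(0, 0) == 0);
   vassert(avg16U(0xFFFF, 0xFFFF) == 0xFFFF);
}
#endif
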
static inline Short max16S ( Short xx, Short yy )
{
   return toUShort((xx > yy) ? xx : yy);
}

static inline UChar max8U ( UChar xx, UChar yy )
{
   return toUChar((xx > yy) ? xx : yy);
}

static inline Short min16S ( Short xx, Short yy )
{
   return toUShort((xx < yy) ? xx : yy);
}

static inline UChar min8U ( UChar xx, UChar yy )
{
   return toUChar((xx < yy) ? xx : yy);
}

static inline UShort hadd16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi + yyi) >> 1;
   return (UShort)r;
}

static inline Short hadd16S ( Short xx, Short yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r   = (xxi + yyi) >> 1;
   return (Short)r;
}

static inline UShort hsub16U ( UShort xx, UShort yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi - yyi) >> 1;
   return (UShort)r;
}

static inline Short hsub16S ( Short xx, Short yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r   = (xxi - yyi) >> 1;
   return (Short)r;
}

static inline UChar hadd8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi + yyi) >> 1;
   return (UChar)r;
}

static inline Char hadd8S ( Char xx, Char yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r   = (xxi + yyi) >> 1;
   return (Char)r;
}

static inline UChar hsub8U ( UChar xx, UChar yy )
{
   UInt xxi = (UInt)xx;
   UInt yyi = (UInt)yy;
   UInt r   = (xxi - yyi) >> 1;
   return (UChar)r;
}

static inline Char hsub8S ( Char xx, Char yy )
{
   Int xxi = (Int)xx;
   Int yyi = (Int)yy;
   Int r   = (xxi - yyi) >> 1;
   return (Char)r;
}

static inline UInt absdiff8U ( UChar xx, UChar yy )
{
   UInt xxu = (UChar)xx;
   UInt yyu = (UChar)yy;
   return xxu >= yyu ? xxu - yyu : yyu - xxu;
}

/* ----------------------------------------------------- */
/* Start of the externally visible functions.  These simply
   implement the corresponding IR primops. */
/* ----------------------------------------------------- */

/* ------------ Normal addition ------------ */

ULong h_generic_calc_Add32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             sel32x2_1(xx) + sel32x2_1(yy),
             sel32x2_0(xx) + sel32x2_0(yy)
          );
}

ULong h_generic_calc_Add16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             toUShort( sel16x4_3(xx) + sel16x4_3(yy) ),
             toUShort( sel16x4_2(xx) + sel16x4_2(yy) ),
             toUShort( sel16x4_1(xx) + sel16x4_1(yy) ),
             toUShort( sel16x4_0(xx) + sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Add8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             toUChar( sel8x8_7(xx) + sel8x8_7(yy) ),
             toUChar( sel8x8_6(xx) + sel8x8_6(yy) ),
             toUChar( sel8x8_5(xx) + sel8x8_5(yy) ),
             toUChar( sel8x8_4(xx) + sel8x8_4(yy) ),
             toUChar( sel8x8_3(xx) + sel8x8_3(yy) ),
             toUChar( sel8x8_2(xx) + sel8x8_2(yy) ),
             toUChar( sel8x8_1(xx) + sel8x8_1(yy) ),
             toUChar( sel8x8_0(xx) + sel8x8_0(yy) )
          );
}

/* ------------ Saturating addition ------------ */

ULong h_generic_calc_QAdd16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qadd16S( sel16x4_3(xx), sel16x4_3(yy) ),
             qadd16S( sel16x4_2(xx), sel16x4_2(yy) ),
             qadd16S( sel16x4_1(xx), sel16x4_1(yy) ),
             qadd16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QAdd8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qadd8S( sel8x8_7(xx), sel8x8_7(yy) ),
             qadd8S( sel8x8_6(xx), sel8x8_6(yy) ),
             qadd8S( sel8x8_5(xx), sel8x8_5(yy) ),
             qadd8S( sel8x8_4(xx), sel8x8_4(yy) ),
             qadd8S( sel8x8_3(xx), sel8x8_3(yy) ),
             qadd8S( sel8x8_2(xx), sel8x8_2(yy) ),
             qadd8S( sel8x8_1(xx), sel8x8_1(yy) ),
             qadd8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_QAdd16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qadd16U( sel16x4_3(xx), sel16x4_3(yy) ),
             qadd16U( sel16x4_2(xx), sel16x4_2(yy) ),
             qadd16U( sel16x4_1(xx), sel16x4_1(yy) ),
             qadd16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QAdd8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qadd8U( sel8x8_7(xx), sel8x8_7(yy) ),
             qadd8U( sel8x8_6(xx), sel8x8_6(yy) ),
             qadd8U( sel8x8_5(xx), sel8x8_5(yy) ),
             qadd8U( sel8x8_4(xx), sel8x8_4(yy) ),
             qadd8U( sel8x8_3(xx), sel8x8_3(yy) ),
             qadd8U( sel8x8_2(xx), sel8x8_2(yy) ),
             qadd8U( sel8x8_1(xx), sel8x8_1(yy) ),
             qadd8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

/* ------------ Normal subtraction ------------ */

ULong h_generic_calc_Sub32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             sel32x2_1(xx) - sel32x2_1(yy),
             sel32x2_0(xx) - sel32x2_0(yy)
          );
}

ULong h_generic_calc_Sub16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             toUShort( sel16x4_3(xx) - sel16x4_3(yy) ),
             toUShort( sel16x4_2(xx) - sel16x4_2(yy) ),
             toUShort( sel16x4_1(xx) - sel16x4_1(yy) ),
             toUShort( sel16x4_0(xx) - sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Sub8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             toUChar( sel8x8_7(xx) - sel8x8_7(yy) ),
             toUChar( sel8x8_6(xx) - sel8x8_6(yy) ),
             toUChar( sel8x8_5(xx) - sel8x8_5(yy) ),
             toUChar( sel8x8_4(xx) - sel8x8_4(yy) ),
             toUChar( sel8x8_3(xx) - sel8x8_3(yy) ),
             toUChar( sel8x8_2(xx) - sel8x8_2(yy) ),
             toUChar( sel8x8_1(xx) - sel8x8_1(yy) ),
             toUChar( sel8x8_0(xx) - sel8x8_0(yy) )
          );
}

/* ------------ Saturating subtraction ------------ */

ULong h_generic_calc_QSub16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qsub16S( sel16x4_3(xx), sel16x4_3(yy) ),
             qsub16S( sel16x4_2(xx), sel16x4_2(yy) ),
             qsub16S( sel16x4_1(xx), sel16x4_1(yy) ),
             qsub16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QSub8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qsub8S( sel8x8_7(xx), sel8x8_7(yy) ),
             qsub8S( sel8x8_6(xx), sel8x8_6(yy) ),
             qsub8S( sel8x8_5(xx), sel8x8_5(yy) ),
             qsub8S( sel8x8_4(xx), sel8x8_4(yy) ),
             qsub8S( sel8x8_3(xx), sel8x8_3(yy) ),
             qsub8S( sel8x8_2(xx), sel8x8_2(yy) ),
             qsub8S( sel8x8_1(xx), sel8x8_1(yy) ),
             qsub8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_QSub16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             qsub16U( sel16x4_3(xx), sel16x4_3(yy) ),
             qsub16U( sel16x4_2(xx), sel16x4_2(yy) ),
             qsub16U( sel16x4_1(xx), sel16x4_1(yy) ),
             qsub16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_QSub8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             qsub8U( sel8x8_7(xx), sel8x8_7(yy) ),
             qsub8U( sel8x8_6(xx), sel8x8_6(yy) ),
             qsub8U( sel8x8_5(xx), sel8x8_5(yy) ),
             qsub8U( sel8x8_4(xx), sel8x8_4(yy) ),
             qsub8U( sel8x8_3(xx), sel8x8_3(yy) ),
             qsub8U( sel8x8_2(xx), sel8x8_2(yy) ),
             qsub8U( sel8x8_1(xx), sel8x8_1(yy) ),
             qsub8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

/* ------------ Multiplication ------------ */

ULong h_generic_calc_Mul16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mul16( sel16x4_3(xx), sel16x4_3(yy) ),
             mul16( sel16x4_2(xx), sel16x4_2(yy) ),
             mul16( sel16x4_1(xx), sel16x4_1(yy) ),
             mul16( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Mul32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             mul32( sel32x2_1(xx), sel32x2_1(yy) ),
             mul32( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_MulHi16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mulhi16S( sel16x4_3(xx), sel16x4_3(yy) ),
             mulhi16S( sel16x4_2(xx), sel16x4_2(yy) ),
             mulhi16S( sel16x4_1(xx), sel16x4_1(yy) ),
             mulhi16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_MulHi16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             mulhi16U( sel16x4_3(xx), sel16x4_3(yy) ),
             mulhi16U( sel16x4_2(xx), sel16x4_2(yy) ),
             mulhi16U( sel16x4_1(xx), sel16x4_1(yy) ),
             mulhi16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

/* ------------ Comparison ------------ */

ULong h_generic_calc_CmpEQ32x2 ( ULong xx, ULong yy )
{
   return mk32x2(
             cmpeq32( sel32x2_1(xx), sel32x2_1(yy) ),
             cmpeq32( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_CmpEQ16x4 ( ULong xx, ULong yy )
{
   return mk16x4(
             cmpeq16( sel16x4_3(xx), sel16x4_3(yy) ),
             cmpeq16( sel16x4_2(xx), sel16x4_2(yy) ),
             cmpeq16( sel16x4_1(xx), sel16x4_1(yy) ),
             cmpeq16( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_CmpEQ8x8 ( ULong xx, ULong yy )
{
   return mk8x8(
             cmpeq8( sel8x8_7(xx), sel8x8_7(yy) ),
             cmpeq8( sel8x8_6(xx), sel8x8_6(yy) ),
             cmpeq8( sel8x8_5(xx), sel8x8_5(yy) ),
             cmpeq8( sel8x8_4(xx), sel8x8_4(yy) ),
             cmpeq8( sel8x8_3(xx), sel8x8_3(yy) ),
             cmpeq8( sel8x8_2(xx), sel8x8_2(yy) ),
             cmpeq8( sel8x8_1(xx), sel8x8_1(yy) ),
             cmpeq8( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_CmpGT32Sx2 ( ULong xx, ULong yy )
{
   return mk32x2(
             cmpgt32S( sel32x2_1(xx), sel32x2_1(yy) ),
             cmpgt32S( sel32x2_0(xx), sel32x2_0(yy) )
          );
}

ULong h_generic_calc_CmpGT16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             cmpgt16S( sel16x4_3(xx), sel16x4_3(yy) ),
             cmpgt16S( sel16x4_2(xx), sel16x4_2(yy) ),
             cmpgt16S( sel16x4_1(xx), sel16x4_1(yy) ),
             cmpgt16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_CmpGT8Sx8 ( ULong xx, ULong yy )
{
   return mk8x8(
             cmpgt8S( sel8x8_7(xx), sel8x8_7(yy) ),
             cmpgt8S( sel8x8_6(xx), sel8x8_6(yy) ),
             cmpgt8S( sel8x8_5(xx), sel8x8_5(yy) ),
             cmpgt8S( sel8x8_4(xx), sel8x8_4(yy) ),
             cmpgt8S( sel8x8_3(xx), sel8x8_3(yy) ),
             cmpgt8S( sel8x8_2(xx), sel8x8_2(yy) ),
             cmpgt8S( sel8x8_1(xx), sel8x8_1(yy) ),
             cmpgt8S( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_CmpNEZ32x2 ( ULong xx )
{
   return mk32x2(
             cmpnez32( sel32x2_1(xx) ),
             cmpnez32( sel32x2_0(xx) )
          );
}

ULong h_generic_calc_CmpNEZ16x4 ( ULong xx )
{
   return mk16x4(
             cmpnez16( sel16x4_3(xx) ),
             cmpnez16( sel16x4_2(xx) ),
             cmpnez16( sel16x4_1(xx) ),
             cmpnez16( sel16x4_0(xx) )
          );
}

ULong h_generic_calc_CmpNEZ8x8 ( ULong xx )
{
   return mk8x8(
             cmpnez8( sel8x8_7(xx) ),
             cmpnez8( sel8x8_6(xx) ),
             cmpnez8( sel8x8_5(xx) ),
             cmpnez8( sel8x8_4(xx) ),
             cmpnez8( sel8x8_3(xx) ),
             cmpnez8( sel8x8_2(xx) ),
             cmpnez8( sel8x8_1(xx) ),
             cmpnez8( sel8x8_0(xx) )
          );
}

/* ------------ Saturating narrowing ------------ */

ULong h_generic_calc_QNarrowBin32Sto16Sx4 ( ULong aa, ULong bb )
{
   UInt d = sel32x2_1(aa);
   UInt c = sel32x2_0(aa);
   UInt b = sel32x2_1(bb);
   UInt a = sel32x2_0(bb);
   return mk16x4(
             qnarrow32Sto16S(d),
             qnarrow32Sto16S(c),
             qnarrow32Sto16S(b),
             qnarrow32Sto16S(a)
          );
}

ULong h_generic_calc_QNarrowBin16Sto8Sx8 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             qnarrow16Sto8S(h),
             qnarrow16Sto8S(g),
             qnarrow16Sto8S(f),
             qnarrow16Sto8S(e),
             qnarrow16Sto8S(d),
             qnarrow16Sto8S(c),
             qnarrow16Sto8S(b),
             qnarrow16Sto8S(a)
          );
}

ULong h_generic_calc_QNarrowBin16Sto8Ux8 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             qnarrow16Sto8U(h),
             qnarrow16Sto8U(g),
             qnarrow16Sto8U(f),
             qnarrow16Sto8U(e),
             qnarrow16Sto8U(d),
             qnarrow16Sto8U(c),
             qnarrow16Sto8U(b),
             qnarrow16Sto8U(a)
          );
}

/* ------------ Truncating narrowing ------------ */

ULong h_generic_calc_NarrowBin32to16x4 ( ULong aa, ULong bb )
{
   UInt d = sel32x2_1(aa);
   UInt c = sel32x2_0(aa);
   UInt b = sel32x2_1(bb);
   UInt a = sel32x2_0(bb);
   return mk16x4(
             narrow32to16(d),
             narrow32to16(c),
             narrow32to16(b),
             narrow32to16(a)
          );
}

ULong h_generic_calc_NarrowBin16to8x8 ( ULong aa, ULong bb )
{
   UShort h = sel16x4_3(aa);
   UShort g = sel16x4_2(aa);
   UShort f = sel16x4_1(aa);
   UShort e = sel16x4_0(aa);
   UShort d = sel16x4_3(bb);
   UShort c = sel16x4_2(bb);
   UShort b = sel16x4_1(bb);
   UShort a = sel16x4_0(bb);
   return mk8x8(
             narrow16to8(h),
             narrow16to8(g),
             narrow16to8(f),
             narrow16to8(e),
             narrow16to8(d),
             narrow16to8(c),
             narrow16to8(b),
             narrow16to8(a)
          );
}

/* ------------ Interleaving ------------ */

ULong h_generic_calc_InterleaveHI8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             sel8x8_7(aa),
             sel8x8_7(bb),
             sel8x8_6(aa),
             sel8x8_6(bb),
             sel8x8_5(aa),
             sel8x8_5(bb),
             sel8x8_4(aa),
             sel8x8_4(bb)
          );
}

ULong h_generic_calc_InterleaveLO8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             sel8x8_3(aa),
             sel8x8_3(bb),
             sel8x8_2(aa),
             sel8x8_2(bb),
             sel8x8_1(aa),
             sel8x8_1(bb),
             sel8x8_0(aa),
             sel8x8_0(bb)
          );
}

ULong h_generic_calc_InterleaveHI16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_3(aa),
             sel16x4_3(bb),
             sel16x4_2(aa),
             sel16x4_2(bb)
          );
}

ULong h_generic_calc_InterleaveLO16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_1(aa),
             sel16x4_1(bb),
             sel16x4_0(aa),
             sel16x4_0(bb)
          );
}

ULong h_generic_calc_InterleaveHI32x2 ( ULong aa, ULong bb )
{
   return mk32x2(
             sel32x2_1(aa),
             sel32x2_1(bb)
          );
}

ULong h_generic_calc_InterleaveLO32x2 ( ULong aa, ULong bb )
{
   return mk32x2(
             sel32x2_0(aa),
             sel32x2_0(bb)
          );
}

/* ------------ Concatenation ------------ */

ULong h_generic_calc_CatOddLanes16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_3(aa),
             sel16x4_1(aa),
             sel16x4_3(bb),
             sel16x4_1(bb)
          );
}

ULong h_generic_calc_CatEvenLanes16x4 ( ULong aa, ULong bb )
{
   return mk16x4(
             sel16x4_2(aa),
             sel16x4_0(aa),
             sel16x4_2(bb),
             sel16x4_0(bb)
          );
}

/* misc hack looking for a proper home */
ULong h_generic_calc_Perm8x8 ( ULong aa, ULong bb )
{
   return mk8x8(
             index8x8(aa, sel8x8_7(bb)),
             index8x8(aa, sel8x8_6(bb)),
             index8x8(aa, sel8x8_5(bb)),
             index8x8(aa, sel8x8_4(bb)),
             index8x8(aa, sel8x8_3(bb)),
             index8x8(aa, sel8x8_2(bb)),
             index8x8(aa, sel8x8_1(bb)),
             index8x8(aa, sel8x8_0(bb))
          );
}

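/* Worked example (illustrative, guarded out): each byte of bb picks a
   byte of aa, so a descending index vector reverses the bytes of aa. */
#if 0
static void example_perm8x8 ( void )
{
   ULong aa = 0x1122334455667788ULL;
   ULong bb = 0x0001020304050607ULL;  /* lane i selects aa byte 7-i */
   vassert(h_generic_calc_Perm8x8(aa, bb) == 0x8877665544332211ULL);
}
#endif
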
/* ------------ Shifting ------------ */
/* Note that because these primops are undefined if the shift amount
   equals or exceeds the lane width, the shift amount is masked so
   that the scalar shifts are always in range.  In fact, given the
   semantics of these primops (ShlN16x4, etc) it is an error if in
   fact we are ever given an out-of-range shift amount.
*/
ULong h_generic_calc_ShlN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             shl32( sel32x2_1(xx), nn ),
             shl32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_ShlN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             shl16( sel16x4_3(xx), nn ),
             shl16( sel16x4_2(xx), nn ),
             shl16( sel16x4_1(xx), nn ),
             shl16( sel16x4_0(xx), nn )
          );
}

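/* Worked example (illustrative, guarded out): lane-wise shifts do not
   leak bits between lanes, unlike a plain 64-bit shift. */
#if 0
static void example_shln16x4 ( void )
{
   /* Each 16-bit lane is shifted independently; the top bit of a lane
      is discarded rather than carried into its neighbour. */
   vassert(h_generic_calc_ShlN16x4(0x0001000100010001ULL, 4)
           == 0x0010001000100010ULL);
   vassert(h_generic_calc_ShlN16x4(0x8000800080008000ULL, 1) == 0);
}
#endif
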
ULong h_generic_calc_ShlN8x8 ( ULong xx, UInt nn )
{
   /* vassert(nn < 8); */
   nn &= 7;
   return mk8x8(
             shl8( sel8x8_7(xx), nn ),
             shl8( sel8x8_6(xx), nn ),
             shl8( sel8x8_5(xx), nn ),
             shl8( sel8x8_4(xx), nn ),
             shl8( sel8x8_3(xx), nn ),
             shl8( sel8x8_2(xx), nn ),
             shl8( sel8x8_1(xx), nn ),
             shl8( sel8x8_0(xx), nn )
          );
}

ULong h_generic_calc_ShrN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             shr32( sel32x2_1(xx), nn ),
             shr32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_ShrN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             shr16( sel16x4_3(xx), nn ),
             shr16( sel16x4_2(xx), nn ),
             shr16( sel16x4_1(xx), nn ),
             shr16( sel16x4_0(xx), nn )
          );
}

ULong h_generic_calc_SarN32x2 ( ULong xx, UInt nn )
{
   /* vassert(nn < 32); */
   nn &= 31;
   return mk32x2(
             sar32( sel32x2_1(xx), nn ),
             sar32( sel32x2_0(xx), nn )
          );
}

ULong h_generic_calc_SarN16x4 ( ULong xx, UInt nn )
{
   /* vassert(nn < 16); */
   nn &= 15;
   return mk16x4(
             sar16( sel16x4_3(xx), nn ),
             sar16( sel16x4_2(xx), nn ),
             sar16( sel16x4_1(xx), nn ),
             sar16( sel16x4_0(xx), nn )
          );
}

ULong h_generic_calc_SarN8x8 ( ULong xx, UInt nn )
{
   /* vassert(nn < 8); */
   nn &= 7;
   return mk8x8(
             sar8( sel8x8_7(xx), nn ),
             sar8( sel8x8_6(xx), nn ),
             sar8( sel8x8_5(xx), nn ),
             sar8( sel8x8_4(xx), nn ),
             sar8( sel8x8_3(xx), nn ),
             sar8( sel8x8_2(xx), nn ),
             sar8( sel8x8_1(xx), nn ),
             sar8( sel8x8_0(xx), nn )
          );
}

/* ------------ Averaging ------------ */

ULong h_generic_calc_Avg8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             avg8U( sel8x8_7(xx), sel8x8_7(yy) ),
             avg8U( sel8x8_6(xx), sel8x8_6(yy) ),
             avg8U( sel8x8_5(xx), sel8x8_5(yy) ),
             avg8U( sel8x8_4(xx), sel8x8_4(yy) ),
             avg8U( sel8x8_3(xx), sel8x8_3(yy) ),
             avg8U( sel8x8_2(xx), sel8x8_2(yy) ),
             avg8U( sel8x8_1(xx), sel8x8_1(yy) ),
             avg8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_Avg16Ux4 ( ULong xx, ULong yy )
{
   return mk16x4(
             avg16U( sel16x4_3(xx), sel16x4_3(yy) ),
             avg16U( sel16x4_2(xx), sel16x4_2(yy) ),
             avg16U( sel16x4_1(xx), sel16x4_1(yy) ),
             avg16U( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

/* ------------ max/min ------------ */

ULong h_generic_calc_Max16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             max16S( sel16x4_3(xx), sel16x4_3(yy) ),
             max16S( sel16x4_2(xx), sel16x4_2(yy) ),
             max16S( sel16x4_1(xx), sel16x4_1(yy) ),
             max16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Max8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             max8U( sel8x8_7(xx), sel8x8_7(yy) ),
             max8U( sel8x8_6(xx), sel8x8_6(yy) ),
             max8U( sel8x8_5(xx), sel8x8_5(yy) ),
             max8U( sel8x8_4(xx), sel8x8_4(yy) ),
             max8U( sel8x8_3(xx), sel8x8_3(yy) ),
             max8U( sel8x8_2(xx), sel8x8_2(yy) ),
             max8U( sel8x8_1(xx), sel8x8_1(yy) ),
             max8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

ULong h_generic_calc_Min16Sx4 ( ULong xx, ULong yy )
{
   return mk16x4(
             min16S( sel16x4_3(xx), sel16x4_3(yy) ),
             min16S( sel16x4_2(xx), sel16x4_2(yy) ),
             min16S( sel16x4_1(xx), sel16x4_1(yy) ),
             min16S( sel16x4_0(xx), sel16x4_0(yy) )
          );
}

ULong h_generic_calc_Min8Ux8 ( ULong xx, ULong yy )
{
   return mk8x8(
             min8U( sel8x8_7(xx), sel8x8_7(yy) ),
             min8U( sel8x8_6(xx), sel8x8_6(yy) ),
             min8U( sel8x8_5(xx), sel8x8_5(yy) ),
             min8U( sel8x8_4(xx), sel8x8_4(yy) ),
             min8U( sel8x8_3(xx), sel8x8_3(yy) ),
             min8U( sel8x8_2(xx), sel8x8_2(yy) ),
             min8U( sel8x8_1(xx), sel8x8_1(yy) ),
             min8U( sel8x8_0(xx), sel8x8_0(yy) )
          );
}

UInt h_generic_calc_GetMSBs8x8 ( ULong xx )
{
   UInt r = 0;
   if (xx & (1ULL << (64-1))) r |= (1<<7);
   if (xx & (1ULL << (56-1))) r |= (1<<6);
   if (xx & (1ULL << (48-1))) r |= (1<<5);
   if (xx & (1ULL << (40-1))) r |= (1<<4);
   if (xx & (1ULL << (32-1))) r |= (1<<3);
   if (xx & (1ULL << (24-1))) r |= (1<<2);
   if (xx & (1ULL << (16-1))) r |= (1<<1);
   if (xx & (1ULL << ( 8-1))) r |= (1<<0);
   return r;
}

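/* Worked example (illustrative, guarded out): bit i of the result is
   the most significant bit of byte i, in the style of x86 PMOVMSKB. */
#if 0
static void example_getmsbs8x8 ( void )
{
   vassert(h_generic_calc_GetMSBs8x8(0xFF00FF00FF00FF00ULL) == 0xAA);
   vassert(h_generic_calc_GetMSBs8x8(0x0000000000000080ULL) == 0x01);
}
#endif
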
/* ------------ SOME 32-bit SIMD HELPERS TOO ------------ */

/* Tuple/select functions for 16x2 vectors. */
static inline UInt mk16x2 ( UShort w1, UShort w2 ) {
   return (((UInt)w1) << 16) | ((UInt)w2);
}

static inline UShort sel16x2_1 ( UInt w32 ) {
   return 0xFFFF & (UShort)(w32 >> 16);
}
static inline UShort sel16x2_0 ( UInt w32 ) {
   return 0xFFFF & (UShort)(w32);
}

static inline UInt mk8x4 ( UChar w3, UChar w2,
                           UChar w1, UChar w0 ) {
   UInt w32 = (((UInt)w3) << 24) | (((UInt)w2) << 16)
              | (((UInt)w1) << 8) | (((UInt)w0) << 0);
   return w32;
}

static inline UChar sel8x4_3 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 24));
}
static inline UChar sel8x4_2 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 16));
}
static inline UChar sel8x4_1 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 8));
}
static inline UChar sel8x4_0 ( UInt w32 ) {
   return toUChar(0xFF & (w32 >> 0));
}


/* ----------------------------------------------------- */
/* More externally visible functions.  These simply
   implement the corresponding IR primops. */
/* ----------------------------------------------------- */

/* ------ 16x2 ------ */

UInt h_generic_calc_Add16x2 ( UInt xx, UInt yy )
{
   return mk16x2( sel16x2_1(xx) + sel16x2_1(yy),
                  sel16x2_0(xx) + sel16x2_0(yy) );
}

UInt h_generic_calc_Sub16x2 ( UInt xx, UInt yy )
{
   return mk16x2( sel16x2_1(xx) - sel16x2_1(yy),
                  sel16x2_0(xx) - sel16x2_0(yy) );
}

UInt h_generic_calc_HAdd16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( hadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  hadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_HAdd16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( hadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  hadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_HSub16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( hsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  hsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_HSub16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( hsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  hsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QAdd16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( qadd16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  qadd16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QAdd16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( qadd16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  qadd16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QSub16Ux2 ( UInt xx, UInt yy )
{
   return mk16x2( qsub16U( sel16x2_1(xx), sel16x2_1(yy) ),
                  qsub16U( sel16x2_0(xx), sel16x2_0(yy) ) );
}

UInt h_generic_calc_QSub16Sx2 ( UInt xx, UInt yy )
{
   return mk16x2( qsub16S( sel16x2_1(xx), sel16x2_1(yy) ),
                  qsub16S( sel16x2_0(xx), sel16x2_0(yy) ) );
}

/* ------ 8x4 ------ */

UInt h_generic_calc_Add8x4 ( UInt xx, UInt yy )
{
   return mk8x4(
             sel8x4_3(xx) + sel8x4_3(yy),
             sel8x4_2(xx) + sel8x4_2(yy),
             sel8x4_1(xx) + sel8x4_1(yy),
             sel8x4_0(xx) + sel8x4_0(yy)
          );
}

UInt h_generic_calc_Sub8x4 ( UInt xx, UInt yy )
{
   return mk8x4(
             sel8x4_3(xx) - sel8x4_3(yy),
             sel8x4_2(xx) - sel8x4_2(yy),
             sel8x4_1(xx) - sel8x4_1(yy),
             sel8x4_0(xx) - sel8x4_0(yy)
          );
}

UInt h_generic_calc_HAdd8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
             hadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
             hadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
             hadd8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_HAdd8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
             hadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
             hadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
             hadd8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_HSub8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
             hsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
             hsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
             hsub8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_HSub8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             hsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
             hsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
             hsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
             hsub8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QAdd8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qadd8U( sel8x4_3(xx), sel8x4_3(yy) ),
             qadd8U( sel8x4_2(xx), sel8x4_2(yy) ),
             qadd8U( sel8x4_1(xx), sel8x4_1(yy) ),
             qadd8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QAdd8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qadd8S( sel8x4_3(xx), sel8x4_3(yy) ),
             qadd8S( sel8x4_2(xx), sel8x4_2(yy) ),
             qadd8S( sel8x4_1(xx), sel8x4_1(yy) ),
             qadd8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QSub8Ux4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qsub8U( sel8x4_3(xx), sel8x4_3(yy) ),
             qsub8U( sel8x4_2(xx), sel8x4_2(yy) ),
             qsub8U( sel8x4_1(xx), sel8x4_1(yy) ),
             qsub8U( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_QSub8Sx4 ( UInt xx, UInt yy )
{
   return mk8x4(
             qsub8S( sel8x4_3(xx), sel8x4_3(yy) ),
             qsub8S( sel8x4_2(xx), sel8x4_2(yy) ),
             qsub8S( sel8x4_1(xx), sel8x4_1(yy) ),
             qsub8S( sel8x4_0(xx), sel8x4_0(yy) )
          );
}

UInt h_generic_calc_CmpNEZ16x2 ( UInt xx )
{
   return mk16x2(
             cmpnez16( sel16x2_1(xx) ),
             cmpnez16( sel16x2_0(xx) )
          );
}

UInt h_generic_calc_CmpNEZ8x4 ( UInt xx )
{
   return mk8x4(
             cmpnez8( sel8x4_3(xx) ),
             cmpnez8( sel8x4_2(xx) ),
             cmpnez8( sel8x4_1(xx) ),
             cmpnez8( sel8x4_0(xx) )
          );
}

UInt h_generic_calc_Sad8Ux4 ( UInt xx, UInt yy )
{
   return absdiff8U( sel8x4_3(xx), sel8x4_3(yy) )
          + absdiff8U( sel8x4_2(xx), sel8x4_2(yy) )
          + absdiff8U( sel8x4_1(xx), sel8x4_1(yy) )
          + absdiff8U( sel8x4_0(xx), sel8x4_0(yy) );
}

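/* Worked example (illustrative, guarded out): the sum of absolute
   byte-wise differences over the four lanes. */
#if 0
static void example_sad8ux4 ( void )
{
   /* |0x10-0x00| + |0x20-0x30| + |0x30-0x20| + |0x40-0x10|
      == 0x10 + 0x10 + 0x10 + 0x30 == 0x60 */
   vassert(h_generic_calc_Sad8Ux4(0x10203040, 0x00302010) == 0x60);
}
#endif
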
UInt h_generic_calc_QAdd32S ( UInt xx, UInt yy )
{
   return qadd32S( xx, yy );
}

UInt h_generic_calc_QSub32S ( UInt xx, UInt yy )
{
   return qsub32S( xx, yy );
}


/*------------------------------------------------------------------*/
/* Decimal Floating Point (DFP) externally visible helper functions */
/* that implement Iop_BCDtoDPB and Iop_DPBtoBCD                     */
/*------------------------------------------------------------------*/

#define NOT( x )    ( ( ( x ) == 0) ? 1 : 0)
#define GET( x, y ) ( ( ( x ) & ( 0x1UL << ( y ) ) ) >> ( y ) )
#define PUT( x, y ) ( ( x ) << ( y ) )

static ULong dpb_to_bcd( ULong chunk )
{
   Short a, b, c, d, e, f, g, h, i, j, k, m;
   Short p, q, r, s, t, u, v, w, x, y;
   ULong value;

   /* Convert a 10-bit Densely Packed Decimal (DPD) chunk to three
      4-bit BCD digits. */
   p = GET( chunk, 9 );
   q = GET( chunk, 8 );
   r = GET( chunk, 7 );
   s = GET( chunk, 6 );
   t = GET( chunk, 5 );
   u = GET( chunk, 4 );
   v = GET( chunk, 3 );
   w = GET( chunk, 2 );
   x = GET( chunk, 1 );
   y = GET( chunk, 0 );

   /* The BCD bit values are given by the following boolean equations. */
   a = ( NOT(s) & v & w ) | ( t & v & w & s ) | ( v & w & NOT(x) );
   b = ( p & s & x & NOT(t) ) | ( p & NOT(w) ) | ( p & NOT(v) );
   c = ( q & s & x & NOT(t) ) | ( q & NOT(w) ) | ( q & NOT(v) );
   d = r;
   e = ( v & NOT(w) & x ) | ( s & v & w & x ) | ( NOT(t) & v & x & w );
   f = ( p & t & v & w & x & NOT(s) ) | ( s & NOT(x) & v ) | ( s & NOT(v) );
   g = ( q & t & w & v & x & NOT(s) ) | ( t & NOT(x) & v ) | ( t & NOT(v) );
   h = u;
   i = ( t & v & w & x ) | ( s & v & w & x ) | ( v & NOT(w) & NOT(x) );
   j = ( p & NOT(s) & NOT(t) & w & v ) | ( s & v & NOT(w) & x )
       | ( p & w & NOT(x) & v ) | ( w & NOT(v) );
   k = ( q & NOT(s) & NOT(t) & v & w ) | ( t & v & NOT(w) & x )
       | ( q & v & w & NOT(x) ) | ( x & NOT(v) );
   m = y;

   value = PUT(a, 11) | PUT(b, 10) | PUT(c, 9) | PUT(d, 8) | PUT(e, 7)
           | PUT(f, 6) | PUT(g, 5) | PUT(h, 4) | PUT(i, 3) | PUT(j, 2)
           | PUT(k, 1) | PUT(m, 0);
   return value;
}

static ULong bcd_to_dpb( ULong chunk )
{
   Short a, b, c, d, e, f, g, h, i, j, k, m;
   Short p, q, r, s, t, u, v, w, x, y;
   ULong value;
   /* Convert a 3-digit BCD value to a 10-bit Densely Packed Decimal
      (DPD) value.  The boolean equations for each of the DPD bits are
      given in Appendix B of Book 1: Power ISA User Instruction Set.
      The bits of the BCD value are [abcdefghijkm]; the bits of the DPD
      number are [pqrstuvwxy].  The boolean logic equations, in pseudo
      C code, are:
   */
   a = GET( chunk, 11 );
   b = GET( chunk, 10 );
   c = GET( chunk, 9 );
   d = GET( chunk, 8 );
   e = GET( chunk, 7 );
   f = GET( chunk, 6 );
   g = GET( chunk, 5 );
   h = GET( chunk, 4 );
   i = GET( chunk, 3 );
   j = GET( chunk, 2 );
   k = GET( chunk, 1 );
   m = GET( chunk, 0 );

   p = ( f & a & i & NOT(e) ) | ( j & a & NOT(i) ) | ( b & NOT(a) );
   q = ( g & a & i & NOT(e) ) | ( k & a & NOT(i) ) | ( c & NOT(a) );
   r = d;
   s = ( j & NOT(a) & e & NOT(i) ) | ( f & NOT(i) & NOT(e) )
       | ( f & NOT(a) & NOT(e) ) | ( e & i );
   t = ( k & NOT(a) & e & NOT(i) ) | ( g & NOT(i) & NOT(e) )
       | ( g & NOT(a) & NOT(e) ) | ( a & i );
   u = h;
   v = a | e | i;
   w = ( NOT(e) & j & NOT(i) ) | ( e & i ) | a;
   x = ( NOT(a) & k & NOT(i) ) | ( a & i ) | e;
   y = m;

   value = PUT(p, 9) | PUT(q, 8) | PUT(r, 7) | PUT(s, 6) | PUT(t, 5)
           | PUT(u, 4) | PUT(v, 3) | PUT(w, 2) | PUT(x, 1) | y;

   return value;
}

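/* Worked example (illustrative, guarded out): three small BCD digits
   (all less than 8) encode as "aaa bbb 0 ccc" in DPD, so BCD 0x123
   becomes DPD 0x0A3; the two conversions are mutual inverses. */
#if 0
static void example_dpd ( void )
{
   vassert(bcd_to_dpb(0x123) == 0x0A3);
   vassert(dpb_to_bcd(0x0A3) == 0x123);
}
#endif
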
ULong h_calc_DPBtoBCD( ULong dpb )
{
   ULong result, chunk;
   Int i;

   result = 0;

   for (i = 0; i < 5; i++) {
      chunk  = dpb >> ( 4 - i ) * 10;
      result = result << 12;
      result |= dpb_to_bcd( chunk & 0x3FF );
   }
   return result;
}

ULong h_calc_BCDtoDPB( ULong bcd )
{
   ULong result, chunk;
   Int i;

   result = 0;

   for (i = 0; i < 5; i++) {
      chunk  = bcd >> ( 4 - i ) * 12;
      result = result << 10;
      result |= bcd_to_dpb( chunk & 0xFFF );
   }
   return result;
}
#undef NOT
#undef GET
#undef PUT


/* ----------------------------------------------------- */
/* Signed and unsigned integer division that behaves like
   the ARMv7 UDIV and SDIV instructions.

   sdiv32 also behaves like 64-bit v8 SDIV on w-regs.
   udiv32 also behaves like 64-bit v8 UDIV on w-regs.
*/
/* ----------------------------------------------------- */

UInt h_calc_udiv32_w_arm_semantics ( UInt x, UInt y )
{
   // Division by zero --> zero
   if (UNLIKELY(y == 0)) return 0;
   // C requires rounding towards zero, which is also what we need.
   return x / y;
}

ULong h_calc_udiv64_w_arm_semantics ( ULong x, ULong y )
{
   // Division by zero --> zero
   if (UNLIKELY(y == 0)) return 0;
   // C requires rounding towards zero, which is also what we need.
   return x / y;
}

Int h_calc_sdiv32_w_arm_semantics ( Int x, Int y )
{
   // Division by zero --> zero
   if (UNLIKELY(y == 0)) return 0;
   // The single case that produces an unrepresentable result
   if (UNLIKELY( ((UInt)x) == ((UInt)0x80000000)
                 && ((UInt)y) == ((UInt)0xFFFFFFFF) ))
      return (Int)(UInt)0x80000000;
   // Else return the result rounded towards zero.  C89 says
   // this is implementation defined (in the signed case), but gcc
   // promises to round towards zero.  Nevertheless, at startup,
   // in main_main.c, do a check for that.
   return x / y;
}

Long h_calc_sdiv64_w_arm_semantics ( Long x, Long y )
{
   // Division by zero --> zero
   if (UNLIKELY(y == 0)) return 0;
   // The single case that produces an unrepresentable result
   if (UNLIKELY( ((ULong)x) == ((ULong)0x8000000000000000ULL )
                 && ((ULong)y) == ((ULong)0xFFFFFFFFFFFFFFFFULL ) ))
      return (Long)(ULong)0x8000000000000000ULL;
   // Else return the result rounded towards zero.  C89 says
   // this is implementation defined (in the signed case), but gcc
   // promises to round towards zero.  Nevertheless, at startup,
   // in main_main.c, do a check for that.
   return x / y;
}

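/* Worked example (illustrative, guarded out): the two edge cases that
   would trap or be undefined in plain C get defined, ARM-style
   results here. */
#if 0
static void example_arm_div ( void )
{
   vassert(h_calc_sdiv32_w_arm_semantics(10, 0) == 0);
   vassert(h_calc_sdiv32_w_arm_semantics((Int)0x80000000, -1)
           == (Int)0x80000000);
   vassert(h_calc_udiv32_w_arm_semantics(7, 2) == 3);
}
#endif
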

/*---------------------------------------------------------------*/
/*--- end host_generic_simd64.c ---*/
/*---------------------------------------------------------------*/