Blame - none/tests/ppc32/test_isa_2_06_part3.c - platform/external/valgrind

blob: 8c74c09d4ea771e60aa7808815e61aaf6fe97f95 [file] [log] [blame]

sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1	/* Copyright (C) 2011 IBM
				2
				3	Author: Maynard Johnson <maynardj@us.ibm.com>
				4
				5	This program is free software; you can redistribute it and/or
				6	modify it under the terms of the GNU General Public License as
				7	published by the Free Software Foundation; either version 2 of the
				8	License, or (at your option) any later version.
				9
				10	This program is distributed in the hope that it will be useful, but
				11	WITHOUT ANY WARRANTY; without even the implied warranty of
				12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
				13	General Public License for more details.
				14
				15	You should have received a copy of the GNU General Public License
				16	along with this program; if not, write to the Free Software
				17	Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
				18	02111-1307, USA.
				19
				20	The GNU General Public License is contained in the file COPYING.
				21	*/
				22
				23	#ifdef HAS_VSX
				24
				25	#include <stdio.h>
				26	#include <stdint.h>
				27	#include <stdlib.h>
				28	#include <string.h>
				29	#include <malloc.h>
				30	#include <altivec.h>
				31	#include <math.h>
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	32	#include <unistd.h> // getopt
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	33
				34	#ifndef __powerpc64__
				35	typedef uint32_t HWord_t;
				36	#else
				37	typedef uint64_t HWord_t;
				38	#endif /* __powerpc64__ */
				39
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	40	#ifdef VGP_ppc64le_linux
				41	#define isLE 1
				42	#else
				43	#define isLE 0
				44	#endif
				45
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	46	typedef unsigned char Bool;
				47	#define True 1
				48	#define False 0
				49	register HWord_t r14 __asm__ ("r14");
				50	register HWord_t r15 __asm__ ("r15");
				51	register HWord_t r16 __asm__ ("r16");
				52	register HWord_t r17 __asm__ ("r17");
				53	register double f14 __asm__ ("fr14");
				54	register double f15 __asm__ ("fr15");
				55	register double f16 __asm__ ("fr16");
				56	register double f17 __asm__ ("fr17");
				57
				58	static volatile unsigned int div_flags, div_xer;
				59
				60	#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"
				61
				62	#define SET_CR(_arg) \
				63	__asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );
				64
				65	#define SET_XER(_arg) \
				66	__asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );
				67
				68	#define GET_CR(_lval) \
				69	__asm__ __volatile__ ("mfcr %0" : "=b"(_lval) )
				70
				71	#define GET_XER(_lval) \
				72	__asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )
				73
				74	#define GET_CR_XER(_lval_cr,_lval_xer) \
				75	do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)
				76
				77	#define SET_CR_ZERO \
				78	SET_CR(0)
				79
				80	#define SET_XER_ZERO \
				81	SET_XER(0)
				82
				83	#define SET_CR_XER_ZERO \
				84	do { SET_CR_ZERO; SET_XER_ZERO; } while (0)
				85
				86	#define SET_FPSCR_ZERO \
				87	do { double _d = 0.0; \
				88	__asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
				89	} while (0)
				90
				91
				92	typedef void (*test_func_t)(void);
				93	typedef struct test_table test_table_t;
				94
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	95	/* Defines for the instruction groups; each group is identified by one bit of a bit field */
				96	#define SCALAR_DIV_INST 0x0001
				97	#define OTHER_INST 0x0002
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	98
				99	/* These functions below that construct a table of floating point
				100	* values were lifted from none/tests/ppc32/jm-insns.c.
				101	*/
				102
				103	#if defined (DEBUG_ARGS_BUILD)
				104	#define AB_DPRINTF(fmt, args...) do { fprintf(stderr, fmt , ##args); } while (0)
				105	#else
				106	#define AB_DPRINTF(fmt, args...) do { } while (0)
				107	#endif
				108
				109	static inline void register_farg (void *farg,
				110	int s, uint16_t _exp, uint64_t mant)
				111	{
				112	uint64_t tmp;
				113
				114	tmp = ((uint64_t)s << 63) \| ((uint64_t)_exp << 52) \| mant;
				115	(uint64_t )farg = tmp;
				116	AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
				117	s, _exp, mant, (uint64_t )farg, (double )farg);
				118	}
				119
/* Assemble a 32-bit floating-point bit pattern and store it at farg.
 *
 *   farg  destination; must point to at least 4 writable bytes
 *   s     sign bit (0 or 1), placed in bit 31
 *   _exp  biased exponent, placed starting at bit 23
 *   mant  fraction bits, OR-ed into the low bits unchanged
 *
 * The pattern is copied with memcpy so the destination may be declared
 * as a float without violating the aliasing rules.
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t bits = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;

   memcpy(farg, &bits, sizeof bits);
}
				127
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	128
				129	typedef struct fp_test_args {
				130	int fra_idx;
				131	int frb_idx;
				132	} fp_test_args_t;
				133
				134
				135	fp_test_args_t two_arg_fp_tests[] = {
				136	{8, 8},
				137	{8, 14},
				138	{15, 16},
				139	{8, 5},
				140	{8, 4},
				141	{8, 7},
				142	{8, 9},
				143	{8, 11},
				144	{14, 8},
				145	{14, 14},
				146	{14, 6},
				147	{14, 5},
				148	{14, 4},
				149	{14, 7},
				150	{14, 9},
				151	{14, 11},
				152	{6, 8},
				153	{6, 14},
				154	{6, 6},
				155	{6, 5},
				156	{6, 4},
				157	{6, 7},
				158	{6, 9},
				159	{6, 11},
				160	{5, 8},
				161	{5, 14},
				162	{5, 6},
				163	{5, 5},
				164	{5, 4},
				165	{5, 7},
				166	{5, 9},
				167	{5, 11},
				168	{4, 8},
				169	{4, 14},
				170	{4, 6},
				171	{4, 5},
				172	{4, 1},
				173	{4, 7},
				174	{4, 9},
				175	{4, 11},
				176	{7, 8},
				177	{7, 14},
				178	{7, 6},
				179	{7, 5},
				180	{7, 4},
				181	{7, 7},
				182	{7, 9},
				183	{7, 11},
				184	{10, 8},
				185	{10, 14},
				186	{12, 6},
				187	{12, 5},
				188	{10, 4},
				189	{10, 7},
				190	{10, 9},
				191	{10, 11},
				192	{12, 8 },
				193	{12, 14},
				194	{12, 6},
				195	{15, 16},
				196	{15, 16},
				197	{9, 11},
				198	{11, 11},
				199	{11, 12},
				200	{16, 18},
				201	{17, 16},
				202	{19, 19},
				203	{19, 18}
				204	};
				205
				206
				207	static int nb_special_fargs;
				208	static double * spec_fargs;
				209	static float * spec_sp_fargs;
				210
				211	static void build_special_fargs_table(void)
				212	{
				213	/*
				214	Entry Sign Exp fraction Special value
				215	0 0 3fd 0x8000000000000ULL Positive finite number
				216	1 0 404 0xf000000000000ULL ...
				217	2 0 001 0x8000000b77501ULL ...
				218	3 0 7fe 0x800000000051bULL ...
				219	4 0 012 0x3214569900000ULL ...
				220	5 0 000 0x0000000000000ULL +0.0 (+zero)
				221	6 1 000 0x0000000000000ULL -0.0 (-zero)
				222	7 0 7ff 0x0000000000000ULL +infinity
				223	8 1 7ff 0x0000000000000ULL -infinity
				224	9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN
				225	10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN
				226	11 0 7ff 0x8000000000000ULL +QNaN
				227	12 1 7ff 0x8000000000000ULL -QNaN
				228	13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction)
				229	14 1 40d 0x0650f5a07b353ULL Negative finite number
				230	15 0 412 0x32585a9900000ULL A few more positive finite numbers
				231	16 0 413 0x82511a2000000ULL ...
				232	17 . . . . . . . . . . . . . . . . . . . . . . .
				233	18 . . . . . . . . . . . . . . . . . . . . . . .
				234	19 . . . . . . . . . . . . . . . . . . . . . . .
				235	*/
				236
				237	uint64_t mant;
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	238	uint32_t mant_sp;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	239	uint16_t _exp;
				240	int s;
				241	int j, i = 0;
				242
				243	if (spec_fargs)
				244	return;
				245
				246	spec_fargs = malloc( 20 * sizeof(double) );
				247	spec_sp_fargs = malloc( 20 * sizeof(float) );
				248
				249	// #0
				250	s = 0;
				251	_exp = 0x3fd;
				252	mant = 0x8000000000000ULL;
				253	register_farg(&spec_fargs[i++], s, _exp, mant);
				254
				255	// #1
				256	s = 0;
				257	_exp = 0x404;
				258	mant = 0xf000000000000ULL;
				259	register_farg(&spec_fargs[i++], s, _exp, mant);
				260
				261	// #2
				262	s = 0;
				263	_exp = 0x001;
				264	mant = 0x8000000b77501ULL;
				265	register_farg(&spec_fargs[i++], s, _exp, mant);
				266
				267	// #3
				268	s = 0;
				269	_exp = 0x7fe;
				270	mant = 0x800000000051bULL;
				271	register_farg(&spec_fargs[i++], s, _exp, mant);
				272
				273	// #4
				274	s = 0;
				275	_exp = 0x012;
				276	mant = 0x3214569900000ULL;
				277	register_farg(&spec_fargs[i++], s, _exp, mant);
				278
				279
				280	/* Special values */
				281	/* +0.0 : 0 0x000 0x0000000000000 */
				282	// #5
				283	s = 0;
				284	_exp = 0x000;
				285	mant = 0x0000000000000ULL;
				286	register_farg(&spec_fargs[i++], s, _exp, mant);
				287
				288	/* -0.0 : 1 0x000 0x0000000000000 */
				289	// #6
				290	s = 1;
				291	_exp = 0x000;
				292	mant = 0x0000000000000ULL;
				293	register_farg(&spec_fargs[i++], s, _exp, mant);
				294
				295	/* +infinity : 0 0x7FF 0x0000000000000 */
				296	// #7
				297	s = 0;
				298	_exp = 0x7FF;
				299	mant = 0x0000000000000ULL;
				300	register_farg(&spec_fargs[i++], s, _exp, mant);
				301
				302	/* -infinity : 1 0x7FF 0x0000000000000 */
				303	// #8
				304	s = 1;
				305	_exp = 0x7FF;
				306	mant = 0x0000000000000ULL;
				307	register_farg(&spec_fargs[i++], s, _exp, mant);
				308
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	309	/*
				310	* This comment applies to values #9 and #10 below:
				311	* When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
				312	* so we can't just copy the double-precision value to the corresponding slot in the
				313	* single-precision array (i.e., in the loop at the end of this function). Instead, we
				314	* have to manually set the bits using register_sp_farg().
				315	*/
				316
				317	/* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	318	// #9
				319	s = 0;
				320	_exp = 0x7FF;
				321	mant = 0x7FFFFFFFFFFFFULL;
				322	register_farg(&spec_fargs[i++], s, _exp, mant);
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	323	_exp = 0xff;
				324	mant_sp = 0x3FFFFF;
				325	register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	326
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	327	/* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	328	// #10
				329	s = 1;
				330	_exp = 0x7FF;
				331	mant = 0x7FFFFFFFFFFFFULL;
				332	register_farg(&spec_fargs[i++], s, _exp, mant);
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	333	_exp = 0xff;
				334	mant_sp = 0x3FFFFF;
				335	register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	336
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	337	/* +QNaN : 0 0x7FF 0x8000000000000 */
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	338	// #11
				339	s = 0;
				340	_exp = 0x7FF;
				341	mant = 0x8000000000000ULL;
				342	register_farg(&spec_fargs[i++], s, _exp, mant);
				343
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	344	/* -QNaN : 1 0x7FF 0x8000000000000 */
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	345	// #12
				346	s = 1;
				347	_exp = 0x7FF;
				348	mant = 0x8000000000000ULL;
				349	register_farg(&spec_fargs[i++], s, _exp, mant);
				350
				351	/* denormalized value */
				352	// #13
				353	s = 1;
				354	_exp = 0x000;
				355	mant = 0x8340000078000ULL;
				356	register_farg(&spec_fargs[i++], s, _exp, mant);
				357
				358	/* Negative finite number */
				359	// #14
				360	s = 1;
				361	_exp = 0x40d;
				362	mant = 0x0650f5a07b353ULL;
				363	register_farg(&spec_fargs[i++], s, _exp, mant);
				364
				365	/* A few positive finite numbers ... */
				366	// #15
				367	s = 0;
				368	_exp = 0x412;
				369	mant = 0x32585a9900000ULL;
				370	register_farg(&spec_fargs[i++], s, _exp, mant);
				371
				372	// #16
				373	s = 0;
				374	_exp = 0x413;
				375	mant = 0x82511a2000000ULL;
				376	register_farg(&spec_fargs[i++], s, _exp, mant);
				377
				378	// #17
				379	s = 0;
				380	_exp = 0x403;
				381	mant = 0x12ef5a9300000ULL;
				382	register_farg(&spec_fargs[i++], s, _exp, mant);
				383
				384	// #18
				385	s = 0;
				386	_exp = 0x405;
				387	mant = 0x14bf5d2300000ULL;
				388	register_farg(&spec_fargs[i++], s, _exp, mant);
				389
				390	// #19
				391	s = 0;
				392	_exp = 0x409;
				393	mant = 0x76bf982440000ULL;
				394	register_farg(&spec_fargs[i++], s, _exp, mant);
				395
				396	nb_special_fargs = i;
				397	for (j = 0; j < i; j++) {
carll	8efe4e4	2013-09-12 17:38:13 +0000	[diff] [blame]	398	if (!(j == 9 \|\| j == 10))
				399	spec_sp_fargs[j] = spec_fargs[j];
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	400	}
				401	}
				402
				403
				404	struct test_table
				405	{
				406	test_func_t test_category;
				407	char * name;
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	408	unsigned int test_group;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	409	};
				410
				411	/* Type of input for floating point operations.*/
				412	typedef enum {
				413	SINGLE_TEST,
				414	DOUBLE_TEST
				415	} precision_type_t;
				416
				417	typedef enum {
				418	VX_SCALAR_CONV_TO_WORD,
				419	VX_CONV_TO_SINGLE,
				420	VX_CONV_TO_DOUBLE,
				421	VX_ESTIMATE,
				422	VX_DEFAULT
				423	} vx_fp_test_type;
				424
				425	static vector unsigned int vec_out, vec_inA, vec_inB;
				426
				427	/* This function is for checking the reciprocal and reciprocal square root
				428	* estimate instructions.
				429	*/
				430	Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
				431	{
				432	/* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
				433	* 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
				434	* does an actual reciprocal calculation versus estimation, so the answer we get back from
				435	* valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
				436	* precision) and the estimate may still be within expected tolerances. On top of that,
				437	* we can't count on these estimates always being the same across implementations.
				438	* For example, with the fre[s] instruction (which should be correct to within one part
				439	* in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
				440	* one implementation could return 1.0111_1111_0000 and another implementation could return
				441	* 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
				442	* single bit in common.
				443	*
				444	* The upshot is we can't validate the VEX output for these instructions by comparing against
				445	* stored bit patterns. We must check that the result is within expected tolerances.
				446	*/
				447
				448
				449	/* A mask to be used for validation as a last resort.
				450	* Only use 12 bits of precision for reasons discussed above.
				451	*/
				452	#define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
				453	#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
				454
				455	Bool result = False;
				456	Bool dp_test = type == DOUBLE_TEST;
				457	double src_dp, res_dp;
				458	float src_sp, res_sp;
				459	src_dp = res_dp = 0;
				460	src_sp = res_sp = 0;
				461	#define SRC (dp_test ? src_dp : src_sp)
				462	#define RES (dp_test ? res_dp : res_sp)
				463	Bool src_is_negative = False;
				464	Bool res_is_negative = False;
				465	unsigned long long * dst_dp = NULL;
				466	unsigned int * dst_sp = NULL;
				467	if (dp_test) {
				468	unsigned long long * src_dp_ull;
				469	dst_dp = (unsigned long long *) &vec_out;
				470	src_dp = spec_fargs[idx];
				471	src_dp_ull = (unsigned long long *) &src_dp;
				472	src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
				473	res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
				474	memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
				475	} else {
				476	unsigned int * src_sp_uint;
				477	dst_sp = (unsigned int *) &vec_out;
				478	src_sp = spec_sp_fargs[idx];
				479	src_sp_uint = (unsigned int *) &src_sp;
				480	src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
				481	res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
				482	memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
				483	}
				484
				485	// Below are common rules for xvre{d\|s}p and xvrsqrte{d\|s}p
				486	if (isnan(SRC))
				487	return isnan(RES);
				488	if (fpclassify(SRC) == FP_ZERO)
				489	return isinf(RES);
				490	if (!src_is_negative && isinf(SRC))
				491	return !res_is_negative && (fpclassify(RES) == FP_ZERO);
				492	if (is_rsqrte) {
				493	if (src_is_negative)
				494	return isnan(RES);
				495	} else {
				496	if (src_is_negative && isinf(SRC))
				497	return res_is_negative && (fpclassify(RES) == FP_ZERO);
				498	}
				499	if (dp_test) {
				500	double calc_diff;
				501	double real_diff;
				502	double recip_divisor;
				503	double div_result;
				504	double calc_diff_tmp;
				505
				506	if (is_rsqrte)
				507	recip_divisor = sqrt(src_dp);
				508	else
				509	recip_divisor = src_dp;
				510
				511	div_result = 1.0/recip_divisor;
				512	calc_diff_tmp = recip_divisor * 16384.0;
				513	if (isnormal(calc_diff_tmp)) {
				514	calc_diff = fabs(1.0/calc_diff_tmp);
				515	real_diff = fabs(res_dp - div_result);
				516	result = ( ( res_dp == div_result )
				517	\|\| ( real_diff <= calc_diff ) );
				518	} else {
				519	/* Unable to compute theoretical difference, so we fall back to masking out
				520	* un-precise bits.
				521	*/
				522	unsigned long long * div_result_dp = (unsigned long long *) &div_result;
				523	result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
				524	}
				525	/* For debug use . . .
				526	if (!result) {
				527	unsigned long long * dv = &div_result;
				528	unsigned long long * rd = &real_diff;
				529	unsigned long long * cd = &calc_diff;
				530	printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
				531	dv, rd, *cd);
				532	}
				533	*/
				534	} else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
				535	float calc_diff;
				536	float real_diff;
				537	float div_result;
				538	float calc_diff_tmp;
				539	float recip_divisor = sqrt(src_sp);
				540
				541	div_result = 1.0/recip_divisor;
				542	calc_diff_tmp = recip_divisor * 16384.0;
				543	if (isnormal(calc_diff_tmp)) {
				544	calc_diff = fabsf(1.0/calc_diff_tmp);
				545	real_diff = fabsf(res_sp - div_result);
				546	result = ( ( res_sp == div_result )
				547	\|\| ( real_diff <= calc_diff ) );
				548	} else {
				549	/* Unable to compute theoretical difference, so we fall back to masking out
				550	* un-precise bits.
				551	*/
				552	unsigned int * div_result_sp = (unsigned int *) &div_result;
				553	result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
				554	}
				555	/* For debug use . . .
				556	if (!result) {
				557	unsigned long long * dv = &div_result;
				558	unsigned long long * rd = &real_diff;
				559	unsigned long long * cd = &calc_diff;
				560	printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
				561	dv, rd, *cd);
				562	}
				563	*/
				564	}
				565	return result;
				566	}
				567
				568	typedef struct vx_fp_test
				569	{
				570	test_func_t test_func;
				571	const char * name;
				572	fp_test_args_t * targs;
				573	int num_tests;
				574	precision_type_t precision;
				575	vx_fp_test_type type;
				576	const char * op;
				577	} vx_fp_test_t;
				578
				579
				580	static Bool do_dot;
				581
				582	static void test_xvredp(void)
				583	{
				584	__asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				585	}
				586
				587	static void test_xsredp(void)
				588	{
				589	__asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				590	}
				591
				592	static void test_xvrsqrtedp(void)
				593	{
				594	__asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				595	}
				596
				597	static void test_xsrsqrtedp(void)
				598	{
				599	__asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				600	}
				601
				602	static void test_xvrsqrtesp(void)
				603	{
				604	__asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				605	}
				606
				607	static void test_xstsqrtdp(void)
				608	{
				609	__asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
				610	}
				611
				612	static void test_xvtsqrtdp(void)
				613	{
				614	__asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
				615	}
				616
				617	static void test_xvtsqrtsp(void)
				618	{
				619	__asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
				620	}
				621
				622	static void test_xvsqrtdp(void)
				623	{
				624	__asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				625	}
				626
				627	static void test_xvsqrtsp(void)
				628	{
				629	__asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				630	}
				631
				632	static void test_xvtdivdp(void)
				633	{
				634	__asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
				635	}
				636
				637	static void test_xvtdivsp(void)
				638	{
				639	__asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
				640	}
				641
				642	static void test_xscvdpsp(void)
				643	{
				644	__asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				645	}
				646
				647	static void test_xscvdpuxws(void)
				648	{
				649	__asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				650	}
				651
				652	static void test_xscvspdp(void)
				653	{
				654	__asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				655	}
				656
				657	static void test_xvcvdpsp(void)
				658	{
				659	__asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				660	}
				661
				662	static void test_xvcvdpuxds(void)
				663	{
				664	__asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				665	}
				666
				667	static void test_xvcvdpuxws(void)
				668	{
				669	__asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				670	}
				671
				672	static void test_xvcvspdp(void)
				673	{
				674	__asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				675	}
				676
				677	static void test_xvcvspsxds(void)
				678	{
				679	__asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				680	}
				681
				682	static void test_xvcvspuxds(void)
				683	{
				684	__asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				685	}
				686
				687	static void test_xvcvdpsxds(void)
				688	{
				689	__asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				690	}
				691
				692	static void test_xvcvspuxws(void)
				693	{
				694	__asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				695	}
				696
				697	static void test_xvcvsxddp(void)
				698	{
				699	__asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				700	}
				701
				702	static void test_xvcvuxddp(void)
				703	{
				704	__asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				705	}
				706
				707	static void test_xvcvsxdsp(void)
				708	{
				709	__asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				710	}
				711
				712	static void test_xvcvuxdsp(void)
				713	{
				714	__asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				715	}
				716
				717	static void test_xvcvsxwdp(void)
				718	{
				719	__asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				720	}
				721
				722	static void test_xvcvuxwdp(void)
				723	{
				724	__asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				725	}
				726
				727	static void test_xvcvsxwsp(void)
				728	{
				729	__asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				730	}
				731
				732	static void test_xvcvuxwsp(void)
				733	{
				734	__asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				735	}
				736
				737	static void test_xsrdpic(void)
				738	{
				739	__asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				740	}
				741
				742	static void test_xsrdpiz(void)
				743	{
				744	__asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				745	}
				746
				747	static void test_xsrdpi(void)
				748	{
				749	__asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				750	}
				751
				752	static void test_xvabsdp(void)
				753	{
				754	__asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				755	}
				756
				757	static void test_xvnabsdp(void)
				758	{
				759	__asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				760	}
				761
				762	static void test_xvnegdp(void)
				763	{
				764	__asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				765	}
				766
				767	static void test_xvabssp(void)
				768	{
				769	__asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				770	}
				771
				772	static void test_xvnabssp(void)
				773	{
				774	__asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				775	}
				776
				777	static void test_xvrdpi(void)
				778	{
				779	__asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				780	}
				781
				782	static void test_xvrdpic(void)
				783	{
				784	__asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				785	}
				786
				787	static void test_xvrdpim(void)
				788	{
				789	__asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				790	}
				791
				792	static void test_xvrdpip(void)
				793	{
				794	__asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				795	}
				796
				797	static void test_xvrdpiz(void)
				798	{
				799	__asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				800	}
				801
				802	static void test_xvrspi(void)
				803	{
				804	__asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				805	}
				806
				807	static void test_xvrspic(void)
				808	{
				809	__asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				810	}
				811
				812	static void test_xvrspim(void)
				813	{
				814	__asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				815	}
				816
				817	static void test_xvrspip(void)
				818	{
				819	__asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				820	}
				821
				822	static void test_xvrspiz(void)
				823	{
				824	__asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
				825	}
				826
				827	static vx_fp_test_t
				828	vsx_one_fp_arg_tests[] = {
				829	{ &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
				830	{ &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
				831	{ &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
				832	{ &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
				833	{ &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
				834	{ &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
				835	{ &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
				836	{ &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
				837	{ &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
				838	{ &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
				839	{ &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
				840	{ &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
				841	{ &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
				842	{ &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
				843	{ &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
				844	{ &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
				845	{ &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
				846	{ &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
				847	{ &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				848	{ &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				849	{ &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				850	{ &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
				851	{ &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
				852	{ &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
				853	{ &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
				854	{ &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
				855	{ &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				856	{ &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				857	{ &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				858	{ &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				859	{ &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
				860	{ &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
				861	{ &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
				862	{ &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
				863	{ &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
				864	{ &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
				865	{ NULL, NULL, NULL, 0, 0, 0, NULL}
				866	};
				867
				868	static vx_fp_test_t
				869	vx_tdivORtsqrt_tests[] = {
				870	{ &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
				871	{ &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
				872	{ &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
				873	{ &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
				874	{ &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
				875	{ NULL, NULL, NULL, 0 , 0, 0, NULL}
				876	};
				877
				878	static unsigned long long doubleWord[] = { 0,
				879	0xffffffff00000000LL,
				880	0x00000000ffffffffLL,
				881	0xffffffffffffffffLL,
				882	0x89abcde123456789LL,
				883	0x0102030405060708LL,
				884	0x00000000a0b1c2d3LL,
				885	0x1111222233334444LL
				886	};
				887
				888	static unsigned int singleWord[] = {0,
				889	0xffff0000,
				890	0x0000ffff,
				891	0xffffffff,
				892	0x89a73522,
				893	0x01020304,
				894	0x0000abcd,
				895	0x11223344
				896	};
				897
				898	typedef struct vx_intToFp_test
				899	{
				900	test_func_t test_func;
				901	const char * name;
				902	void * targs;
				903	int num_tests;
				904	precision_type_t precision;
				905	vx_fp_test_type type;
				906	} vx_intToFp_test_t;
				907
				908	static vx_intToFp_test_t
				909	intToFp_tests[] = {
				910	{ test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
				911	{ test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
				912	{ test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
				913	{ test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
				914	{ test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
				915	{ test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
				916	{ test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
				917	{ test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
				918	{ NULL, NULL, NULL, 0, 0 }
				919	};
				920
				921	static Bool do_OE;
				922	typedef enum {
				923	DIV_BASE = 1,
				924	DIV_OE = 2,
				925	DIV_DOT = 4,
				926	} div_type_t;
				927	/* Possible divde type combinations are:
				928	* - base
				929	* - base+dot
				930	* - base+OE
				931	* - base+OE+dot
				932	*/
				933	#ifdef __powerpc64__
				934	static void test_divdeu(void)
				935	{
				936	int divdeu_type = DIV_BASE;
				937	if (do_OE)
				938	divdeu_type \|= DIV_OE;
				939	if (do_dot)
				940	divdeu_type \|= DIV_DOT;
				941
				942	switch (divdeu_type) {
				943	case 1:
				944	SET_CR_XER_ZERO;
				945	__asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				946	GET_CR_XER(div_flags, div_xer);
				947	break;
				948	case 3:
				949	SET_CR_XER_ZERO;
				950	__asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				951	GET_CR_XER(div_flags, div_xer);
				952	break;
				953	case 5:
				954	SET_CR_XER_ZERO;
				955	__asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				956	GET_CR_XER(div_flags, div_xer);
				957	break;
				958	case 7:
				959	SET_CR_XER_ZERO;
				960	__asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				961	GET_CR_XER(div_flags, div_xer);
				962	break;
				963	default:
				964	fprintf(stderr, "Invalid divdeu type. Exiting\n");
				965	exit(1);
				966	}
				967	}
				968	#endif
				969
				970	static void test_divwe(void)
				971	{
				972	int divwe_type = DIV_BASE;
				973	if (do_OE)
				974	divwe_type \|= DIV_OE;
				975	if (do_dot)
				976	divwe_type \|= DIV_DOT;
				977
				978	switch (divwe_type) {
				979	case 1:
				980	SET_CR_XER_ZERO;
				981	__asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				982	GET_CR_XER(div_flags, div_xer);
				983	break;
				984	case 3:
				985	SET_CR_XER_ZERO;
				986	__asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				987	GET_CR_XER(div_flags, div_xer);
				988	break;
				989	case 5:
				990	SET_CR_XER_ZERO;
				991	__asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				992	GET_CR_XER(div_flags, div_xer);
				993	break;
				994	case 7:
				995	SET_CR_XER_ZERO;
				996	__asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
				997	GET_CR_XER(div_flags, div_xer);
				998	break;
				999	default:
				1000	fprintf(stderr, "Invalid divweu type. Exiting\n");
				1001	exit(1);
				1002	}
				1003	}
				1004
				1005
				1006	typedef struct simple_test {
				1007	test_func_t test_func;
				1008	char * name;
				1009	precision_type_t precision;
				1010	} simple_test_t;
				1011
				1012
				1013	static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
				1014	{
				1015	int a_idx, b_idx, i;
				1016	void * inA, * inB;
				1017	void * vec_src = swap_inputs ? &vec_out : &vec_inB;
				1018
				1019	for (i = 0; i < 4; i++) {
				1020	a_idx = targs->fra_idx;
				1021	b_idx = targs->frb_idx;
				1022	inA = (void *)&spec_sp_fargs[a_idx];
				1023	inB = (void *)&spec_sp_fargs[b_idx];
				1024	// copy single precision FP into vector element i
				1025	memcpy(((void )&vec_inA) + (i 4), inA, 4);
				1026	memcpy(vec_src + (i * 4), inB, 4);
				1027	targs++;
				1028	}
				1029	}
				1030
				1031	static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
				1032	{
				1033	int a_idx, b_idx, i;
				1034	void * inA, * inB;
				1035	void * vec_src = swap_inputs ? (void )&vec_out : (void )&vec_inB;
				1036
				1037	for (i = 0; i < 2; i++) {
				1038	a_idx = targs->fra_idx;
				1039	b_idx = targs->frb_idx;
				1040	inA = (void *)&spec_fargs[a_idx];
				1041	inB = (void *)&spec_fargs[b_idx];
				1042	// copy double precision FP into vector element i
				1043	memcpy(((void )&vec_inA) + (i 8), inA, 8);
				1044	memcpy(vec_src + (i * 8), inB, 8);
				1045	targs++;
				1046	}
				1047	}
				1048
				1049	#define VX_NOT_CMP_OP 0xffffffff
				1050	static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
				1051	{
				1052	int a_idx, b_idx, k;
				1053	char * name = malloc(20);
				1054	int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
				1055	int loops = dp ? 2 : 4;
				1056	fp_test_args_t * targs = &test_group->targs[i];
				1057	unsigned long long * frA_dp, * frB_dp, * dst_dp;
				1058	unsigned int * frA_sp, frB_sp, dst_sp;
				1059	strcpy(name, test_group->name);
				1060	printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
				1061	for (k = 0; k < loops; k++) {
				1062	a_idx = targs->fra_idx;
				1063	b_idx = targs->frb_idx;
				1064	if (k)
				1065	printf(" AND ");
				1066	if (dp) {
				1067	frA_dp = (unsigned long long *)&spec_fargs[a_idx];
				1068	frB_dp = (unsigned long long *)&spec_fargs[b_idx];
				1069	printf("%016llx %s %016llx", frA_dp, test_group->op, frB_dp);
				1070	} else {
				1071	frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
				1072	frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
				1073	printf("%08x %s %08x", frA_sp, test_group->op, frB_sp);
				1074	}
				1075	targs++;
				1076	}
				1077	if (cc != VX_NOT_CMP_OP)
				1078	printf(" ? cc=%x", cc);
				1079
				1080	if (print_vec_out) {
				1081	if (dp) {
				1082	dst_dp = (unsigned long long *) &vec_out;
				1083	printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
				1084	} else {
				1085	dst_sp = (unsigned int *) &vec_out;
				1086	printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
				1087	}
				1088	} else {
				1089	printf("\n");
				1090	}
				1091	free(name);
				1092	}
				1093
				1094
				1095
				1096	static void test_vsx_one_fp_arg(void)
				1097	{
				1098	test_func_t func;
				1099	int k;
				1100	k = 0;
				1101	build_special_fargs_table();
				1102
				1103	while ((func = vsx_one_fp_arg_tests[k].test_func)) {
				1104	int idx, i;
				1105	vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
				1106	Bool estimate = (test_group.type == VX_ESTIMATE);
				1107	Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
				1108	Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
				1109	Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
				1110	Bool sparse_sp = False;
				1111	int stride = dp ? 2 : 4;
				1112	int loops = is_scalar ? 1 : stride;
				1113	stride = is_scalar ? 1: stride;
				1114
				1115	/* For conversions of single to double, the 128-bit input register is sparsely populated:
				1116	* \|___ SP___\|_Unused_\|___SP___\|__Unused__\| // for vector op
				1117	* or
				1118	* \|___ SP___\|_Unused_\|_Unused_\|__Unused__\| // for scalar op
				1119	*
				1120	* For the vector op case, we need to adjust stride from '4' to '2', since
				1121	* we'll only be loading two values per loop into the input register.
				1122	*/
				1123	if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
				1124	sparse_sp = True;
				1125	stride = 2;
				1126	}
				1127
				1128	for (i = 0; i < test_group.num_tests; i+=stride) {
				1129	unsigned int * pv;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1130	void * inB, * vecB_void_ptr = (void *)&vec_inB;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1131
				1132	pv = (unsigned int *)&vec_out;
				1133	// clear vec_out
				1134	for (idx = 0; idx < 4; idx++, pv++)
				1135	*pv = 0;
				1136
				1137	if (dp) {
				1138	int j;
				1139	unsigned long long * frB_dp, *dst_dp;
				1140	for (j = 0; j < loops; j++) {
				1141	inB = (void *)&spec_fargs[i + j];
				1142	// copy double precision FP into vector element i
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1143	if (isLE && is_scalar)
				1144	vecB_void_ptr += 8;
				1145	memcpy(vecB_void_ptr + (j * 8), inB, 8);
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1146	}
				1147	// execute test insn
				1148	(*func)();
				1149	dst_dp = (unsigned long long *) &vec_out;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1150	if (isLE && is_scalar)
				1151	dst_dp++;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1152	printf("#%d: %s ", i/stride, test_group.name);
				1153	for (j = 0; j < loops; j++) {
				1154	if (j)
				1155	printf("; ");
				1156	frB_dp = (unsigned long long *)&spec_fargs[i + j];
				1157	printf("%s(%016llx)", test_group.op, *frB_dp);
				1158	if (estimate) {
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1159	Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1160	printf(" ==> %s)", res ? "PASS" : "FAIL");
				1161	/* For debugging . . .
				1162	printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
				1163	*/
				1164	} else {
				1165	vx_fp_test_type type = test_group.type;
				1166	switch (type) {
				1167	case VX_SCALAR_CONV_TO_WORD:
				1168	printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
				1169	break;
				1170	case VX_CONV_TO_SINGLE:
				1171	printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
				1172	break;
				1173	default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
				1174	printf(" = %016llx", dst_dp[j]);
				1175	}
				1176	}
				1177	}
				1178	printf("\n");
				1179	} else {
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1180	int j;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1181	unsigned int * frB_sp, * dst_sp = NULL;
				1182	unsigned long long * dst_dp = NULL;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1183	if (sparse_sp)
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1184	loops = 2;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1185	for (j = 0; j < loops; j++) {
				1186	inB = (void *)&spec_sp_fargs[i + j];
				1187	// copy single precision FP into vector element i
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1188	if (sparse_sp) {
				1189	if (isLE)
				1190	memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
				1191	else
				1192	memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
				1193	} else {
				1194	if (isLE && is_scalar)
				1195	vecB_void_ptr += 12;
				1196	memcpy(vecB_void_ptr + (j * 4), inB, 4);
				1197	}
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1198	}
				1199	// execute test insn
				1200	(*func)();
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1201	if (test_group.type == VX_CONV_TO_DOUBLE) {
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1202	dst_dp = (unsigned long long *) &vec_out;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1203	if (isLE && is_scalar)
				1204	dst_dp++;
				1205	} else {
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1206	dst_sp = (unsigned int *) &vec_out;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1207	if (isLE && is_scalar)
				1208	dst_sp += 3;
				1209	}
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1210	// print result
				1211	printf("#%d: %s ", i/stride, test_group.name);
				1212	for (j = 0; j < loops; j++) {
				1213	if (j)
				1214	printf("; ");
				1215	frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
				1216	printf("%s(%08x)", test_group.op, *frB_sp);
				1217	if (estimate) {
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1218	Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1219	printf(" ==> %s)", res ? "PASS" : "FAIL");
				1220	} else {
				1221	if (test_group.type == VX_CONV_TO_DOUBLE)
				1222	printf(" = %016llx", dst_dp[j]);
				1223	else
				1224	/* Special case: Current VEX implementation for fsqrts (single precision)
				1225	* uses the same implementation as that used for double precision fsqrt.
				1226	* However, I've found that for xvsqrtsp, the result from that implementation
				1227	* may be off by the two LSBs. Generally, even this small inaccuracy can cause the
				1228	* output to appear very different if you end up with a carry. But for the given
				1229	* inputs in this testcase, we can simply mask out these bits.
				1230	*/
				1231	printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
				1232	}
				1233	}
				1234	printf("\n");
				1235	}
				1236	}
				1237	k++;
				1238	printf( "\n" );
				1239	}
				1240	}
				1241
				1242	static void test_int_to_fp_convert(void)
				1243	{
				1244	test_func_t func;
				1245	int k;
				1246	k = 0;
				1247
				1248	while ((func = intToFp_tests[k].test_func)) {
				1249	int idx, i;
				1250	vx_intToFp_test_t test_group = intToFp_tests[k];
				1251	Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
				1252	Bool sparse_sp = False;
				1253	int stride = dp ? 2 : 4;
				1254	int loops = stride;
				1255
				1256	/* For conversions of single to double, the 128-bit input register is sparsely populated:
				1257	* \|___ int___\|_Unused_\|___int___\|__Unused__\| // for vector op
				1258	* or
				1259	* We need to adjust stride from '4' to '2', since we'll only be loading
				1260	* two values per loop into the input register.
				1261	*/
				1262	if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
				1263	sparse_sp = True;
				1264	stride = 2;
				1265	}
				1266
				1267	for (i = 0; i < test_group.num_tests; i+=stride) {
				1268	unsigned int * pv;
				1269	void * inB;
				1270
				1271	pv = (unsigned int *)&vec_out;
				1272	// clear vec_out
				1273	for (idx = 0; idx < 4; idx++, pv++)
				1274	*pv = 0;
				1275
				1276	if (dp) {
				1277	int j;
				1278	unsigned long long dst_dw, targs = test_group.targs;
				1279	for (j = 0; j < loops; j++) {
				1280	inB = (void *)&targs[i + j];
				1281	// copy doubleword into vector element i
				1282	memcpy(((void )&vec_inB) + (j 8), inB, 8);
				1283	}
				1284	// execute test insn
				1285	(*func)();
				1286	dst_dw = (unsigned long long *) &vec_out;
				1287	printf("#%d: %s ", i/stride, test_group.name);
				1288	for (j = 0; j < loops; j++) {
				1289	if (j)
				1290	printf("; ");
				1291	printf("conv(%016llx)", targs[i + j]);
				1292
				1293	if (test_group.type == VX_CONV_TO_SINGLE)
				1294	printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
				1295	else
				1296	printf(" = %016llx", dst_dw[j]);
				1297	}
				1298	printf("\n");
				1299	} else {
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1300	int j;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1301	unsigned int * dst_sp = NULL;
				1302	unsigned int * targs = test_group.targs;
				1303	unsigned long long * dst_dp = NULL;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1304	void * vecB_void_ptr = (void *)&vec_inB;
				1305	if (sparse_sp)
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1306	loops = 2;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1307	for (j = 0; j < loops; j++) {
				1308	inB = (void *)&targs[i + j];
				1309	// copy single word into vector element i
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1310	if (sparse_sp) {
				1311	if (isLE)
				1312	memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
				1313	else
				1314	memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
				1315	} else {
				1316	memcpy(vecB_void_ptr + (j * 4), inB, 4);
				1317	}
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1318	}
				1319	// execute test insn
				1320	(*func)();
				1321	if (test_group.type == VX_CONV_TO_DOUBLE)
				1322	dst_dp = (unsigned long long *) &vec_out;
				1323	else
				1324	dst_sp = (unsigned int *) &vec_out;
				1325	// print result
				1326	printf("#%d: %s ", i/stride, test_group.name);
				1327	for (j = 0; j < loops; j++) {
				1328	if (j)
				1329	printf("; ");
				1330	printf("conv(%08x)", targs[i + j]);
				1331	if (test_group.type == VX_CONV_TO_DOUBLE)
				1332	printf(" = %016llx", dst_dp[j]);
				1333	else
				1334	printf(" = %08x", dst_sp[j]);
				1335	}
				1336	printf("\n");
				1337	}
				1338	}
				1339	k++;
				1340	printf( "\n" );
				1341	}
				1342	}
				1343
				1344
				1345
				1346	// The div doubleword test data
				1347	signed long long div_dw_tdata[13][2] = {
				1348	{ 4, -4 },
				1349	{ 4, -3 },
				1350	{ 4, 4 },
				1351	{ 4, -5 },
				1352	{ 3, 8 },
bart	7ea7aa2	2012-06-23 11:04:01 +0000	[diff] [blame]	1353	{ 0x8000000000000000ULL, 0xa },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1354	{ 0x50c, -1 },
				1355	{ 0x50c, -4096 },
				1356	{ 0x1234fedc, 0x8000a873 },
bart	7ea7aa2	2012-06-23 11:04:01 +0000	[diff] [blame]	1357	{ 0xabcd87651234fedcULL, 0xa123b893 },
				1358	{ 0x123456789abdcULL, 0 },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1359	{ 0, 2 },
				1360	{ 0x77, 0xa3499 }
				1361	};
				1362	#define dw_tdata_len (sizeof(div_dw_tdata)/sizeof(signed long long)/2)
				1363
				1364	// The div word test data
				1365	unsigned int div_w_tdata[6][2] = {
				1366	{ 0, 2 },
				1367	{ 2, 0 },
				1368	{ 0x7abc1234, 0xf0000000 },
				1369	{ 0xfabc1234, 5 },
				1370	{ 77, 66 },
				1371	{ 5, 0xfabc1234 },
				1372	};
				1373	#define w_tdata_len (sizeof(div_w_tdata)/sizeof(unsigned int)/2)
				1374
				1375	typedef struct div_ext_test
				1376	{
				1377	test_func_t test_func;
				1378	const char *name;
				1379	int num_tests;
				1380	div_type_t div_type;
				1381	precision_type_t precision;
				1382	} div_ext_test_t;
				1383
				1384	static div_ext_test_t div_tests[] = {
				1385	#ifdef __powerpc64__
				1386	{ &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
				1387	{ &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
				1388	#endif
				1389	{ &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
				1390	{ &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
				1391	{ NULL, NULL, 0, 0, 0 }
				1392	};
				1393
				1394	static void test_div_extensions(void)
				1395	{
				1396	test_func_t func;
				1397	int k;
				1398	k = 0;
				1399
				1400	while ((func = div_tests[k].test_func)) {
				1401	int i, repeat = 1;
				1402	div_ext_test_t test_group = div_tests[k];
				1403	do_dot = False;
				1404
				1405	again:
				1406	for (i = 0; i < test_group.num_tests; i++) {
				1407	unsigned int condreg;
				1408
				1409	if (test_group.div_type == DIV_OE)
				1410	do_OE = True;
				1411	else
				1412	do_OE = False;
				1413
				1414	if (test_group.precision == DOUBLE_TEST) {
				1415	r14 = div_dw_tdata[i][0];
				1416	r15 = div_dw_tdata[i][1];
				1417	} else {
				1418	r14 = div_w_tdata[i][0];
				1419	r15 = div_w_tdata[i][1];
				1420	}
				1421	// execute test insn
				1422	(*func)();
				1423	condreg = (div_flags & 0xf0000000) >> 28;
				1424	printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
				1425	if (test_group.precision == DOUBLE_TEST) {
				1426	printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
				1427	div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
				1428	} else {
				1429	printf("0x%08x00000000 / 0x%08x = 0x%08x;",
				1430	div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
				1431	}
				1432	printf(" CR=%x; XER=%x\n", condreg, div_xer);
				1433	}
				1434	printf("\n");
				1435	if (repeat) {
				1436	repeat = 0;
				1437	do_dot = True;
				1438	goto again;
				1439	}
				1440	k++;
				1441	printf( "\n" );
				1442	}
				1443	}
				1444
				1445
				1446	static void test_vx_tdivORtsqrt(void)
				1447	{
				1448	test_func_t func;
				1449	int k, crx;
				1450	unsigned int flags;
				1451	k = 0;
				1452	do_dot = False;
				1453	build_special_fargs_table();
				1454
				1455	while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
				1456	int idx, i;
				1457	vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
				1458	Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
				1459	Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
				1460	Bool two_args = test_group.targs ? True : False;
				1461	int stride = dp ? 2 : 4;
				1462	int loops = is_scalar ? 1 : stride;
				1463	stride = is_scalar ? 1: stride;
				1464
				1465	for (i = 0; i < test_group.num_tests; i+=stride) {
				1466	unsigned int * pv;
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1467	void * inB, * vecB_void_ptr = (void *)&vec_inB;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1468
				1469	pv = (unsigned int *)&vec_out;
				1470	// clear vec_out
				1471	for (idx = 0; idx < 4; idx++, pv++)
				1472	*pv = 0;
				1473
				1474	if (dp) {
				1475	int j;
				1476	unsigned long long * frB_dp;
				1477	if (two_args) {
				1478	setup_dp_fp_args(&test_group.targs[i], False);
				1479	} else {
				1480	for (j = 0; j < loops; j++) {
				1481	inB = (void *)&spec_fargs[i + j];
				1482	// copy double precision FP into vector element i
carll	dd690bf	2014-08-07 23:49:27 +0000	[diff] [blame]	1483	if (isLE && is_scalar)
				1484	vecB_void_ptr += 8;
				1485	memcpy(vecB_void_ptr + (j * 8), inB, 8);
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1486	}
				1487	}
				1488	// execute test insn
				1489	// Must do set/get of CRs immediately before/after calling the asm func
				1490	// to avoid CRs being modified by other instructions.
				1491	SET_FPSCR_ZERO;
				1492	SET_CR_XER_ZERO;
				1493	(*func)();
				1494	GET_CR(flags);
				1495	// assumes using CR1
				1496	crx = (flags & 0x0f000000) >> 24;
				1497	if (two_args) {
				1498	print_vector_fp_result(crx, &test_group, i, False/do not print vec_out/);
				1499	} else {
				1500	printf("#%d: %s ", i/stride, test_group.name);
				1501	for (j = 0; j < loops; j++) {
				1502	if (j)
				1503	printf("; ");
				1504	frB_dp = (unsigned long long *)&spec_fargs[i + j];
				1505	printf("%s(%016llx)", test_group.op, *frB_dp);
				1506	}
				1507	printf( " ? %x (CRx)\n", crx);
				1508	}
				1509	} else {
				1510	int j;
				1511	unsigned int * frB_sp;
				1512	if (two_args) {
				1513	setup_sp_fp_args(&test_group.targs[i], False);
				1514	} else {
				1515	for (j = 0; j < loops; j++) {
				1516	inB = (void *)&spec_sp_fargs[i + j];
				1517	// copy single precision FP into vector element i
				1518	memcpy(((void )&vec_inB) + (j 4), inB, 4);
				1519	}
				1520	}
				1521	// execute test insn
				1522	SET_FPSCR_ZERO;
				1523	SET_CR_XER_ZERO;
				1524	(*func)();
				1525	GET_CR(flags);
				1526	crx = (flags & 0x0f000000) >> 24;
				1527	// print result
				1528	if (two_args) {
				1529	print_vector_fp_result(crx, &test_group, i, False/do not print vec_out/);
				1530	} else {
				1531	printf("#%d: %s ", i/stride, test_group.name);
				1532	for (j = 0; j < loops; j++) {
				1533	if (j)
				1534	printf("; ");
				1535	frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
				1536	printf("%s(%08x)", test_group.op, *frB_sp);
				1537	}
				1538	printf( " ? %x (CRx)\n", crx);
				1539	}
				1540	}
				1541	}
				1542	k++;
				1543	printf( "\n" );
				1544	}
				1545	}
				1546
				1547
				1548	static void test_ftsqrt(void)
				1549	{
				1550	int i, crx;
				1551	unsigned int flags;
				1552	unsigned long long * frbp;
				1553	build_special_fargs_table();
				1554
				1555
				1556	for (i = 0; i < nb_special_fargs; i++) {
				1557	f14 = spec_fargs[i];
				1558	frbp = (unsigned long long *)&spec_fargs[i];
				1559	SET_FPSCR_ZERO;
				1560	SET_CR_XER_ZERO;
				1561	__asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
				1562	GET_CR(flags);
				1563	crx = (flags & 0x0f000000) >> 24;
				1564	printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
				1565	}
				1566	printf( "\n" );
				1567	}
				1568
				1569	static void
				1570	test_popcntw(void)
				1571	{
				1572	#ifdef __powerpc64__
				1573	uint64_t res;
				1574	unsigned long long src = 0x9182736405504536ULL;
				1575	r14 = src;
				1576	__asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
				1577	printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
				1578	#else
				1579	uint32_t res;
				1580	unsigned int src = 0x9182730E;
				1581	r14 = src;
				1582	__asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
				1583	printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
				1584	#endif
				1585	printf( "\n" );
				1586	}
				1587
				1588
				1589	static test_table_t
				1590	all_tests[] =
				1591	{
				1592
				1593	{ &test_vsx_one_fp_arg,
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1594	"Test VSX vector and scalar single argument instructions", OTHER_INST } ,
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1595	{ &test_int_to_fp_convert,
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1596	"Test VSX vector integer to float conversion instructions", OTHER_INST },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1597	{ &test_div_extensions,
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1598	"Test div extensions", SCALAR_DIV_INST },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1599	{ &test_ftsqrt,
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1600	"Test ftsqrt instruction", OTHER_INST },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1601	{ &test_vx_tdivORtsqrt,
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1602	"Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1603	{ &test_popcntw,
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1604	"Test popcntw instruction", OTHER_INST },
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1605	{ NULL, NULL }
				1606	};
				1607	#endif // HAS_VSX
				1608
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1609	static void usage (void)
				1610	{
				1611	fprintf(stderr,
				1612	"Usage: test_isa_3_0 [OPTIONS]\n"
				1613	"\t-d: test scalar division instructions (default)\n"
				1614	"\t-o: test non scalar division instructions (default)\n"
				1615	"\t-A: test all instructions (default)\n"
				1616	"\t-h: display this help and exit\n"
				1617	);
				1618	}
				1619
				1620	int main(int argc, char **argv)
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1621	{
				1622	#ifdef HAS_VSX
				1623
				1624	test_table_t aTest;
				1625	test_func_t func;
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1626	int c;
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1627	int i = 0;
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1628	unsigned int test_run_mask = 0;
				1629
				1630	/* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the FPSCR. These
				1631	* bits are set on various arithimetic instructions. This means this
				1632	* test generates different FPSCR output for pre ISA 3.0 versus ISA 3.0
				1633	* hardware. The tests have been grouped so that the tests that generate
				1634	* different results are in one test and the rest are in a different test.
				1635	* this minimizes the size of the result expect files for the two cases.
				1636	*/
				1637
				1638	while ((c = getopt(argc, argv, "doAh")) != -1) {
				1639	switch (c) {
				1640	case 'd':
				1641	test_run_mask \|= SCALAR_DIV_INST;
				1642	break;
				1643	case 'o':
				1644	test_run_mask \|= OTHER_INST;
				1645	break;
				1646	case 'A':
				1647	test_run_mask = 0xFFFF;
				1648	break;
				1649	case 'h':
				1650	usage();
				1651	return 0;
				1652
				1653	default:
				1654	usage();
				1655	fprintf(stderr, "Unknown argument: '%c'\n", c);
				1656	return 1;
				1657	}
				1658	}
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1659
				1660	while ((func = all_tests[i].test_category)) {
				1661	aTest = all_tests[i];
Elliott Hughes	ed39800	2017-06-21 14:41:24 -0700	[diff] [blame^]	1662
				1663	if(test_run_mask & aTest.test_group) {
				1664	/* Test group specified on command line */
				1665
				1666	printf( "%s\n", aTest.name );
				1667	(*func)();
				1668	}
sewardj	2062dc6	2011-09-05 12:15:16 +0000	[diff] [blame]	1669	i++;
				1670	}
				1671	if (spec_fargs)
				1672	free(spec_fargs);
				1673	if (spec_sp_fargs)
				1674	free(spec_sp_fargs);
				1675
				1676	#endif // HAS _VSX
				1677
				1678	return 0;
				1679	}