blob: 8c74c09d4ea771e60aa7808815e61aaf6fe97f95 [file] [log] [blame]
sewardj2062dc62011-09-05 12:15:16 +00001/* Copyright (C) 2011 IBM
2
3 Author: Maynard Johnson <maynardj@us.ibm.com>
4
5 This program is free software; you can redistribute it and/or
6 modify it under the terms of the GNU General Public License as
7 published by the Free Software Foundation; either version 2 of the
8 License, or (at your option) any later version.
9
10 This program is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18 02111-1307, USA.
19
20 The GNU General Public License is contained in the file COPYING.
21 */
22
23#ifdef HAS_VSX
24
25#include <stdio.h>
26#include <stdint.h>
27#include <stdlib.h>
28#include <string.h>
29#include <malloc.h>
30#include <altivec.h>
31#include <math.h>
Elliott Hughesed398002017-06-21 14:41:24 -070032#include <unistd.h> // getopt
sewardj2062dc62011-09-05 12:15:16 +000033
/* HWord_t: unsigned integer type used for the general purpose register
 * variables below.  It is 32 bits wide unless __powerpc64__ is defined,
 * in which case it is 64 bits wide.
 */
#ifndef __powerpc64__
typedef uint32_t HWord_t;
#else
typedef uint64_t HWord_t;
#endif /* __powerpc64__ */
39
/* isLE is 1 when VGP_ppc64le_linux is defined and 0 otherwise. */
#ifdef VGP_ppc64le_linux
#define isLE 1
#else
#define isLE 0
#endif
45
/* Minimal boolean type and its two values, used throughout this file. */
typedef unsigned char Bool;
#define True 1
#define False 0
/* Global register variables: each C name is bound to the hardware
 * register named in its __asm__ string (GPRs r14-r17, FPRs fr14-fr17).
 * The divide tests in this file read r14/r15 as operands and write r17.
 */
register HWord_t r14 __asm__ ("r14");
register HWord_t r15 __asm__ ("r15");
register HWord_t r16 __asm__ ("r16");
register HWord_t r17 __asm__ ("r17");
register double f14 __asm__ ("fr14");
register double f15 __asm__ ("fr15");
register double f16 __asm__ ("fr16");
register double f17 __asm__ ("fr17");
57
/* CR and XER values captured (via GET_CR_XER) right after a divide test. */
static volatile unsigned int div_flags, div_xer;
59
/* Clobber list naming all eight condition register fields. */
#define ALLCR "cr0","cr1","cr2","cr3","cr4","cr5","cr6","cr7"

/* Write _arg into the condition register with mtcr.
 * Note: the expansion already ends with a ';'.
 */
#define SET_CR(_arg) \
      __asm__ __volatile__ ("mtcr %0" : : "b"(_arg) : ALLCR );

/* Write _arg into XER with mtxer.
 * Note: the expansion already ends with a ';'.
 */
#define SET_XER(_arg) \
      __asm__ __volatile__ ("mtxer %0" : : "b"(_arg) : "xer" );

/* Read the condition register into _lval with mfcr. */
#define GET_CR(_lval) \
      __asm__ __volatile__ ("mfcr %0"  : "=b"(_lval) )

/* Read XER into _lval with mfxer. */
#define GET_XER(_lval) \
      __asm__ __volatile__ ("mfxer %0" : "=b"(_lval) )

/* Read CR into _lval_cr, then XER into _lval_xer. */
#define GET_CR_XER(_lval_cr,_lval_xer) \
   do { GET_CR(_lval_cr); GET_XER(_lval_xer); } while (0)

/* Set the condition register to zero. */
#define SET_CR_ZERO \
      SET_CR(0)

/* Set XER to zero. */
#define SET_XER_ZERO \
      SET_XER(0)

/* Zero CR, then XER; used before running an instruction whose flag
 * results are going to be inspected.
 */
#define SET_CR_XER_ZERO \
   do { SET_CR_ZERO; SET_XER_ZERO; } while (0)

/* Issue mtfsf with field mask 0xFF and a 0.0 source operand. */
#define SET_FPSCR_ZERO \
   do { double _d = 0.0; \
        __asm__ __volatile__ ("mtfsf 0xFF, %0" : : "f"(_d) ); \
   } while (0)
90
91
/* A test entry point: takes no arguments and returns nothing; operands and
 * results are passed through file-scope variables.
 */
typedef void (*test_func_t)(void);
/* Short name for struct test_table (defined further down). */
typedef struct test_table test_table_t;
94
/* Defines for the instruction groups; each group is a distinct bit so
 * groups can be combined in a bit field. */
#define SCALAR_DIV_INST    0x0001
#define OTHER_INST  0x0002
sewardj2062dc62011-09-05 12:15:16 +000098
99/* These functions below that construct a table of floating point
100 * values were lifted from none/tests/ppc32/jm-insns.c.
101 */
102
/* AB_DPRINTF: printf-style debug trace to stderr.  It expands to an empty
 * statement unless DEBUG_ARGS_BUILD is defined.
 */
#if defined (DEBUG_ARGS_BUILD)
#define AB_DPRINTF(...) do { fprintf(stderr, __VA_ARGS__); } while (0)
#else
#define AB_DPRINTF(...) do { } while (0)
#endif

/* Assemble a 64-bit IEEE double bit pattern from its parts and store it
 * at farg.
 *
 *   farg  - destination; must point to storage for a double (8 bytes)
 *   s     - sign bit (0 or 1), placed in bit 63
 *   _exp  - biased exponent, placed starting at bit 52
 *   mant  - fraction bits, OR-ed into the low bits unchanged
 *
 * The bits are stored with memcpy rather than through a uint64_t pointer:
 * farg refers to a double object, and writing it through an incompatible
 * pointer type breaks the strict aliasing rules.
 */
static inline void register_farg (void *farg,
                                  int s, uint16_t _exp, uint64_t mant)
{
   uint64_t tmp;

   tmp = ((uint64_t)s << 63) | ((uint64_t)_exp << 52) | mant;
   memcpy(farg, &tmp, sizeof tmp);
   /* uint64_t is not necessarily 'unsigned long long', so cast for %llx. */
   AB_DPRINTF("%d %03x %013llx => %016llx %0e\n",
              s, _exp, (unsigned long long)mant, (unsigned long long)tmp,
              *(double *)farg);
}
119
/* Assemble a 32-bit IEEE single precision bit pattern from its parts and
 * store it at farg.
 *
 *   farg  - destination; must point to storage for a float (4 bytes)
 *   s     - sign bit (0 or 1), placed in bit 31
 *   _exp  - biased exponent, placed starting at bit 23
 *   mant  - fraction bits, OR-ed into the low bits unchanged
 *
 * memcpy is used for the store so that a float object is never written
 * through a uint32_t pointer (strict aliasing).
 */
static inline void register_sp_farg (void *farg,
                                     int s, uint16_t _exp, uint32_t mant)
{
   uint32_t tmp;

   tmp = ((uint32_t)s << 31) | ((uint32_t)_exp << 23) | mant;
   memcpy(farg, &tmp, sizeof tmp);
}
127
sewardj2062dc62011-09-05 12:15:16 +0000128
/* One pair of operands for a two-argument test.  Both members are indices
 * into the special-value tables (spec_fargs / spec_sp_fargs).
 */
typedef struct fp_test_args {
   int fra_idx;   /* index of the first ("A") operand */
   int frb_idx;   /* index of the second ("B") operand */
} fp_test_args_t;
133
134
/* Operand pairs {fra_idx, frb_idx} for the two-argument tests; this table
 * is referenced as 'targs' by the xvtdivdp and xvtdivsp entries of
 * vx_tdivORtsqrt_tests.  Each number selects an entry of the special-value
 * tables created by build_special_fargs_table().
 */
fp_test_args_t two_arg_fp_tests[] = {
                                     {8, 8},
                                     {8, 14},
                                     {15, 16},
                                     {8, 5},
                                     {8, 4},
                                     {8, 7},
                                     {8, 9},
                                     {8, 11},
                                     {14, 8},
                                     {14, 14},
                                     {14, 6},
                                     {14, 5},
                                     {14, 4},
                                     {14, 7},
                                     {14, 9},
                                     {14, 11},
                                     {6, 8},
                                     {6, 14},
                                     {6, 6},
                                     {6, 5},
                                     {6, 4},
                                     {6, 7},
                                     {6, 9},
                                     {6, 11},
                                     {5, 8},
                                     {5, 14},
                                     {5, 6},
                                     {5, 5},
                                     {5, 4},
                                     {5, 7},
                                     {5, 9},
                                     {5, 11},
                                     {4, 8},
                                     {4, 14},
                                     {4, 6},
                                     {4, 5},
                                     {4, 1},
                                     {4, 7},
                                     {4, 9},
                                     {4, 11},
                                     {7, 8},
                                     {7, 14},
                                     {7, 6},
                                     {7, 5},
                                     {7, 4},
                                     {7, 7},
                                     {7, 9},
                                     {7, 11},
                                     {10, 8},
                                     {10, 14},
                                     {12, 6},
                                     {12, 5},
                                     {10, 4},
                                     {10, 7},
                                     {10, 9},
                                     {10, 11},
                                     {12, 8 },
                                     {12, 14},
                                     {12, 6},
                                     {15, 16},
                                     {15, 16},
                                     {9, 11},
                                     {11, 11},
                                     {11, 12},
                                     {16, 18},
                                     {17, 16},
                                     {19, 19},
                                     {19, 18}
};
205
206
/* Special-value tables, filled in by build_special_fargs_table(). */
static int nb_special_fargs;       /* number of entries in each table */
static double * spec_fargs;        /* double precision values */
static float * spec_sp_fargs;      /* the same values in single precision */
210
211static void build_special_fargs_table(void)
212{
213/*
214 Entry Sign Exp fraction Special value
215 0 0 3fd 0x8000000000000ULL Positive finite number
216 1 0 404 0xf000000000000ULL ...
217 2 0 001 0x8000000b77501ULL ...
218 3 0 7fe 0x800000000051bULL ...
219 4 0 012 0x3214569900000ULL ...
220 5 0 000 0x0000000000000ULL +0.0 (+zero)
221 6 1 000 0x0000000000000ULL -0.0 (-zero)
222 7 0 7ff 0x0000000000000ULL +infinity
223 8 1 7ff 0x0000000000000ULL -infinity
224 9 0 7ff 0x7FFFFFFFFFFFFULL +SNaN
225 10 1 7ff 0x7FFFFFFFFFFFFULL -SNaN
226 11 0 7ff 0x8000000000000ULL +QNaN
227 12 1 7ff 0x8000000000000ULL -QNaN
228 13 1 000 0x8340000078000ULL Denormalized val (zero exp and non-zero fraction)
229 14 1 40d 0x0650f5a07b353ULL Negative finite number
230 15 0 412 0x32585a9900000ULL A few more positive finite numbers
231 16 0 413 0x82511a2000000ULL ...
232 17 . . . . . . . . . . . . . . . . . . . . . . .
233 18 . . . . . . . . . . . . . . . . . . . . . . .
234 19 . . . . . . . . . . . . . . . . . . . . . . .
235*/
236
237 uint64_t mant;
carll8efe4e42013-09-12 17:38:13 +0000238 uint32_t mant_sp;
sewardj2062dc62011-09-05 12:15:16 +0000239 uint16_t _exp;
240 int s;
241 int j, i = 0;
242
243 if (spec_fargs)
244 return;
245
246 spec_fargs = malloc( 20 * sizeof(double) );
247 spec_sp_fargs = malloc( 20 * sizeof(float) );
248
249 // #0
250 s = 0;
251 _exp = 0x3fd;
252 mant = 0x8000000000000ULL;
253 register_farg(&spec_fargs[i++], s, _exp, mant);
254
255 // #1
256 s = 0;
257 _exp = 0x404;
258 mant = 0xf000000000000ULL;
259 register_farg(&spec_fargs[i++], s, _exp, mant);
260
261 // #2
262 s = 0;
263 _exp = 0x001;
264 mant = 0x8000000b77501ULL;
265 register_farg(&spec_fargs[i++], s, _exp, mant);
266
267 // #3
268 s = 0;
269 _exp = 0x7fe;
270 mant = 0x800000000051bULL;
271 register_farg(&spec_fargs[i++], s, _exp, mant);
272
273 // #4
274 s = 0;
275 _exp = 0x012;
276 mant = 0x3214569900000ULL;
277 register_farg(&spec_fargs[i++], s, _exp, mant);
278
279
280 /* Special values */
281 /* +0.0 : 0 0x000 0x0000000000000 */
282 // #5
283 s = 0;
284 _exp = 0x000;
285 mant = 0x0000000000000ULL;
286 register_farg(&spec_fargs[i++], s, _exp, mant);
287
288 /* -0.0 : 1 0x000 0x0000000000000 */
289 // #6
290 s = 1;
291 _exp = 0x000;
292 mant = 0x0000000000000ULL;
293 register_farg(&spec_fargs[i++], s, _exp, mant);
294
295 /* +infinity : 0 0x7FF 0x0000000000000 */
296 // #7
297 s = 0;
298 _exp = 0x7FF;
299 mant = 0x0000000000000ULL;
300 register_farg(&spec_fargs[i++], s, _exp, mant);
301
302 /* -infinity : 1 0x7FF 0x0000000000000 */
303 // #8
304 s = 1;
305 _exp = 0x7FF;
306 mant = 0x0000000000000ULL;
307 register_farg(&spec_fargs[i++], s, _exp, mant);
308
carll8efe4e42013-09-12 17:38:13 +0000309 /*
310 * This comment applies to values #9 and #10 below:
311 * When src is a SNaN, it's converted to a QNaN first before rounding to single-precision,
312 * so we can't just copy the double-precision value to the corresponding slot in the
313 * single-precision array (i.e., in the loop at the end of this function). Instead, we
314 * have to manually set the bits using register_sp_farg().
315 */
316
317 /* +SNaN : 0 0x7FF 0x7FFFFFFFFFFFF */
sewardj2062dc62011-09-05 12:15:16 +0000318 // #9
319 s = 0;
320 _exp = 0x7FF;
321 mant = 0x7FFFFFFFFFFFFULL;
322 register_farg(&spec_fargs[i++], s, _exp, mant);
carll8efe4e42013-09-12 17:38:13 +0000323 _exp = 0xff;
324 mant_sp = 0x3FFFFF;
325 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
sewardj2062dc62011-09-05 12:15:16 +0000326
carll8efe4e42013-09-12 17:38:13 +0000327 /* -SNaN : 1 0x7FF 0x7FFFFFFFFFFFF */
sewardj2062dc62011-09-05 12:15:16 +0000328 // #10
329 s = 1;
330 _exp = 0x7FF;
331 mant = 0x7FFFFFFFFFFFFULL;
332 register_farg(&spec_fargs[i++], s, _exp, mant);
carll8efe4e42013-09-12 17:38:13 +0000333 _exp = 0xff;
334 mant_sp = 0x3FFFFF;
335 register_sp_farg(&spec_sp_fargs[i-1], s, _exp, mant_sp);
sewardj2062dc62011-09-05 12:15:16 +0000336
carll8efe4e42013-09-12 17:38:13 +0000337 /* +QNaN : 0 0x7FF 0x8000000000000 */
sewardj2062dc62011-09-05 12:15:16 +0000338 // #11
339 s = 0;
340 _exp = 0x7FF;
341 mant = 0x8000000000000ULL;
342 register_farg(&spec_fargs[i++], s, _exp, mant);
343
carll8efe4e42013-09-12 17:38:13 +0000344 /* -QNaN : 1 0x7FF 0x8000000000000 */
sewardj2062dc62011-09-05 12:15:16 +0000345 // #12
346 s = 1;
347 _exp = 0x7FF;
348 mant = 0x8000000000000ULL;
349 register_farg(&spec_fargs[i++], s, _exp, mant);
350
351 /* denormalized value */
352 // #13
353 s = 1;
354 _exp = 0x000;
355 mant = 0x8340000078000ULL;
356 register_farg(&spec_fargs[i++], s, _exp, mant);
357
358 /* Negative finite number */
359 // #14
360 s = 1;
361 _exp = 0x40d;
362 mant = 0x0650f5a07b353ULL;
363 register_farg(&spec_fargs[i++], s, _exp, mant);
364
365 /* A few positive finite numbers ... */
366 // #15
367 s = 0;
368 _exp = 0x412;
369 mant = 0x32585a9900000ULL;
370 register_farg(&spec_fargs[i++], s, _exp, mant);
371
372 // #16
373 s = 0;
374 _exp = 0x413;
375 mant = 0x82511a2000000ULL;
376 register_farg(&spec_fargs[i++], s, _exp, mant);
377
378 // #17
379 s = 0;
380 _exp = 0x403;
381 mant = 0x12ef5a9300000ULL;
382 register_farg(&spec_fargs[i++], s, _exp, mant);
383
384 // #18
385 s = 0;
386 _exp = 0x405;
387 mant = 0x14bf5d2300000ULL;
388 register_farg(&spec_fargs[i++], s, _exp, mant);
389
390 // #19
391 s = 0;
392 _exp = 0x409;
393 mant = 0x76bf982440000ULL;
394 register_farg(&spec_fargs[i++], s, _exp, mant);
395
396 nb_special_fargs = i;
397 for (j = 0; j < i; j++) {
carll8efe4e42013-09-12 17:38:13 +0000398 if (!(j == 9 || j == 10))
399 spec_sp_fargs[j] = spec_fargs[j];
sewardj2062dc62011-09-05 12:15:16 +0000400 }
401}
402
403
/* One entry of a table of test categories. */
struct test_table
{
   test_func_t test_category;   /* function that runs the category */
   char * name;                 /* category name */
   unsigned int test_group;     /* presumably a mask of SCALAR_DIV_INST / OTHER_INST -- confirm against the table that uses it */
};
410
/* Type of input for floating point operations: selects whether a test
 * takes its operands from the single precision or the double precision
 * special-value table.
 */
typedef enum {
   SINGLE_TEST,
   DOUBLE_TEST
} precision_type_t;
416
/* Classification of a VSX floating point test, stored in the 'type' field
 * of the test tables.  VX_ESTIMATE marks the reciprocal / reciprocal
 * square root estimate instructions; the VX_*CONV* values presumably
 * describe the format of the result so it can be printed correctly --
 * confirm against the code that consumes them.
 */
typedef enum {
   VX_SCALAR_CONV_TO_WORD,
   VX_CONV_TO_SINGLE,
   VX_CONV_TO_DOUBLE,
   VX_ESTIMATE,
   VX_DEFAULT
} vx_fp_test_type;
424
/* Vector operands (vec_inA, vec_inB) and result (vec_out) shared between
 * the test wrappers and the code that sets up / prints them.
 */
static vector unsigned int vec_out, vec_inA, vec_inB;
426
427/* This function is for checking the reciprocal and reciprocal square root
428 * estimate instructions.
429 */
430Bool check_estimate(precision_type_t type, Bool is_rsqrte, int idx, int output_vec_idx)
431{
432 /* Technically, the number of bits of precision for xvredp and xvrsqrtedp is
433 * 14 bits (14 = log2 16384). However, the VEX emulation of these instructions
434 * does an actual reciprocal calculation versus estimation, so the answer we get back from
435 * valgrind can easily differ from the estimate in the lower bits (within the 14 bits of
436 * precision) and the estimate may still be within expected tolerances. On top of that,
437 * we can't count on these estimates always being the same across implementations.
438 * For example, with the fre[s] instruction (which should be correct to within one part
439 * in 256 -- i.e., 8 bits of precision) . . . When approximating the value 1.0111_1111_1111,
440 * one implementation could return 1.0111_1111_0000 and another implementation could return
441 * 1.1000_0000_0000. Both estimates meet the 1/256 accuracy requirement, but share only a
442 * single bit in common.
443 *
444 * The upshot is we can't validate the VEX output for these instructions by comparing against
445 * stored bit patterns. We must check that the result is within expected tolerances.
446 */
447
448
449 /* A mask to be used for validation as a last resort.
450 * Only use 12 bits of precision for reasons discussed above.
451 */
452#define VSX_RECIP_ESTIMATE_MASK_DP 0xFFFFFF0000000000ULL
453#define VSX_RECIP_ESTIMATE_MASK_SP 0xFFFFFF00
454
455 Bool result = False;
456 Bool dp_test = type == DOUBLE_TEST;
457 double src_dp, res_dp;
458 float src_sp, res_sp;
459 src_dp = res_dp = 0;
460 src_sp = res_sp = 0;
461#define SRC (dp_test ? src_dp : src_sp)
462#define RES (dp_test ? res_dp : res_sp)
463 Bool src_is_negative = False;
464 Bool res_is_negative = False;
465 unsigned long long * dst_dp = NULL;
466 unsigned int * dst_sp = NULL;
467 if (dp_test) {
468 unsigned long long * src_dp_ull;
469 dst_dp = (unsigned long long *) &vec_out;
470 src_dp = spec_fargs[idx];
471 src_dp_ull = (unsigned long long *) &src_dp;
472 src_is_negative = (*src_dp_ull & 0x8000000000000000ULL) ? True : False;
473 res_is_negative = (dst_dp[output_vec_idx] & 0x8000000000000000ULL) ? True : False;
474 memcpy(&res_dp, &dst_dp[output_vec_idx], 8);
475 } else {
476 unsigned int * src_sp_uint;
477 dst_sp = (unsigned int *) &vec_out;
478 src_sp = spec_sp_fargs[idx];
479 src_sp_uint = (unsigned int *) &src_sp;
480 src_is_negative = (*src_sp_uint & 0x80000000) ? True : False;
481 res_is_negative = (dst_sp[output_vec_idx] & 0x80000000) ? True : False;
482 memcpy(&res_sp, &dst_sp[output_vec_idx], 4);
483 }
484
485 // Below are common rules for xvre{d|s}p and xvrsqrte{d|s}p
486 if (isnan(SRC))
487 return isnan(RES);
488 if (fpclassify(SRC) == FP_ZERO)
489 return isinf(RES);
490 if (!src_is_negative && isinf(SRC))
491 return !res_is_negative && (fpclassify(RES) == FP_ZERO);
492 if (is_rsqrte) {
493 if (src_is_negative)
494 return isnan(RES);
495 } else {
496 if (src_is_negative && isinf(SRC))
497 return res_is_negative && (fpclassify(RES) == FP_ZERO);
498 }
499 if (dp_test) {
500 double calc_diff;
501 double real_diff;
502 double recip_divisor;
503 double div_result;
504 double calc_diff_tmp;
505
506 if (is_rsqrte)
507 recip_divisor = sqrt(src_dp);
508 else
509 recip_divisor = src_dp;
510
511 div_result = 1.0/recip_divisor;
512 calc_diff_tmp = recip_divisor * 16384.0;
513 if (isnormal(calc_diff_tmp)) {
514 calc_diff = fabs(1.0/calc_diff_tmp);
515 real_diff = fabs(res_dp - div_result);
516 result = ( ( res_dp == div_result )
517 || ( real_diff <= calc_diff ) );
518 } else {
519 /* Unable to compute theoretical difference, so we fall back to masking out
520 * un-precise bits.
521 */
522 unsigned long long * div_result_dp = (unsigned long long *) &div_result;
523 result = (dst_dp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_DP) == (*div_result_dp & VSX_RECIP_ESTIMATE_MASK_DP);
524 }
525 /* For debug use . . .
526 if (!result) {
527 unsigned long long * dv = &div_result;
528 unsigned long long * rd = &real_diff;
529 unsigned long long * cd = &calc_diff;
530 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
531 *dv, *rd, *cd);
532 }
533 */
534 } else { // single precision test (only have xvrsqrtesp, since xvresp was implemented in stage 2)
535 float calc_diff;
536 float real_diff;
537 float div_result;
538 float calc_diff_tmp;
539 float recip_divisor = sqrt(src_sp);
540
541 div_result = 1.0/recip_divisor;
542 calc_diff_tmp = recip_divisor * 16384.0;
543 if (isnormal(calc_diff_tmp)) {
544 calc_diff = fabsf(1.0/calc_diff_tmp);
545 real_diff = fabsf(res_sp - div_result);
546 result = ( ( res_sp == div_result )
547 || ( real_diff <= calc_diff ) );
548 } else {
549 /* Unable to compute theoretical difference, so we fall back to masking out
550 * un-precise bits.
551 */
552 unsigned int * div_result_sp = (unsigned int *) &div_result;
553 result = (dst_sp[output_vec_idx] & VSX_RECIP_ESTIMATE_MASK_SP) == (*div_result_sp & VSX_RECIP_ESTIMATE_MASK_SP);
554 }
555 /* For debug use . . .
556 if (!result) {
557 unsigned long long * dv = &div_result;
558 unsigned long long * rd = &real_diff;
559 unsigned long long * cd = &calc_diff;
560 printf("\n\t {actual div_result: %016llx; real_diff: %016llx; calc_diff: %016llx}\n",
561 *dv, *rd, *cd);
562 }
563 */
564 }
565 return result;
566}
567
/* Description of one VSX floating point instruction test. */
typedef struct vx_fp_test
{
   test_func_t test_func;        /* wrapper that executes the instruction */
   const char * name;            /* instruction mnemonic, used in the output */
   fp_test_args_t * targs;       /* operand index pairs, or NULL in the one-operand tables */
   int num_tests;                /* number of operand values / pairs to run */
   precision_type_t precision;   /* single or double precision operands */
   vx_fp_test_type type;         /* kind of test, see vx_fp_test_type */
   const char * op;              /* short operation label printed between operands */
} vx_fp_test_t;
578
579
/* When True, the divide tests use the record ('.') form of the instruction
 * and a "." is appended to the printed instruction name.
 */
static Bool do_dot;
581
/* Run xvredp with vec_inB as source; the result is stored in vec_out. */
static void test_xvredp(void)
{
   __asm__ __volatile__ ("xvredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
586
/* Run xsredp with vec_inB as source; the result is stored in vec_out. */
static void test_xsredp(void)
{
   __asm__ __volatile__ ("xsredp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
591
/* Run xvrsqrtedp with vec_inB as source; the result is stored in vec_out. */
static void test_xvrsqrtedp(void)
{
   __asm__ __volatile__ ("xvrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
596
/* Run xsrsqrtedp with vec_inB as source; the result is stored in vec_out. */
static void test_xsrsqrtedp(void)
{
   __asm__ __volatile__ ("xsrsqrtedp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
601
/* Run xvrsqrtesp with vec_inB as source; the result is stored in vec_out. */
static void test_xvrsqrtesp(void)
{
   __asm__ __volatile__ ("xvrsqrtesp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
606
607static void test_xstsqrtdp(void)
608{
609 __asm__ __volatile__ ("xstsqrtdp cr1, %x0" : : "wa" (vec_inB));
610}
611
612static void test_xvtsqrtdp(void)
613{
614 __asm__ __volatile__ ("xvtsqrtdp cr1, %x0" : : "wa" (vec_inB));
615}
616
617static void test_xvtsqrtsp(void)
618{
619 __asm__ __volatile__ ("xvtsqrtsp cr1, %x0" : : "wa" (vec_inB));
620}
621
/* Run xvsqrtdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvsqrtdp(void)
{
   __asm__ __volatile__ ("xvsqrtdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
626
/* Run xvsqrtsp with vec_inB as source; the result is stored in vec_out. */
static void test_xvsqrtsp(void)
{
   __asm__ __volatile__ ("xvsqrtsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
631
632static void test_xvtdivdp(void)
633{
634 __asm__ __volatile__ ("xvtdivdp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
635}
636
637static void test_xvtdivsp(void)
638{
639 __asm__ __volatile__ ("xvtdivsp cr1, %x0, %x1" : : "wa" (vec_inA), "wa" (vec_inB));
640}
641
/* Run xscvdpsp with vec_inB as source; the result is stored in vec_out. */
static void test_xscvdpsp(void)
{
   __asm__ __volatile__ ("xscvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
646
/* Run xscvdpuxws with vec_inB as source; the result is stored in vec_out. */
static void test_xscvdpuxws(void)
{
   __asm__ __volatile__ ("xscvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
651
/* Run xscvspdp with vec_inB as source; the result is stored in vec_out. */
static void test_xscvspdp(void)
{
   __asm__ __volatile__ ("xscvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
656
/* Run xvcvdpsp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvdpsp(void)
{
   __asm__ __volatile__ ("xvcvdpsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
661
/* Run xvcvdpuxds with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvdpuxds(void)
{
   __asm__ __volatile__ ("xvcvdpuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
666
/* Run xvcvdpuxws with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvdpuxws(void)
{
   __asm__ __volatile__ ("xvcvdpuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
671
/* Run xvcvspdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvspdp(void)
{
   __asm__ __volatile__ ("xvcvspdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
676
/* Run xvcvspsxds with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvspsxds(void)
{
   __asm__ __volatile__ ("xvcvspsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
681
/* Run xvcvspuxds with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvspuxds(void)
{
   __asm__ __volatile__ ("xvcvspuxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
686
/* Run xvcvdpsxds with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvdpsxds(void)
{
   __asm__ __volatile__ ("xvcvdpsxds %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
691
/* Run xvcvspuxws with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvspuxws(void)
{
   __asm__ __volatile__ ("xvcvspuxws %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
696
/* Run xvcvsxddp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvsxddp(void)
{
   __asm__ __volatile__ ("xvcvsxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
701
/* Run xvcvuxddp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvuxddp(void)
{
   __asm__ __volatile__ ("xvcvuxddp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
706
/* Run xvcvsxdsp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvsxdsp(void)
{
   __asm__ __volatile__ ("xvcvsxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
711
/* Run xvcvuxdsp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvuxdsp(void)
{
   __asm__ __volatile__ ("xvcvuxdsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
716
/* Run xvcvsxwdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvsxwdp(void)
{
   __asm__ __volatile__ ("xvcvsxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
721
/* Run xvcvuxwdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvuxwdp(void)
{
   __asm__ __volatile__ ("xvcvuxwdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
726
/* Run xvcvsxwsp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvsxwsp(void)
{
   __asm__ __volatile__ ("xvcvsxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
731
/* Run xvcvuxwsp with vec_inB as source; the result is stored in vec_out. */
static void test_xvcvuxwsp(void)
{
   __asm__ __volatile__ ("xvcvuxwsp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
736
/* Run xsrdpic with vec_inB as source; the result is stored in vec_out. */
static void test_xsrdpic(void)
{
   __asm__ __volatile__ ("xsrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
741
/* Run xsrdpiz with vec_inB as source; the result is stored in vec_out. */
static void test_xsrdpiz(void)
{
   __asm__ __volatile__ ("xsrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
746
/* Run xsrdpi with vec_inB as source; the result is stored in vec_out. */
static void test_xsrdpi(void)
{
   __asm__ __volatile__ ("xsrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
751
/* Run xvabsdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvabsdp(void)
{
   __asm__ __volatile__ ("xvabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
756
/* Run xvnabsdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvnabsdp(void)
{
   __asm__ __volatile__ ("xvnabsdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
761
/* Run xvnegdp with vec_inB as source; the result is stored in vec_out. */
static void test_xvnegdp(void)
{
   __asm__ __volatile__ ("xvnegdp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
766
/* Run xvabssp with vec_inB as source; the result is stored in vec_out. */
static void test_xvabssp(void)
{
   __asm__ __volatile__ ("xvabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
771
/* Run xvnabssp with vec_inB as source; the result is stored in vec_out. */
static void test_xvnabssp(void)
{
   __asm__ __volatile__ ("xvnabssp %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
776
/* Run xvrdpi with vec_inB as source; the result is stored in vec_out. */
static void test_xvrdpi(void)
{
   __asm__ __volatile__ ("xvrdpi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
781
/* Run xvrdpic with vec_inB as source; the result is stored in vec_out. */
static void test_xvrdpic(void)
{
   __asm__ __volatile__ ("xvrdpic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
786
/* Run xvrdpim with vec_inB as source; the result is stored in vec_out. */
static void test_xvrdpim(void)
{
   __asm__ __volatile__ ("xvrdpim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
791
/* Run xvrdpip with vec_inB as source; the result is stored in vec_out. */
static void test_xvrdpip(void)
{
   __asm__ __volatile__ ("xvrdpip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
796
/* Run xvrdpiz with vec_inB as source; the result is stored in vec_out. */
static void test_xvrdpiz(void)
{
   __asm__ __volatile__ ("xvrdpiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
801
/* Run xvrspi with vec_inB as source; the result is stored in vec_out. */
static void test_xvrspi(void)
{
   __asm__ __volatile__ ("xvrspi %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
806
/* Run xvrspic with vec_inB as source; the result is stored in vec_out. */
static void test_xvrspic(void)
{
   __asm__ __volatile__ ("xvrspic %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
811
/* Run xvrspim with vec_inB as source; the result is stored in vec_out. */
static void test_xvrspim(void)
{
   __asm__ __volatile__ ("xvrspim %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
816
/* Run xvrspip with vec_inB as source; the result is stored in vec_out. */
static void test_xvrspip(void)
{
   __asm__ __volatile__ ("xvrspip %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
821
/* Run xvrspiz with vec_inB as source; the result is stored in vec_out. */
static void test_xvrspiz(void)
{
   __asm__ __volatile__ ("xvrspiz %x0, %x1" : "=wa" (vec_out): "wa" (vec_inB));
}
826
/* Table of the one-operand VSX tests run by test_vsx_one_fp_arg().
 * Columns: wrapper function, mnemonic, operand pairs (unused here, NULL),
 * number of operand values, operand precision, test type, and operation
 * label.  The list ends with an entry whose test_func is NULL.
 */
static vx_fp_test_t
vsx_one_fp_arg_tests[] = {
                          { &test_xvredp, "xvredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                          { &test_xsredp, "xsredp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x"},
                          { &test_xvrsqrtedp, "xvrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
                          { &test_xsrsqrtedp, "xsrsqrtedp", NULL, 18, DOUBLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
                          { &test_xvrsqrtesp, "xvrsqrtesp", NULL, 18, SINGLE_TEST, VX_ESTIMATE, "1/x-sqrt"},
                          { &test_xvsqrtdp, "xvsqrtdp", NULL, 18, DOUBLE_TEST, VX_DEFAULT, "sqrt"},
                          { &test_xvsqrtsp, "xvsqrtsp", NULL, 18, SINGLE_TEST, VX_DEFAULT, "sqrt"},
                          { &test_xscvdpsp, "xscvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                          { &test_xscvdpuxws, "xscvdpuxws", NULL, 20, DOUBLE_TEST, VX_SCALAR_CONV_TO_WORD, "conv"},
                          { &test_xscvspdp, "xscvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                          { &test_xvcvdpsp, "xvcvdpsp", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                          { &test_xvcvdpuxds, "xvcvdpuxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                          { &test_xvcvdpuxws, "xvcvdpuxws", NULL, 20, DOUBLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                          { &test_xvcvspdp, "xvcvspdp", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                          { &test_xvcvspsxds, "xvcvspsxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                          { &test_xvcvdpsxds, "xvcvdpsxds", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                          { &test_xvcvspuxds, "xvcvspuxds", NULL, 20, SINGLE_TEST, VX_CONV_TO_DOUBLE, "conv"},
                          { &test_xvcvspuxws, "xvcvspuxws", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "conv"},
                          { &test_xsrdpic, "xsrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xsrdpiz, "xsrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xsrdpi, "xsrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xvabsdp, "xvabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "abs"},
                          { &test_xvnabsdp, "xvnabsdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "nabs"},
                          { &test_xvnegdp, "xvnegdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "neg"},
                          { &test_xvabssp, "xvabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "abs"},
                          { &test_xvnabssp, "xvnabssp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "nabs"},
                          { &test_xvrdpi, "xvrdpi", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xvrdpic, "xvrdpic", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xvrdpim, "xvrdpim", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xvrdpip, "xvrdpip", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xvrdpiz, "xvrdpiz", NULL, 20, DOUBLE_TEST, VX_CONV_TO_DOUBLE, "round"},
                          { &test_xvrspi, "xvrspi", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                          { &test_xvrspic, "xvrspic", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                          { &test_xvrspim, "xvrspim", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                          { &test_xvrspip, "xvrspip", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                          { &test_xvrspiz, "xvrspiz", NULL, 20, SINGLE_TEST, VX_CONV_TO_SINGLE, "round"},
                          { NULL, NULL, NULL, 0, 0, 0, NULL}
};
867
/* Table of the "test for software divide / square root" instructions,
 * which report through CR field 1 instead of producing a vector result.
 * The tsqrt entries take one operand (targs is NULL); the tdiv entries
 * take operand pairs from two_arg_fp_tests, whose 68 entries give the
 * num_tests value.  The list ends with an entry whose test_func is NULL.
 */
static vx_fp_test_t
vx_tdivORtsqrt_tests[] = {
                          { &test_xstsqrtdp, "xstsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
                          { &test_xvtsqrtdp, "xvtsqrtdp", NULL, 20, DOUBLE_TEST, VX_DEFAULT, "test-sqrt"},
                          { &test_xvtsqrtsp, "xvtsqrtsp", NULL, 20, SINGLE_TEST, VX_DEFAULT, "test-sqrt"},
                          { &test_xvtdivdp, "xvtdivdp", two_arg_fp_tests, 68, DOUBLE_TEST, VX_DEFAULT, "test-div"},
                          { &test_xvtdivsp, "xvtdivsp", two_arg_fp_tests, 68, SINGLE_TEST, VX_DEFAULT, "test-div"},
                          { NULL, NULL, NULL, 0 , 0, 0, NULL}
};
877
/* 64-bit integer inputs for the doubleword integer-to-FP conversion tests
 * (referenced from intToFp_tests).
 */
static unsigned long long doubleWord[] = { 0,
                                           0xffffffff00000000LL,
                                           0x00000000ffffffffLL,
                                           0xffffffffffffffffLL,
                                           0x89abcde123456789LL,
                                           0x0102030405060708LL,
                                           0x00000000a0b1c2d3LL,
                                           0x1111222233334444LL
};
887
/* 32-bit integer inputs for the word integer-to-FP conversion tests
 * (referenced from intToFp_tests).
 */
static unsigned int singleWord[] = {0,
                                    0xffff0000,
                                    0x0000ffff,
                                    0xffffffff,
                                    0x89a73522,
                                    0x01020304,
                                    0x0000abcd,
                                    0x11223344
};
897
/* Description of one integer-to-floating-point conversion test. */
typedef struct vx_intToFp_test
{
   test_func_t test_func;        /* wrapper that executes the instruction */
   const char * name;            /* instruction mnemonic */
   void * targs;                 /* integer input array (doubleWord or singleWord) */
   int num_tests;                /* number of elements in targs */
   precision_type_t precision;   /* DOUBLE_TEST for 64-bit inputs, SINGLE_TEST for 32-bit inputs */
   vx_fp_test_type type;         /* format of the converted result */
} vx_intToFp_test_t;
907
908static vx_intToFp_test_t
909intToFp_tests[] = {
910 { test_xvcvsxddp, "xvcvsxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
911 { test_xvcvuxddp, "xvcvuxddp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_DOUBLE },
912 { test_xvcvsxdsp, "xvcvsxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
913 { test_xvcvuxdsp, "xvcvuxdsp", (void *)doubleWord, 8, DOUBLE_TEST, VX_CONV_TO_SINGLE },
914 { test_xvcvsxwdp, "xvcvsxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
915 { test_xvcvuxwdp, "xvcvuxwdp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_DOUBLE },
916 { test_xvcvsxwsp, "xvcvsxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
917 { test_xvcvuxwsp, "xvcvuxwsp", (void *)singleWord, 8, SINGLE_TEST, VX_CONV_TO_SINGLE },
918 { NULL, NULL, NULL, 0, 0 }
919};
920
/* When True, the divide tests use the overflow-enabled ('o') form of the
 * instruction.
 */
static Bool do_OE;
/* Bit flags that are OR-ed together to select which form of an extended
 * divide instruction a test executes.
 */
typedef enum {
   DIV_BASE = 1,   /* always set: the plain instruction */
   DIV_OE = 2,     /* add the 'o' (overflow enable) suffix */
   DIV_DOT = 4,    /* add the '.' (record) suffix */
} div_type_t;
927/* Possible divde type combinations are:
928 * - base
929 * - base+dot
930 * - base+OE
931 * - base+OE+dot
932 */
#ifdef __powerpc64__
/* Execute one form of divdeu (64-bit only) on r14 and r15, leaving the
 * quotient in r17 and the resulting CR and XER values in div_flags and
 * div_xer.  The form is chosen from the do_OE and do_dot globals.
 *
 * CR and XER are zeroed immediately before the instruction and read back
 * immediately after it, inside each case, so that no compiler-generated
 * compare for the switch can sit between the three steps.  The 'o' forms
 * declare "xer" and the '.' forms declare "cr0" as clobbered, since those
 * are the registers the respective forms update.
 */
static void test_divdeu(void)
{
   int divdeu_type = DIV_BASE;
   if (do_OE)
      divdeu_type |= DIV_OE;
   if (do_dot)
      divdeu_type |= DIV_DOT;

   switch (divdeu_type) {
      case DIV_BASE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15) : "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeu. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15) : "cr0");
         GET_CR_XER(div_flags, div_xer);
         break;
      case DIV_BASE | DIV_OE | DIV_DOT:
         SET_CR_XER_ZERO;
         __asm__ __volatile__ ("divdeuo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15) : "cr0", "xer");
         GET_CR_XER(div_flags, div_xer);
         break;
      default:
         fprintf(stderr, "Invalid divdeu type. Exiting\n");
         exit(1);
   }
}
#endif
969
970static void test_divwe(void)
971{
972 int divwe_type = DIV_BASE;
973 if (do_OE)
974 divwe_type |= DIV_OE;
975 if (do_dot)
976 divwe_type |= DIV_DOT;
977
978 switch (divwe_type) {
979 case 1:
980 SET_CR_XER_ZERO;
981 __asm__ __volatile__ ("divwe %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
982 GET_CR_XER(div_flags, div_xer);
983 break;
984 case 3:
985 SET_CR_XER_ZERO;
986 __asm__ __volatile__ ("divweo %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
987 GET_CR_XER(div_flags, div_xer);
988 break;
989 case 5:
990 SET_CR_XER_ZERO;
991 __asm__ __volatile__ ("divwe. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
992 GET_CR_XER(div_flags, div_xer);
993 break;
994 case 7:
995 SET_CR_XER_ZERO;
996 __asm__ __volatile__ ("divweo. %0, %1, %2" : "=r" (r17) : "r" (r14),"r" (r15));
997 GET_CR_XER(div_flags, div_xer);
998 break;
999 default:
1000 fprintf(stderr, "Invalid divweu type. Exiting\n");
1001 exit(1);
1002 }
1003}
1004
1005
/* Minimal test description: wrapper function, name and operand precision. */
typedef struct simple_test {
   test_func_t test_func;        /* wrapper that executes the instruction */
   char * name;                  /* instruction mnemonic */
   precision_type_t precision;   /* single or double precision operands */
} simple_test_t;
1011
1012
1013static void setup_sp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1014{
1015 int a_idx, b_idx, i;
1016 void * inA, * inB;
1017 void * vec_src = swap_inputs ? &vec_out : &vec_inB;
1018
1019 for (i = 0; i < 4; i++) {
1020 a_idx = targs->fra_idx;
1021 b_idx = targs->frb_idx;
1022 inA = (void *)&spec_sp_fargs[a_idx];
1023 inB = (void *)&spec_sp_fargs[b_idx];
1024 // copy single precision FP into vector element i
1025 memcpy(((void *)&vec_inA) + (i * 4), inA, 4);
1026 memcpy(vec_src + (i * 4), inB, 4);
1027 targs++;
1028 }
1029}
1030
1031static void setup_dp_fp_args(fp_test_args_t * targs, Bool swap_inputs)
1032{
1033 int a_idx, b_idx, i;
1034 void * inA, * inB;
1035 void * vec_src = swap_inputs ? (void *)&vec_out : (void *)&vec_inB;
1036
1037 for (i = 0; i < 2; i++) {
1038 a_idx = targs->fra_idx;
1039 b_idx = targs->frb_idx;
1040 inA = (void *)&spec_fargs[a_idx];
1041 inB = (void *)&spec_fargs[b_idx];
1042 // copy double precision FP into vector element i
1043 memcpy(((void *)&vec_inA) + (i * 8), inA, 8);
1044 memcpy(vec_src + (i * 8), inB, 8);
1045 targs++;
1046 }
1047}
1048
1049#define VX_NOT_CMP_OP 0xffffffff
1050static void print_vector_fp_result(unsigned int cc, vx_fp_test_t * test_group, int i, Bool print_vec_out)
1051{
1052 int a_idx, b_idx, k;
1053 char * name = malloc(20);
1054 int dp = test_group->precision == DOUBLE_TEST ? 1 : 0;
1055 int loops = dp ? 2 : 4;
1056 fp_test_args_t * targs = &test_group->targs[i];
1057 unsigned long long * frA_dp, * frB_dp, * dst_dp;
1058 unsigned int * frA_sp, *frB_sp, * dst_sp;
1059 strcpy(name, test_group->name);
1060 printf("#%d: %s%s ", dp? i/2 : i/4, name, (do_dot ? "." : ""));
1061 for (k = 0; k < loops; k++) {
1062 a_idx = targs->fra_idx;
1063 b_idx = targs->frb_idx;
1064 if (k)
1065 printf(" AND ");
1066 if (dp) {
1067 frA_dp = (unsigned long long *)&spec_fargs[a_idx];
1068 frB_dp = (unsigned long long *)&spec_fargs[b_idx];
1069 printf("%016llx %s %016llx", *frA_dp, test_group->op, *frB_dp);
1070 } else {
1071 frA_sp = (unsigned int *)&spec_sp_fargs[a_idx];
1072 frB_sp = (unsigned int *)&spec_sp_fargs[b_idx];
1073 printf("%08x %s %08x", *frA_sp, test_group->op, *frB_sp);
1074 }
1075 targs++;
1076 }
1077 if (cc != VX_NOT_CMP_OP)
1078 printf(" ? cc=%x", cc);
1079
1080 if (print_vec_out) {
1081 if (dp) {
1082 dst_dp = (unsigned long long *) &vec_out;
1083 printf(" => %016llx %016llx\n", dst_dp[0], dst_dp[1]);
1084 } else {
1085 dst_sp = (unsigned int *) &vec_out;
1086 printf(" => %08x %08x %08x %08x\n", dst_sp[0], dst_sp[1], dst_sp[2], dst_sp[3]);
1087 }
1088 } else {
1089 printf("\n");
1090 }
1091 free(name);
1092}
1093
1094
1095
1096static void test_vsx_one_fp_arg(void)
1097{
1098 test_func_t func;
1099 int k;
1100 k = 0;
1101 build_special_fargs_table();
1102
1103 while ((func = vsx_one_fp_arg_tests[k].test_func)) {
1104 int idx, i;
1105 vx_fp_test_t test_group = vsx_one_fp_arg_tests[k];
1106 Bool estimate = (test_group.type == VX_ESTIMATE);
1107 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1108 Bool is_sqrt = (strstr(test_group.name, "sqrt")) ? True : False;
1109 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1110 Bool sparse_sp = False;
1111 int stride = dp ? 2 : 4;
1112 int loops = is_scalar ? 1 : stride;
1113 stride = is_scalar ? 1: stride;
1114
1115 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1116 * |___ SP___|_Unused_|___SP___|__Unused__| // for vector op
1117 * or
1118 * |___ SP___|_Unused_|_Unused_|__Unused__| // for scalar op
1119 *
1120 * For the vector op case, we need to adjust stride from '4' to '2', since
1121 * we'll only be loading two values per loop into the input register.
1122 */
1123 if (!dp && !is_scalar && test_group.type == VX_CONV_TO_DOUBLE) {
1124 sparse_sp = True;
1125 stride = 2;
1126 }
1127
1128 for (i = 0; i < test_group.num_tests; i+=stride) {
1129 unsigned int * pv;
carlldd690bf2014-08-07 23:49:27 +00001130 void * inB, * vecB_void_ptr = (void *)&vec_inB;
sewardj2062dc62011-09-05 12:15:16 +00001131
1132 pv = (unsigned int *)&vec_out;
1133 // clear vec_out
1134 for (idx = 0; idx < 4; idx++, pv++)
1135 *pv = 0;
1136
1137 if (dp) {
1138 int j;
1139 unsigned long long * frB_dp, *dst_dp;
1140 for (j = 0; j < loops; j++) {
1141 inB = (void *)&spec_fargs[i + j];
1142 // copy double precision FP into vector element i
carlldd690bf2014-08-07 23:49:27 +00001143 if (isLE && is_scalar)
1144 vecB_void_ptr += 8;
1145 memcpy(vecB_void_ptr + (j * 8), inB, 8);
sewardj2062dc62011-09-05 12:15:16 +00001146 }
1147 // execute test insn
1148 (*func)();
1149 dst_dp = (unsigned long long *) &vec_out;
carlldd690bf2014-08-07 23:49:27 +00001150 if (isLE && is_scalar)
1151 dst_dp++;
sewardj2062dc62011-09-05 12:15:16 +00001152 printf("#%d: %s ", i/stride, test_group.name);
1153 for (j = 0; j < loops; j++) {
1154 if (j)
1155 printf("; ");
1156 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1157 printf("%s(%016llx)", test_group.op, *frB_dp);
1158 if (estimate) {
carlldd690bf2014-08-07 23:49:27 +00001159 Bool res = check_estimate(DOUBLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 1: j);
sewardj2062dc62011-09-05 12:15:16 +00001160 printf(" ==> %s)", res ? "PASS" : "FAIL");
1161 /* For debugging . . .
1162 printf(" ==> %s (res=%016llx)", res ? "PASS" : "FAIL", dst_dp[j]);
1163 */
1164 } else {
1165 vx_fp_test_type type = test_group.type;
1166 switch (type) {
1167 case VX_SCALAR_CONV_TO_WORD:
1168 printf(" = %016llx", dst_dp[j] & 0x00000000ffffffffULL);
1169 break;
1170 case VX_CONV_TO_SINGLE:
1171 printf(" = %016llx", dst_dp[j] & 0xffffffff00000000ULL);
1172 break;
1173 default: // For VX_CONV_TO_DOUBLE and non-convert instructions . . .
1174 printf(" = %016llx", dst_dp[j]);
1175 }
1176 }
1177 }
1178 printf("\n");
1179 } else {
carlldd690bf2014-08-07 23:49:27 +00001180 int j;
sewardj2062dc62011-09-05 12:15:16 +00001181 unsigned int * frB_sp, * dst_sp = NULL;
1182 unsigned long long * dst_dp = NULL;
carlldd690bf2014-08-07 23:49:27 +00001183 if (sparse_sp)
sewardj2062dc62011-09-05 12:15:16 +00001184 loops = 2;
sewardj2062dc62011-09-05 12:15:16 +00001185 for (j = 0; j < loops; j++) {
1186 inB = (void *)&spec_sp_fargs[i + j];
1187 // copy single precision FP into vector element i
carlldd690bf2014-08-07 23:49:27 +00001188 if (sparse_sp) {
1189 if (isLE)
1190 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1191 else
1192 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1193 } else {
1194 if (isLE && is_scalar)
1195 vecB_void_ptr += 12;
1196 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1197 }
sewardj2062dc62011-09-05 12:15:16 +00001198 }
1199 // execute test insn
1200 (*func)();
carlldd690bf2014-08-07 23:49:27 +00001201 if (test_group.type == VX_CONV_TO_DOUBLE) {
sewardj2062dc62011-09-05 12:15:16 +00001202 dst_dp = (unsigned long long *) &vec_out;
carlldd690bf2014-08-07 23:49:27 +00001203 if (isLE && is_scalar)
1204 dst_dp++;
1205 } else {
sewardj2062dc62011-09-05 12:15:16 +00001206 dst_sp = (unsigned int *) &vec_out;
carlldd690bf2014-08-07 23:49:27 +00001207 if (isLE && is_scalar)
1208 dst_sp += 3;
1209 }
sewardj2062dc62011-09-05 12:15:16 +00001210 // print result
1211 printf("#%d: %s ", i/stride, test_group.name);
1212 for (j = 0; j < loops; j++) {
1213 if (j)
1214 printf("; ");
1215 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1216 printf("%s(%08x)", test_group.op, *frB_sp);
1217 if (estimate) {
carlldd690bf2014-08-07 23:49:27 +00001218 Bool res = check_estimate(SINGLE_TEST, is_sqrt, i + j, (isLE && is_scalar) ? 3 : j);
sewardj2062dc62011-09-05 12:15:16 +00001219 printf(" ==> %s)", res ? "PASS" : "FAIL");
1220 } else {
1221 if (test_group.type == VX_CONV_TO_DOUBLE)
1222 printf(" = %016llx", dst_dp[j]);
1223 else
1224 /* Special case: Current VEX implementation for fsqrts (single precision)
1225 * uses the same implementation as that used for double precision fsqrt.
1226 * However, I've found that for xvsqrtsp, the result from that implementation
1227 * may be off by the two LSBs. Generally, even this small inaccuracy can cause the
1228 * output to appear very different if you end up with a carry. But for the given
1229 * inputs in this testcase, we can simply mask out these bits.
1230 */
1231 printf(" = %08x", is_sqrt ? (dst_sp[j] & 0xfffffffc) : dst_sp[j]);
1232 }
1233 }
1234 printf("\n");
1235 }
1236 }
1237 k++;
1238 printf( "\n" );
1239 }
1240}
1241
1242static void test_int_to_fp_convert(void)
1243{
1244 test_func_t func;
1245 int k;
1246 k = 0;
1247
1248 while ((func = intToFp_tests[k].test_func)) {
1249 int idx, i;
1250 vx_intToFp_test_t test_group = intToFp_tests[k];
1251 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1252 Bool sparse_sp = False;
1253 int stride = dp ? 2 : 4;
1254 int loops = stride;
1255
1256 /* For conversions of single to double, the 128-bit input register is sparsely populated:
1257 * |___ int___|_Unused_|___int___|__Unused__| // for vector op
1258 * or
1259 * We need to adjust stride from '4' to '2', since we'll only be loading
1260 * two values per loop into the input register.
1261 */
1262 if (!dp && test_group.type == VX_CONV_TO_DOUBLE) {
1263 sparse_sp = True;
1264 stride = 2;
1265 }
1266
1267 for (i = 0; i < test_group.num_tests; i+=stride) {
1268 unsigned int * pv;
1269 void * inB;
1270
1271 pv = (unsigned int *)&vec_out;
1272 // clear vec_out
1273 for (idx = 0; idx < 4; idx++, pv++)
1274 *pv = 0;
1275
1276 if (dp) {
1277 int j;
1278 unsigned long long *dst_dw, * targs = test_group.targs;
1279 for (j = 0; j < loops; j++) {
1280 inB = (void *)&targs[i + j];
1281 // copy doubleword into vector element i
1282 memcpy(((void *)&vec_inB) + (j * 8), inB, 8);
1283 }
1284 // execute test insn
1285 (*func)();
1286 dst_dw = (unsigned long long *) &vec_out;
1287 printf("#%d: %s ", i/stride, test_group.name);
1288 for (j = 0; j < loops; j++) {
1289 if (j)
1290 printf("; ");
1291 printf("conv(%016llx)", targs[i + j]);
1292
1293 if (test_group.type == VX_CONV_TO_SINGLE)
1294 printf(" = %016llx", dst_dw[j] & 0xffffffff00000000ULL);
1295 else
1296 printf(" = %016llx", dst_dw[j]);
1297 }
1298 printf("\n");
1299 } else {
carlldd690bf2014-08-07 23:49:27 +00001300 int j;
sewardj2062dc62011-09-05 12:15:16 +00001301 unsigned int * dst_sp = NULL;
1302 unsigned int * targs = test_group.targs;
1303 unsigned long long * dst_dp = NULL;
carlldd690bf2014-08-07 23:49:27 +00001304 void * vecB_void_ptr = (void *)&vec_inB;
1305 if (sparse_sp)
sewardj2062dc62011-09-05 12:15:16 +00001306 loops = 2;
sewardj2062dc62011-09-05 12:15:16 +00001307 for (j = 0; j < loops; j++) {
1308 inB = (void *)&targs[i + j];
1309 // copy single word into vector element i
carlldd690bf2014-08-07 23:49:27 +00001310 if (sparse_sp) {
1311 if (isLE)
1312 memcpy(vecB_void_ptr + ((2 * j * 4) + 4), inB, 4);
1313 else
1314 memcpy(vecB_void_ptr + ((2 * j * 4) ), inB, 4);
1315 } else {
1316 memcpy(vecB_void_ptr + (j * 4), inB, 4);
1317 }
sewardj2062dc62011-09-05 12:15:16 +00001318 }
1319 // execute test insn
1320 (*func)();
1321 if (test_group.type == VX_CONV_TO_DOUBLE)
1322 dst_dp = (unsigned long long *) &vec_out;
1323 else
1324 dst_sp = (unsigned int *) &vec_out;
1325 // print result
1326 printf("#%d: %s ", i/stride, test_group.name);
1327 for (j = 0; j < loops; j++) {
1328 if (j)
1329 printf("; ");
1330 printf("conv(%08x)", targs[i + j]);
1331 if (test_group.type == VX_CONV_TO_DOUBLE)
1332 printf(" = %016llx", dst_dp[j]);
1333 else
1334 printf(" = %08x", dst_sp[j]);
1335 }
1336 printf("\n");
1337 }
1338 }
1339 k++;
1340 printf( "\n" );
1341 }
1342}
1343
1344
1345
// Operand pairs { dividend, divisor } for the doubleword divide tests.
signed long long div_dw_tdata[13][2] = {
   {                     4,         -4 },
   {                     4,         -3 },
   {                     4,          4 },
   {                     4,         -5 },
   {                     3,          8 },
   { 0x8000000000000000ULL,        0xa },
   {                 0x50c,         -1 },
   {                 0x50c,      -4096 },
   {            0x1234fedc, 0x8000a873 },
   { 0xabcd87651234fedcULL, 0xa123b893 },
   {      0x123456789abdcULL,        0 },
   {                     0,          2 },
   {                  0x77,    0xa3499 }
};
// Number of operand pairs (rows) in div_dw_tdata.
#define dw_tdata_len (sizeof(div_dw_tdata) / sizeof(div_dw_tdata[0]))

// Operand pairs { dividend, divisor } for the word divide tests.
unsigned int div_w_tdata[6][2] = {
   {          0,          2 },
   {          2,          0 },
   { 0x7abc1234, 0xf0000000 },
   { 0xfabc1234,          5 },
   {         77,         66 },
   {          5, 0xfabc1234 },
};
// Number of operand pairs (rows) in div_w_tdata.
#define w_tdata_len (sizeof(div_w_tdata) / sizeof(div_w_tdata[0]))
1374
1375typedef struct div_ext_test
1376{
1377 test_func_t test_func;
1378 const char *name;
1379 int num_tests;
1380 div_type_t div_type;
1381 precision_type_t precision;
1382} div_ext_test_t;
1383
1384static div_ext_test_t div_tests[] = {
1385#ifdef __powerpc64__
1386 { &test_divdeu, "divdeu", dw_tdata_len, DIV_BASE, DOUBLE_TEST },
1387 { &test_divdeu, "divdeuo", dw_tdata_len, DIV_OE, DOUBLE_TEST },
1388#endif
1389 { &test_divwe, "divwe", w_tdata_len, DIV_BASE, SINGLE_TEST },
1390 { &test_divwe, "divweo", w_tdata_len, DIV_OE, SINGLE_TEST },
1391 { NULL, NULL, 0, 0, 0 }
1392};
1393
1394static void test_div_extensions(void)
1395{
1396 test_func_t func;
1397 int k;
1398 k = 0;
1399
1400 while ((func = div_tests[k].test_func)) {
1401 int i, repeat = 1;
1402 div_ext_test_t test_group = div_tests[k];
1403 do_dot = False;
1404
1405again:
1406 for (i = 0; i < test_group.num_tests; i++) {
1407 unsigned int condreg;
1408
1409 if (test_group.div_type == DIV_OE)
1410 do_OE = True;
1411 else
1412 do_OE = False;
1413
1414 if (test_group.precision == DOUBLE_TEST) {
1415 r14 = div_dw_tdata[i][0];
1416 r15 = div_dw_tdata[i][1];
1417 } else {
1418 r14 = div_w_tdata[i][0];
1419 r15 = div_w_tdata[i][1];
1420 }
1421 // execute test insn
1422 (*func)();
1423 condreg = (div_flags & 0xf0000000) >> 28;
1424 printf("#%d: %s%s: ", i, test_group.name, do_dot ? "." : "");
1425 if (test_group.precision == DOUBLE_TEST) {
1426 printf("0x%016llx0000000000000000 / 0x%016llx = 0x%016llx;",
1427 div_dw_tdata[i][0], div_dw_tdata[i][1], (signed long long) r17);
1428 } else {
1429 printf("0x%08x00000000 / 0x%08x = 0x%08x;",
1430 div_w_tdata[i][0], div_w_tdata[i][1], (unsigned int) r17);
1431 }
1432 printf(" CR=%x; XER=%x\n", condreg, div_xer);
1433 }
1434 printf("\n");
1435 if (repeat) {
1436 repeat = 0;
1437 do_dot = True;
1438 goto again;
1439 }
1440 k++;
1441 printf( "\n" );
1442 }
1443}
1444
1445
1446static void test_vx_tdivORtsqrt(void)
1447{
1448 test_func_t func;
1449 int k, crx;
1450 unsigned int flags;
1451 k = 0;
1452 do_dot = False;
1453 build_special_fargs_table();
1454
1455 while ((func = vx_tdivORtsqrt_tests[k].test_func)) {
1456 int idx, i;
1457 vx_fp_test_t test_group = vx_tdivORtsqrt_tests[k];
1458 Bool dp = (test_group.precision == DOUBLE_TEST) ? True : False;
1459 Bool is_scalar = (strstr(test_group.name, "xs")) ? True : False;
1460 Bool two_args = test_group.targs ? True : False;
1461 int stride = dp ? 2 : 4;
1462 int loops = is_scalar ? 1 : stride;
1463 stride = is_scalar ? 1: stride;
1464
1465 for (i = 0; i < test_group.num_tests; i+=stride) {
1466 unsigned int * pv;
carlldd690bf2014-08-07 23:49:27 +00001467 void * inB, * vecB_void_ptr = (void *)&vec_inB;
sewardj2062dc62011-09-05 12:15:16 +00001468
1469 pv = (unsigned int *)&vec_out;
1470 // clear vec_out
1471 for (idx = 0; idx < 4; idx++, pv++)
1472 *pv = 0;
1473
1474 if (dp) {
1475 int j;
1476 unsigned long long * frB_dp;
1477 if (two_args) {
1478 setup_dp_fp_args(&test_group.targs[i], False);
1479 } else {
1480 for (j = 0; j < loops; j++) {
1481 inB = (void *)&spec_fargs[i + j];
1482 // copy double precision FP into vector element i
carlldd690bf2014-08-07 23:49:27 +00001483 if (isLE && is_scalar)
1484 vecB_void_ptr += 8;
1485 memcpy(vecB_void_ptr + (j * 8), inB, 8);
sewardj2062dc62011-09-05 12:15:16 +00001486 }
1487 }
1488 // execute test insn
1489 // Must do set/get of CRs immediately before/after calling the asm func
1490 // to avoid CRs being modified by other instructions.
1491 SET_FPSCR_ZERO;
1492 SET_CR_XER_ZERO;
1493 (*func)();
1494 GET_CR(flags);
1495 // assumes using CR1
1496 crx = (flags & 0x0f000000) >> 24;
1497 if (two_args) {
1498 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1499 } else {
1500 printf("#%d: %s ", i/stride, test_group.name);
1501 for (j = 0; j < loops; j++) {
1502 if (j)
1503 printf("; ");
1504 frB_dp = (unsigned long long *)&spec_fargs[i + j];
1505 printf("%s(%016llx)", test_group.op, *frB_dp);
1506 }
1507 printf( " ? %x (CRx)\n", crx);
1508 }
1509 } else {
1510 int j;
1511 unsigned int * frB_sp;
1512 if (two_args) {
1513 setup_sp_fp_args(&test_group.targs[i], False);
1514 } else {
1515 for (j = 0; j < loops; j++) {
1516 inB = (void *)&spec_sp_fargs[i + j];
1517 // copy single precision FP into vector element i
1518 memcpy(((void *)&vec_inB) + (j * 4), inB, 4);
1519 }
1520 }
1521 // execute test insn
1522 SET_FPSCR_ZERO;
1523 SET_CR_XER_ZERO;
1524 (*func)();
1525 GET_CR(flags);
1526 crx = (flags & 0x0f000000) >> 24;
1527 // print result
1528 if (two_args) {
1529 print_vector_fp_result(crx, &test_group, i, False/*do not print vec_out*/);
1530 } else {
1531 printf("#%d: %s ", i/stride, test_group.name);
1532 for (j = 0; j < loops; j++) {
1533 if (j)
1534 printf("; ");
1535 frB_sp = (unsigned int *)&spec_sp_fargs[i + j];
1536 printf("%s(%08x)", test_group.op, *frB_sp);
1537 }
1538 printf( " ? %x (CRx)\n", crx);
1539 }
1540 }
1541 }
1542 k++;
1543 printf( "\n" );
1544 }
1545}
1546
1547
1548static void test_ftsqrt(void)
1549{
1550 int i, crx;
1551 unsigned int flags;
1552 unsigned long long * frbp;
1553 build_special_fargs_table();
1554
1555
1556 for (i = 0; i < nb_special_fargs; i++) {
1557 f14 = spec_fargs[i];
1558 frbp = (unsigned long long *)&spec_fargs[i];
1559 SET_FPSCR_ZERO;
1560 SET_CR_XER_ZERO;
1561 __asm__ __volatile__ ("ftsqrt cr1, %0" : : "d" (f14));
1562 GET_CR(flags);
1563 crx = (flags & 0x0f000000) >> 24;
1564 printf( "ftsqrt: %016llx ? %x (CRx)\n", *frbp, crx);
1565 }
1566 printf( "\n" );
1567}
1568
1569static void
1570test_popcntw(void)
1571{
1572#ifdef __powerpc64__
1573 uint64_t res;
1574 unsigned long long src = 0x9182736405504536ULL;
1575 r14 = src;
1576 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1577 printf("popcntw: 0x%llx => 0x%016llx\n", (unsigned long long)src, (unsigned long long)res);
1578#else
1579 uint32_t res;
1580 unsigned int src = 0x9182730E;
1581 r14 = src;
1582 __asm__ __volatile__ ("popcntw %0, %1" : "=r" (res): "r" (r14));
1583 printf("popcntw: 0x%x => 0x%08x\n", src, (int)res);
1584#endif
1585 printf( "\n" );
1586}
1587
1588
1589static test_table_t
1590 all_tests[] =
1591{
1592
1593 { &test_vsx_one_fp_arg,
Elliott Hughesed398002017-06-21 14:41:24 -07001594 "Test VSX vector and scalar single argument instructions", OTHER_INST } ,
sewardj2062dc62011-09-05 12:15:16 +00001595 { &test_int_to_fp_convert,
Elliott Hughesed398002017-06-21 14:41:24 -07001596 "Test VSX vector integer to float conversion instructions", OTHER_INST },
sewardj2062dc62011-09-05 12:15:16 +00001597 { &test_div_extensions,
Elliott Hughesed398002017-06-21 14:41:24 -07001598 "Test div extensions", SCALAR_DIV_INST },
sewardj2062dc62011-09-05 12:15:16 +00001599 { &test_ftsqrt,
Elliott Hughesed398002017-06-21 14:41:24 -07001600 "Test ftsqrt instruction", OTHER_INST },
sewardj2062dc62011-09-05 12:15:16 +00001601 { &test_vx_tdivORtsqrt,
Elliott Hughesed398002017-06-21 14:41:24 -07001602 "Test vector and scalar tdiv and tsqrt instructions", OTHER_INST },
sewardj2062dc62011-09-05 12:15:16 +00001603 { &test_popcntw,
Elliott Hughesed398002017-06-21 14:41:24 -07001604 "Test popcntw instruction", OTHER_INST },
sewardj2062dc62011-09-05 12:15:16 +00001605 { NULL, NULL }
1606};
1607#endif // HAS_VSX
1608
/* Write the command-line help text to stderr. */
static void usage (void)
{
   static const char help_text[] =
      "Usage: test_isa_3_0 [OPTIONS]\n"
      "\t-d: test scalar division instructions (default)\n"
      "\t-o: test non scalar division instructions (default)\n"
      "\t-A: test all instructions (default)\n"
      "\t-h: display this help and exit\n";

   fputs(help_text, stderr);
}
1619
/* Program entry point.
 *
 * Options:
 *   -d  run the scalar division tests
 *   -o  run all other tests
 *   -A  run every test
 *   -h  print usage and exit with status 0
 *
 * When no selection option is given, every test is run, matching the
 * "(default)" markers in the usage text.  An unrecognised option prints the
 * usage text plus the offending option character and exits with status 1.
 * Without HAS_VSX the program does nothing and returns 0.
 */
int main(int argc, char **argv)
{
#ifdef HAS_VSX

   test_table_t aTest;
   test_func_t func;
   int c;
   int i;
   unsigned int test_run_mask = 0;

   /* NOTE, ISA 3.0 introduces the OV32 and CA32 bits in the XER.  These
    * bits are set by various arithmetic instructions.  This means this
    * test generates different XER output for pre ISA 3.0 versus ISA 3.0
    * hardware.  The tests have been grouped so that the tests that generate
    * different results are in one test and the rest are in a different test.
    * This minimizes the size of the result expect files for the two cases.
    */

   while ((c = getopt(argc, argv, "doAh")) != -1) {
      switch (c) {
      case 'd':
         test_run_mask |= SCALAR_DIV_INST;
         break;
      case 'o':
         test_run_mask |= OTHER_INST;
         break;
      case 'A':
         test_run_mask = 0xFFFF;
         break;
      case 'h':
         usage();
         return 0;

      default:
         usage();
         // getopt() returns '?' here; the rejected character is in optopt
         fprintf(stderr, "Unknown argument: '%c'\n", optopt);
         return 1;
      }
   }

   // No group selected on the command line: run everything.
   if (test_run_mask == 0)
      test_run_mask = 0xFFFF;

   for (i = 0; (func = all_tests[i].test_category); i++) {
      aTest = all_tests[i];

      if (test_run_mask & aTest.test_group) {
         /* Test group specified on command line */

         printf( "%s\n", aTest.name );
         (*func)();
      }
   }

   // free(NULL) is a no-op, so no guards are needed
   free(spec_fargs);
   free(spec_sp_fargs);

#else
   (void)argc;
   (void)argv;
#endif // HAS_VSX

   return 0;
}