blob: d1979efcc938791e65a483f3229dad8dd0e1c8e5 [file] [log] [blame]
sewardj52ff4cc2005-03-26 20:33:38 +00001
2/*---------------------------------------------------------------*/
sewardj752f9062010-05-03 21:38:49 +00003/*--- begin guest_generic_x87.c ---*/
sewardj52ff4cc2005-03-26 20:33:38 +00004/*---------------------------------------------------------------*/
5
6/*
sewardj752f9062010-05-03 21:38:49 +00007 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
sewardj52ff4cc2005-03-26 20:33:38 +00009
Elliott Hughesed398002017-06-21 14:41:24 -070010 Copyright (C) 2004-2017 OpenWorks LLP
sewardj752f9062010-05-03 21:38:49 +000011 info@open-works.net
sewardj52ff4cc2005-03-26 20:33:38 +000012
sewardj752f9062010-05-03 21:38:49 +000013 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
sewardj52ff4cc2005-03-26 20:33:38 +000017
sewardj752f9062010-05-03 21:38:49 +000018 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
sewardj7bd6ffe2005-08-03 16:07:36 +000026 02110-1301, USA.
27
sewardj752f9062010-05-03 21:38:49 +000028 The GNU General Public License is contained in the file COPYING.
sewardj52ff4cc2005-03-26 20:33:38 +000029
30 Neither the names of the U.S. Department of Energy nor the
31 University of California nor the names of its contributors may be
32 used to endorse or promote products derived from this software
33 without prior written permission.
sewardj52ff4cc2005-03-26 20:33:38 +000034*/
35
36/* This file contains functions for doing some x87-specific
37 operations. Both the amd64 and x86 front ends (guests) indirectly
38 call these functions via guest helper calls. By putting them here,
39 code duplication is avoided. Some of these functions are tricky
40 and hard to verify, so there is much to be said for only having one
41 copy thereof.
42*/
43
44#include "libvex_basictypes.h"
45
sewardjcef7d3e2009-07-02 12:21:59 +000046#include "main_util.h"
47#include "guest_generic_x87.h"
sewardj52ff4cc2005-03-26 20:33:38 +000048
49
50/* 80 and 64-bit floating point formats:
51
52 80-bit:
53
54 S 0 0-------0 zero
55 S 0 0X------X denormals
56 S 1-7FFE 1X------X normals (all normals have leading 1)
57 S 7FFF 10------0 infinity
58 S 7FFF 10X-----X snan
59 S 7FFF 11X-----X qnan
60
61 S is the sign bit. For runs X----X, at least one of the Xs must be
62 nonzero. Exponent is 15 bits, fractional part is 63 bits, and
63 there is an explicitly represented leading 1, and a sign bit,
64 giving 80 in total.
65
66 64-bit avoids the confusion of an explicitly represented leading 1
67 and so is simpler:
68
69 S 0 0------0 zero
70 S 0 X------X denormals
71 S 1-7FE any normals
72 S 7FF 0------0 infinity
73 S 7FF 0X-----X snan
74 S 7FF 1X-----X qnan
75
76 Exponent is 11 bits, fractional part is 52 bits, and there is a
77 sign bit, giving 64 in total.
78*/
79
80
81static inline UInt read_bit_array ( UChar* arr, UInt n )
82{
83 UChar c = arr[n >> 3];
84 c >>= (n&7);
85 return c & 1;
86}
87
88static inline void write_bit_array ( UChar* arr, UInt n, UInt b )
89{
90 UChar c = arr[n >> 3];
91 c = toUChar( c & ~(1 << (n&7)) );
92 c = toUChar( c | ((b&1) << (n&7)) );
93 arr[n >> 3] = c;
94}
95
96/* Convert an IEEE754 double (64-bit) into an x87 extended double
97 (80-bit), mimicing the hardware fairly closely. Both numbers are
98 stored little-endian. Limitations, all of which could be fixed,
99 given some level of hassle:
100
101 * Identity of NaNs is not preserved.
102
103 See comments in the code for more details.
104*/
105void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 )
106{
107 Bool mantissaIsZero;
108 Int bexp, i, j, shift;
109 UChar sign;
110
111 sign = toUChar( (f64[7] >> 7) & 1 );
112 bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F);
113 bexp &= 0x7FF;
114
115 mantissaIsZero = False;
116 if (bexp == 0 || bexp == 0x7FF) {
117 /* We'll need to know whether or not the mantissa (bits 51:0) is
118 all zeroes in order to handle these cases. So figure it
119 out. */
120 mantissaIsZero
121 = toBool(
122 (f64[6] & 0x0F) == 0
123 && f64[5] == 0 && f64[4] == 0 && f64[3] == 0
124 && f64[2] == 0 && f64[1] == 0 && f64[0] == 0
125 );
126 }
127
128 /* If the exponent is zero, either we have a zero or a denormal.
129 Produce a zero. This is a hack in that it forces denormals to
130 zero. Could do better. */
131 if (bexp == 0) {
132 f80[9] = toUChar( sign << 7 );
133 f80[8] = f80[7] = f80[6] = f80[5] = f80[4]
134 = f80[3] = f80[2] = f80[1] = f80[0] = 0;
135
136 if (mantissaIsZero)
137 /* It really is zero, so that's all we can do. */
138 return;
139
140 /* There is at least one 1-bit in the mantissa. So it's a
141 potentially denormalised double -- but we can produce a
142 normalised long double. Count the leading zeroes in the
143 mantissa so as to decide how much to bump the exponent down
144 by. Note, this is SLOW. */
145 shift = 0;
146 for (i = 51; i >= 0; i--) {
147 if (read_bit_array(f64, i))
148 break;
149 shift++;
150 }
151
152 /* and copy into place as many bits as we can get our hands on. */
153 j = 63;
154 for (i = 51 - shift; i >= 0; i--) {
155 write_bit_array( f80, j,
156 read_bit_array( f64, i ) );
157 j--;
158 }
159
160 /* Set the exponent appropriately, and we're done. */
161 bexp -= shift;
162 bexp += (16383 - 1023);
163 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
164 f80[8] = toUChar( bexp & 0xFF );
165 return;
166 }
167
168 /* If the exponent is 7FF, this is either an Infinity, a SNaN or
169 QNaN, as determined by examining bits 51:0, thus:
170 0 ... 0 Inf
171 0X ... X SNaN
172 1X ... X QNaN
173 where at least one of the Xs is not zero.
174 */
175 if (bexp == 0x7FF) {
176 if (mantissaIsZero) {
177 /* Produce an appropriately signed infinity:
178 S 1--1 (15) 1 0--0 (63)
179 */
180 f80[9] = toUChar( (sign << 7) | 0x7F );
181 f80[8] = 0xFF;
182 f80[7] = 0x80;
183 f80[6] = f80[5] = f80[4] = f80[3]
184 = f80[2] = f80[1] = f80[0] = 0;
185 return;
186 }
187 /* So it's either a QNaN or SNaN. Distinguish by considering
188 bit 51. Note, this destroys all the trailing bits
189 (identity?) of the NaN. IEEE754 doesn't require preserving
190 these (it only requires that there be one QNaN value and one
191 SNaN value), but x87 does seem to have some ability to
192 preserve them. Anyway, here, the NaN's identity is
193 destroyed. Could be improved. */
194 if (f64[6] & 8) {
sewardj0548cdb2012-04-11 07:11:28 +0000195 /* QNaN. Make a canonical QNaN:
196 S 1--1 (15) 1 1 0--0 (62)
sewardj52ff4cc2005-03-26 20:33:38 +0000197 */
198 f80[9] = toUChar( (sign << 7) | 0x7F );
199 f80[8] = 0xFF;
sewardj0548cdb2012-04-11 07:11:28 +0000200 f80[7] = 0xC0;
sewardj52ff4cc2005-03-26 20:33:38 +0000201 f80[6] = f80[5] = f80[4] = f80[3]
sewardj0548cdb2012-04-11 07:11:28 +0000202 = f80[2] = f80[1] = f80[0] = 0x00;
sewardj52ff4cc2005-03-26 20:33:38 +0000203 } else {
204 /* SNaN. Make a SNaN:
sewardj0548cdb2012-04-11 07:11:28 +0000205 S 1--1 (15) 1 0 1--1 (62)
sewardj52ff4cc2005-03-26 20:33:38 +0000206 */
207 f80[9] = toUChar( (sign << 7) | 0x7F );
208 f80[8] = 0xFF;
sewardj0548cdb2012-04-11 07:11:28 +0000209 f80[7] = 0xBF;
sewardj52ff4cc2005-03-26 20:33:38 +0000210 f80[6] = f80[5] = f80[4] = f80[3]
211 = f80[2] = f80[1] = f80[0] = 0xFF;
212 }
213 return;
214 }
215
216 /* It's not a zero, denormal, infinity or nan. So it must be a
217 normalised number. Rebias the exponent and build the new
218 number. */
219 bexp += (16383 - 1023);
220
221 f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) );
222 f80[8] = toUChar( bexp & 0xFF );
223 f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78)
224 | ((f64[5] >> 5) & 7) );
225 f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) );
226 f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) );
227 f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) );
228 f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) );
229 f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) );
230 f80[1] = toUChar( ((f64[0] << 3) & 0xF8) );
231 f80[0] = toUChar( 0 );
232}
233
234
235/* Convert an x87 extended double (80-bit) into an IEEE 754 double
236 (64-bit), mimicking the hardware fairly closely. Both numbers are
237 stored little-endian. Limitations, both of which could be fixed,
238 given some level of hassle:
239
240 * Rounding following truncation could be a bit better.
241
242 * Identity of NaNs is not preserved.
243
244 See comments in the code for more details.
245*/
246void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 )
247{
248 Bool isInf;
249 Int bexp, i, j;
250 UChar sign;
251
252 sign = toUChar((f80[9] >> 7) & 1);
253 bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8];
254 bexp &= 0x7FFF;
255
256 /* If the exponent is zero, either we have a zero or a denormal.
257 But an extended precision denormal becomes a double precision
258 zero, so in either case, just produce the appropriately signed
259 zero. */
260 if (bexp == 0) {
261 f64[7] = toUChar(sign << 7);
262 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
263 return;
264 }
265
266 /* If the exponent is 7FFF, this is either an Infinity, a SNaN or
267 QNaN, as determined by examining bits 62:0, thus:
sewardj0548cdb2012-04-11 07:11:28 +0000268 10 ... 0 Inf
269 10X ... X SNaN
270 11X ... X QNaN
sewardj52ff4cc2005-03-26 20:33:38 +0000271 where at least one of the Xs is not zero.
272 */
273 if (bexp == 0x7FFF) {
274 isInf = toBool(
275 (f80[7] & 0x7F) == 0
276 && f80[6] == 0 && f80[5] == 0 && f80[4] == 0
277 && f80[3] == 0 && f80[2] == 0 && f80[1] == 0
278 && f80[0] == 0
279 );
280 if (isInf) {
281 if (0 == (f80[7] & 0x80))
282 goto wierd_NaN;
283 /* Produce an appropriately signed infinity:
284 S 1--1 (11) 0--0 (52)
285 */
286 f64[7] = toUChar((sign << 7) | 0x7F);
287 f64[6] = 0xF0;
288 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
289 return;
290 }
291 /* So it's either a QNaN or SNaN. Distinguish by considering
sewardj0548cdb2012-04-11 07:11:28 +0000292 bit 61. Note, this destroys all the trailing bits
sewardj52ff4cc2005-03-26 20:33:38 +0000293 (identity?) of the NaN. IEEE754 doesn't require preserving
294 these (it only requires that there be one QNaN value and one
295 SNaN value), but x87 does seem to have some ability to
296 preserve them. Anyway, here, the NaN's identity is
297 destroyed. Could be improved. */
sewardj0548cdb2012-04-11 07:11:28 +0000298 if (f80[7] & 0x40) {
299 /* QNaN. Make a canonical QNaN:
300 S 1--1 (11) 1 0--0 (51)
sewardj52ff4cc2005-03-26 20:33:38 +0000301 */
302 f64[7] = toUChar((sign << 7) | 0x7F);
sewardj0548cdb2012-04-11 07:11:28 +0000303 f64[6] = 0xF8;
304 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0x00;
sewardj52ff4cc2005-03-26 20:33:38 +0000305 } else {
306 /* SNaN. Make a SNaN:
307 S 1--1 (11) 0 1--1 (51)
308 */
309 f64[7] = toUChar((sign << 7) | 0x7F);
310 f64[6] = 0xF7;
311 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF;
312 }
313 return;
314 }
315
316 /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is
317 zero, the x87 FPU appears to consider the number denormalised
318 and converts it to a QNaN. */
319 if (0 == (f80[7] & 0x80)) {
320 wierd_NaN:
321 /* Strange hardware QNaN:
322 S 1--1 (11) 1 0--0 (51)
323 */
324 /* On a PIII, these QNaNs always appear with sign==1. I have
325 no idea why. */
326 f64[7] = (1 /*sign*/ << 7) | 0x7F;
327 f64[6] = 0xF8;
328 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
329 return;
330 }
331
332 /* It's not a zero, denormal, infinity or nan. So it must be a
333 normalised number. Rebias the exponent and consider. */
334 bexp -= (16383 - 1023);
335 if (bexp >= 0x7FF) {
336 /* It's too big for a double. Construct an infinity. */
337 f64[7] = toUChar((sign << 7) | 0x7F);
338 f64[6] = 0xF0;
339 f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
340 return;
341 }
342
343 if (bexp <= 0) {
344 /* It's too small for a normalised double. First construct a
345 zero and then see if it can be improved into a denormal. */
346 f64[7] = toUChar(sign << 7);
347 f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0;
348
349 if (bexp < -52)
350 /* Too small even for a denormal. */
351 return;
352
353 /* Ok, let's make a denormal. Note, this is SLOW. */
354 /* Copy bits 63, 62, 61, etc of the src mantissa into the dst,
355 indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */
356 /* bexp is in range -52 .. 0 inclusive */
357 for (i = 63; i >= 0; i--) {
358 j = i - 12 + bexp;
359 if (j < 0) break;
360 /* We shouldn't really call vassert from generated code. */
361 vassert(j >= 0 && j < 52);
362 write_bit_array ( f64,
363 j,
364 read_bit_array ( f80, i ) );
365 }
366 /* and now we might have to round ... */
367 if (read_bit_array(f80, 10+1 - bexp) == 1)
368 goto do_rounding;
369
370 return;
371 }
372
373 /* Ok, it's a normalised number which is representable as a double.
374 Copy the exponent and mantissa into place. */
375 /*
376 for (i = 0; i < 52; i++)
377 write_bit_array ( f64,
378 i,
379 read_bit_array ( f80, i+11 ) );
380 */
381 f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) );
382 f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) );
383 f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) );
384 f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) );
385 f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) );
386 f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) );
387
388 f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) );
389
390 f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) );
391
392 /* Now consider any rounding that needs to happen as a result of
393 truncating the mantissa. */
394 if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ {
395
396 /* If the bottom bits of f80 are "100 0000 0000", then the
397 infinitely precise value is deemed to be mid-way between the
398 two closest representable values. Since we're doing
399 round-to-nearest (the default mode), in that case it is the
400 bit immediately above which indicates whether we should round
401 upwards or not -- if 0, we don't. All that is encapsulated
402 in the following simple test. */
403 if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0)
404 return;
405
406 do_rounding:
407 /* Round upwards. This is a kludge. Once in every 2^24
408 roundings (statistically) the bottom three bytes are all 0xFF
409 and so we don't round at all. Could be improved. */
410 if (f64[0] != 0xFF) {
411 f64[0]++;
412 }
413 else
414 if (f64[0] == 0xFF && f64[1] != 0xFF) {
415 f64[0] = 0;
416 f64[1]++;
417 }
418 else
419 if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) {
420 f64[0] = 0;
421 f64[1] = 0;
422 f64[2]++;
423 }
424 /* else we don't round, but we should. */
425 }
426}
427
428
sewardj879cee02006-03-07 01:15:50 +0000429/* CALLED FROM GENERATED CODE: CLEAN HELPER */
430/* Extract the signed significand or exponent component as per
431 fxtract. Arg and result are doubles travelling under the guise of
432 ULongs. Returns significand when getExp is zero and exponent
433 otherwise. */
434ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp )
435{
436 ULong uSig, uExp;
437 /* Long sSig; */
438 Int sExp, i;
439 UInt sign, expExp;
440
441 /*
442 S 7FF 0------0 infinity
443 S 7FF 0X-----X snan
444 S 7FF 1X-----X qnan
445 */
446 const ULong posInf = 0x7FF0000000000000ULL;
447 const ULong negInf = 0xFFF0000000000000ULL;
448 const ULong nanMask = 0x7FF0000000000000ULL;
449 const ULong qNan = 0x7FF8000000000000ULL;
450 const ULong posZero = 0x0000000000000000ULL;
451 const ULong negZero = 0x8000000000000000ULL;
452 const ULong bit51 = 1ULL << 51;
453 const ULong bit52 = 1ULL << 52;
454 const ULong sigMask = bit52 - 1;
455
sewardj772f6df2010-07-29 07:01:29 +0000456 /* Mimic Core i5 behaviour for special cases. */
sewardj879cee02006-03-07 01:15:50 +0000457 if (arg == posInf)
458 return getExp ? posInf : posInf;
459 if (arg == negInf)
460 return getExp ? posInf : negInf;
461 if ((arg & nanMask) == nanMask)
sewardj772f6df2010-07-29 07:01:29 +0000462 return qNan | (arg & (1ULL << 63));
sewardj879cee02006-03-07 01:15:50 +0000463 if (arg == posZero)
464 return getExp ? negInf : posZero;
465 if (arg == negZero)
466 return getExp ? negInf : negZero;
467
468 /* Split into sign, exponent and significand. */
469 sign = ((UInt)(arg >> 63)) & 1;
470
471 /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */
472 uSig = arg & sigMask;
473
474 /* Get the exponent. */
475 sExp = ((Int)(arg >> 52)) & 0x7FF;
476
477 /* Deal with denormals: if the exponent is zero, then the
478 significand cannot possibly be zero (negZero/posZero are handled
479 above). Shift the significand left until bit 51 of it becomes
480 1, and decrease the exponent accordingly.
481 */
482 if (sExp == 0) {
483 for (i = 0; i < 52; i++) {
484 if (uSig & bit51)
485 break;
486 uSig <<= 1;
487 sExp--;
488 }
489 uSig <<= 1;
490 } else {
491 /* Add the implied leading-1 in the significand. */
492 uSig |= bit52;
493 }
494
495 /* Roll in the sign. */
496 /* sSig = uSig; */
497 /* if (sign) sSig =- sSig; */
498
499 /* Convert sig into a double. This should be an exact conversion.
500 Then divide by 2^52, which should give a value in the range 1.0
501 to 2.0-epsilon, at least for normalised args. */
502 /* dSig = (Double)sSig; */
503 /* dSig /= 67108864.0; */ /* 2^26 */
504 /* dSig /= 67108864.0; */ /* 2^26 */
505 uSig &= sigMask;
506 uSig |= 0x3FF0000000000000ULL;
507 if (sign)
508 uSig ^= negZero;
509
510 /* Convert exp into a double. Also an exact conversion. */
511 /* dExp = (Double)(sExp - 1023); */
512 sExp -= 1023;
513 if (sExp == 0) {
514 uExp = 0;
515 } else {
516 uExp = sExp < 0 ? -sExp : sExp;
517 expExp = 0x3FF +52;
518 /* 1 <= uExp <= 1074 */
519 /* Skip first 42 iterations of normalisation loop as we know they
520 will always happen */
521 uExp <<= 42;
522 expExp -= 42;
523 for (i = 0; i < 52-42; i++) {
524 if (uExp & bit52)
525 break;
526 uExp <<= 1;
527 expExp--;
528 }
529 uExp &= sigMask;
530 uExp |= ((ULong)expExp) << 52;
531 if (sExp < 0) uExp ^= negZero;
532 }
533
534 return getExp ? uExp : uSig;
535}
536
537
sewardj0b2d3fe2010-08-06 07:59:38 +0000538
539/*---------------------------------------------------------*/
540/*--- SSE4.2 PCMP{E,I}STR{I,M} helpers ---*/
541/*---------------------------------------------------------*/
542
/* We need the definitions for OSZACP eflags/rflags offsets.
   #including guest_{amd64,x86}_defs.h causes chaos, so just copy the
   required values directly.  They are not going to change in the
   foreseeable future :-)
*/

/* Bit position of each flag within the flags word. */
#define SHIFT_O 11   /* overflow */
#define SHIFT_S 7    /* sign */
#define SHIFT_Z 6    /* zero */
#define SHIFT_A 4    /* auxiliary carry */
#define SHIFT_P 2    /* parity */
#define SHIFT_C 0    /* carry */

/* Single-bit mask for each flag. */
#define MASK_O (1 << SHIFT_O)
#define MASK_S (1 << SHIFT_S)
#define MASK_Z (1 << SHIFT_Z)
#define MASK_A (1 << SHIFT_A)
#define MASK_P (1 << SHIFT_P)
#define MASK_C (1 << SHIFT_C)
562
563
564/* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's
565 Delight. */
566static UInt clz32 ( UInt x )
567{
568 Int y, m, n;
569 y = -(x >> 16);
570 m = (y >> 16) & 16;
571 n = 16 - m;
572 x = x >> m;
573 y = x - 0x100;
574 m = (y >> 16) & 8;
575 n = n + m;
576 x = x << m;
577 y = x - 0x1000;
578 m = (y >> 16) & 4;
579 n = n + m;
580 x = x << m;
581 y = x - 0x4000;
582 m = (y >> 16) & 2;
583 n = n + m;
584 x = x << m;
585 y = x >> 14;
586 m = y & ~(y >> 1);
587 return n + 2 - m;
588}
589
590static UInt ctz32 ( UInt x )
591{
592 return 32 - clz32((~x) & (x-1));
593}
594
sewardjacfbd7d2010-08-17 22:52:08 +0000595/* Convert a 4-bit value to a 32-bit value by cloning each bit 8
596 times. There's surely a better way to do this, but I don't know
597 what it is. */
598static UInt bits4_to_bytes4 ( UInt bits4 )
sewardj0b2d3fe2010-08-06 07:59:38 +0000599{
sewardjacfbd7d2010-08-17 22:52:08 +0000600 UInt r = 0;
601 r |= (bits4 & 1) ? 0x000000FF : 0;
602 r |= (bits4 & 2) ? 0x0000FF00 : 0;
603 r |= (bits4 & 4) ? 0x00FF0000 : 0;
604 r |= (bits4 & 8) ? 0xFF000000 : 0;
605 return r;
sewardj0b2d3fe2010-08-06 07:59:38 +0000606}
607
608
sewardj3c3d6d62012-02-16 15:21:08 +0000609/* Convert a 2-bit value to a 32-bit value by cloning each bit 16
610 times. There's surely a better way to do this, but I don't know
611 what it is. */
612static UInt bits2_to_bytes4 ( UInt bits2 )
613{
614 UInt r = 0;
615 r |= (bits2 & 1) ? 0x0000FFFF : 0;
616 r |= (bits2 & 2) ? 0xFFFF0000 : 0;
617 return r;
618}
619
620
sewardjacfbd7d2010-08-17 22:52:08 +0000621/* Given partial results from a pcmpXstrX operation (intRes1,
622 basically), generate an I- or M-format output value, also the new
623 OSZACP flags. */
624static
625void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV,
626 /*OUT*/UInt* resOSZACP,
627 UInt intRes1,
628 UInt zmaskL, UInt zmaskR,
629 UInt validL,
630 UInt pol, UInt idx,
631 Bool isxSTRM )
sewardj0b2d3fe2010-08-06 07:59:38 +0000632{
sewardjacfbd7d2010-08-17 22:52:08 +0000633 vassert((pol >> 2) == 0);
634 vassert((idx >> 1) == 0);
sewardj0b2d3fe2010-08-06 07:59:38 +0000635
sewardjacfbd7d2010-08-17 22:52:08 +0000636 UInt intRes2 = 0;
637 switch (pol) {
638 case 0: intRes2 = intRes1; break; // pol +
639 case 1: intRes2 = ~intRes1; break; // pol -
640 case 2: intRes2 = intRes1; break; // pol m+
641 case 3: intRes2 = intRes1 ^ validL; break; // pol m-
642 }
643 intRes2 &= 0xFFFF;
sewardj0b2d3fe2010-08-06 07:59:38 +0000644
sewardjacfbd7d2010-08-17 22:52:08 +0000645 if (isxSTRM) {
646
647 // generate M-format output (a bit or byte mask in XMM0)
648 if (idx) {
649 resV->w32[0] = bits4_to_bytes4( (intRes2 >> 0) & 0xF );
650 resV->w32[1] = bits4_to_bytes4( (intRes2 >> 4) & 0xF );
651 resV->w32[2] = bits4_to_bytes4( (intRes2 >> 8) & 0xF );
652 resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF );
sewardj0b2d3fe2010-08-06 07:59:38 +0000653 } else {
sewardjacfbd7d2010-08-17 22:52:08 +0000654 resV->w32[0] = intRes2 & 0xFFFF;
655 resV->w32[1] = 0;
656 resV->w32[2] = 0;
657 resV->w32[3] = 0;
sewardj0b2d3fe2010-08-06 07:59:38 +0000658 }
sewardjacfbd7d2010-08-17 22:52:08 +0000659
660 } else {
661
662 // generate I-format output (an index in ECX)
663 // generate ecx value
664 UInt newECX = 0;
665 if (idx) {
666 // index of ms-1-bit
667 newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2));
668 } else {
669 // index of ls-1-bit
670 newECX = intRes2 == 0 ? 16 : ctz32(intRes2);
671 }
672
673 resV->w32[0] = newECX;
674 resV->w32[1] = 0;
675 resV->w32[2] = 0;
676 resV->w32[3] = 0;
sewardj0b2d3fe2010-08-06 07:59:38 +0000677
678 }
679
sewardj0b2d3fe2010-08-06 07:59:38 +0000680 // generate new flags, common to all ISTRI and ISTRM cases
sewardjacfbd7d2010-08-17 22:52:08 +0000681 *resOSZACP // A, P are zero
682 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
683 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
684 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
685 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
sewardj0b2d3fe2010-08-06 07:59:38 +0000686}
687
688
sewardj3c3d6d62012-02-16 15:21:08 +0000689/* Given partial results from a 16-bit pcmpXstrX operation (intRes1,
690 basically), generate an I- or M-format output value, also the new
691 OSZACP flags. */
692static
693void compute_PCMPxSTRx_gen_output_wide (/*OUT*/V128* resV,
694 /*OUT*/UInt* resOSZACP,
695 UInt intRes1,
696 UInt zmaskL, UInt zmaskR,
697 UInt validL,
698 UInt pol, UInt idx,
699 Bool isxSTRM )
700{
701 vassert((pol >> 2) == 0);
702 vassert((idx >> 1) == 0);
703
704 UInt intRes2 = 0;
705 switch (pol) {
706 case 0: intRes2 = intRes1; break; // pol +
707 case 1: intRes2 = ~intRes1; break; // pol -
708 case 2: intRes2 = intRes1; break; // pol m+
709 case 3: intRes2 = intRes1 ^ validL; break; // pol m-
710 }
711 intRes2 &= 0xFF;
712
713 if (isxSTRM) {
714
715 // generate M-format output (a bit or byte mask in XMM0)
716 if (idx) {
717 resV->w32[0] = bits2_to_bytes4( (intRes2 >> 0) & 0x3 );
718 resV->w32[1] = bits2_to_bytes4( (intRes2 >> 2) & 0x3 );
719 resV->w32[2] = bits2_to_bytes4( (intRes2 >> 4) & 0x3 );
720 resV->w32[3] = bits2_to_bytes4( (intRes2 >> 6) & 0x3 );
721 } else {
722 resV->w32[0] = intRes2 & 0xFF;
723 resV->w32[1] = 0;
724 resV->w32[2] = 0;
725 resV->w32[3] = 0;
726 }
727
728 } else {
729
730 // generate I-format output (an index in ECX)
731 // generate ecx value
732 UInt newECX = 0;
733 if (idx) {
734 // index of ms-1-bit
735 newECX = intRes2 == 0 ? 8 : (31 - clz32(intRes2));
736 } else {
737 // index of ls-1-bit
738 newECX = intRes2 == 0 ? 8 : ctz32(intRes2);
739 }
740
741 resV->w32[0] = newECX;
742 resV->w32[1] = 0;
743 resV->w32[2] = 0;
744 resV->w32[3] = 0;
745
746 }
747
748 // generate new flags, common to all ISTRI and ISTRM cases
749 *resOSZACP // A, P are zero
750 = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0
751 | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0
752 | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0
753 | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0]
754}
755
756
/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
   variants on 8-bit data.

   For xSTRI variants, the new ECX value is placed in the 32 bits
   pointed to by *resV, and the top 96 bits are zeroed.  For xSTRM
   variants, the result is a 128 bit value and is placed at *resV in
   the obvious way.

   For all variants, the new OSZACP value is placed at *resOSZACP.

   argLV and argRV are the vector args.  The caller must prepare a
   16-bit mask for each, zmaskL and zmaskR.  For ISTRx variants this
   must be 1 for each zero byte of the respective arg.  For ESTRx
   variants this is derived from the explicit length indication, and
   must be 0 in all places except at the bit index corresponding to
   the valid length (0 .. 16).  If the valid length is 16 then the
   mask must be all zeroes.  In all cases, bits 31:16 must be zero.

   imm8 is the original immediate from the instruction.  isxSTRM
   indicates whether this is a xSTRM or xSTRI variant, which controls
   how much of *resV is written.

   If the given imm8 case can be handled, the return value is True.
   If not, False is returned, and neither *resV nor *resOSZACP are
   altered.
*/
783
784Bool compute_PCMPxSTRx ( /*OUT*/V128* resV,
785 /*OUT*/UInt* resOSZACP,
786 V128* argLV, V128* argRV,
787 UInt zmaskL, UInt zmaskR,
788 UInt imm8, Bool isxSTRM )
sewardj0b2d3fe2010-08-06 07:59:38 +0000789{
sewardjacfbd7d2010-08-17 22:52:08 +0000790 vassert(imm8 < 0x80);
791 vassert((zmaskL >> 16) == 0);
792 vassert((zmaskR >> 16) == 0);
793
794 /* Explicitly reject any imm8 values that haven't been validated,
795 even if they would probably work. Life is too short to have
796 unvalidated cases in the code base. */
797 switch (imm8) {
Elliott Hughesa0664b92017-04-18 17:46:52 -0700798 case 0x00: case 0x02:
799 case 0x08: case 0x0A: case 0x0C: case 0x0E:
Elliott Hughesed398002017-06-21 14:41:24 -0700800 case 0x10: case 0x12: case 0x14:
Elliott Hughesa0664b92017-04-18 17:46:52 -0700801 case 0x18: case 0x1A:
802 case 0x30: case 0x34:
803 case 0x38: case 0x3A:
804 case 0x40: case 0x42: case 0x44: case 0x46:
805 case 0x4A:
806 case 0x62:
807 case 0x70: case 0x72:
sewardjacfbd7d2010-08-17 22:52:08 +0000808 break;
809 default:
810 return False;
sewardj0b2d3fe2010-08-06 07:59:38 +0000811 }
sewardj0b2d3fe2010-08-06 07:59:38 +0000812
sewardjacfbd7d2010-08-17 22:52:08 +0000813 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
814 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
815 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
816 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
sewardj0b2d3fe2010-08-06 07:59:38 +0000817
sewardjacfbd7d2010-08-17 22:52:08 +0000818 /*----------------------------------------*/
819 /*-- strcmp on byte data --*/
820 /*----------------------------------------*/
sewardj0b2d3fe2010-08-06 07:59:38 +0000821
sewardjacfbd7d2010-08-17 22:52:08 +0000822 if (agg == 2/*equal each, aka strcmp*/
823 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
824 Int i;
825 UChar* argL = (UChar*)argLV;
826 UChar* argR = (UChar*)argRV;
827 UInt boolResII = 0;
828 for (i = 15; i >= 0; i--) {
829 UChar cL = argL[i];
830 UChar cR = argR[i];
831 boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
832 }
833 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
834 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
sewardj0b2d3fe2010-08-06 07:59:38 +0000835
sewardjacfbd7d2010-08-17 22:52:08 +0000836 // do invalidation, common to all equal-each cases
837 UInt intRes1
838 = (boolResII & validL & validR) // if both valid, use cmpres
839 | (~ (validL | validR)); // if both invalid, force 1
840 // else force 0
841 intRes1 &= 0xFFFF;
sewardj0b2d3fe2010-08-06 07:59:38 +0000842
sewardjacfbd7d2010-08-17 22:52:08 +0000843 // generate I-format output
844 compute_PCMPxSTRx_gen_output(
845 resV, resOSZACP,
846 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
847 );
sewardj0b2d3fe2010-08-06 07:59:38 +0000848
sewardjacfbd7d2010-08-17 22:52:08 +0000849 return True;
sewardj0b2d3fe2010-08-06 07:59:38 +0000850 }
sewardj0b2d3fe2010-08-06 07:59:38 +0000851
sewardjacfbd7d2010-08-17 22:52:08 +0000852 /*----------------------------------------*/
853 /*-- set membership on byte data --*/
854 /*----------------------------------------*/
sewardj0b2d3fe2010-08-06 07:59:38 +0000855
sewardjacfbd7d2010-08-17 22:52:08 +0000856 if (agg == 0/*equal any, aka find chars in a set*/
857 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
858 /* argL: the string, argR: charset */
859 UInt si, ci;
860 UChar* argL = (UChar*)argLV;
861 UChar* argR = (UChar*)argRV;
862 UInt boolRes = 0;
863 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
864 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
sewardj0b2d3fe2010-08-06 07:59:38 +0000865
sewardjacfbd7d2010-08-17 22:52:08 +0000866 for (si = 0; si < 16; si++) {
867 if ((validL & (1 << si)) == 0)
868 // run off the end of the string.
869 break;
870 UInt m = 0;
871 for (ci = 0; ci < 16; ci++) {
872 if ((validR & (1 << ci)) == 0) break;
873 if (argR[ci] == argL[si]) { m = 1; break; }
874 }
875 boolRes |= (m << si);
876 }
sewardj0b2d3fe2010-08-06 07:59:38 +0000877
sewardjacfbd7d2010-08-17 22:52:08 +0000878 // boolRes is "pre-invalidated"
879 UInt intRes1 = boolRes & 0xFFFF;
880
881 // generate I-format output
882 compute_PCMPxSTRx_gen_output(
883 resV, resOSZACP,
884 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
885 );
sewardj0b2d3fe2010-08-06 07:59:38 +0000886
sewardjacfbd7d2010-08-17 22:52:08 +0000887 return True;
888 }
889
890 /*----------------------------------------*/
891 /*-- substring search on byte data --*/
892 /*----------------------------------------*/
893
894 if (agg == 3/*equal ordered, aka substring search*/
895 && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) {
896
897 /* argL: haystack, argR: needle */
898 UInt ni, hi;
899 UChar* argL = (UChar*)argLV;
900 UChar* argR = (UChar*)argRV;
901 UInt boolRes = 0;
902 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
903 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
904 for (hi = 0; hi < 16; hi++) {
sewardjacfbd7d2010-08-17 22:52:08 +0000905 UInt m = 1;
906 for (ni = 0; ni < 16; ni++) {
907 if ((validR & (1 << ni)) == 0) break;
908 UInt i = ni + hi;
909 if (i >= 16) break;
910 if (argL[i] != argR[ni]) { m = 0; break; }
911 }
912 boolRes |= (m << hi);
weidendo14a55df2012-07-25 09:36:54 +0000913 if ((validL & (1 << hi)) == 0)
914 // run off the end of the haystack
915 break;
sewardjacfbd7d2010-08-17 22:52:08 +0000916 }
917
918 // boolRes is "pre-invalidated"
919 UInt intRes1 = boolRes & 0xFFFF;
920
921 // generate I-format output
922 compute_PCMPxSTRx_gen_output(
923 resV, resOSZACP,
924 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
925 );
926
927 return True;
928 }
929
930 /*----------------------------------------*/
931 /*-- ranges, unsigned byte data --*/
932 /*----------------------------------------*/
933
934 if (agg == 1/*ranges*/
935 && fmt == 0/*ub*/) {
936
937 /* argL: string, argR: range-pairs */
938 UInt ri, si;
939 UChar* argL = (UChar*)argLV;
940 UChar* argR = (UChar*)argRV;
941 UInt boolRes = 0;
942 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
943 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
944 for (si = 0; si < 16; si++) {
945 if ((validL & (1 << si)) == 0)
946 // run off the end of the string
947 break;
948 UInt m = 0;
949 for (ri = 0; ri < 16; ri += 2) {
950 if ((validR & (3 << ri)) != (3 << ri)) break;
951 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
952 m = 1; break;
953 }
954 }
955 boolRes |= (m << si);
956 }
957
958 // boolRes is "pre-invalidated"
959 UInt intRes1 = boolRes & 0xFFFF;
960
961 // generate I-format output
962 compute_PCMPxSTRx_gen_output(
963 resV, resOSZACP,
964 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
965 );
966
967 return True;
968 }
969
sewardja87ce532012-09-17 13:40:11 +0000970 /*----------------------------------------*/
971 /*-- ranges, signed byte data --*/
972 /*----------------------------------------*/
973
974 if (agg == 1/*ranges*/
975 && fmt == 2/*sb*/) {
976
977 /* argL: string, argR: range-pairs */
978 UInt ri, si;
979 Char* argL = (Char*)argLV;
980 Char* argR = (Char*)argRV;
981 UInt boolRes = 0;
982 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
983 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
984 for (si = 0; si < 16; si++) {
985 if ((validL & (1 << si)) == 0)
986 // run off the end of the string
987 break;
988 UInt m = 0;
989 for (ri = 0; ri < 16; ri += 2) {
990 if ((validR & (3 << ri)) != (3 << ri)) break;
991 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
992 m = 1; break;
993 }
994 }
995 boolRes |= (m << si);
996 }
997
998 // boolRes is "pre-invalidated"
999 UInt intRes1 = boolRes & 0xFFFF;
1000
1001 // generate I-format output
1002 compute_PCMPxSTRx_gen_output(
1003 resV, resOSZACP,
1004 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1005 );
1006
1007 return True;
1008 }
1009
sewardjacfbd7d2010-08-17 22:52:08 +00001010 return False;
sewardj0b2d3fe2010-08-06 07:59:38 +00001011}
1012
1013
sewardj3c3d6d62012-02-16 15:21:08 +00001014/* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M}
1015 variants on 16-bit characters.
1016
1017 For xSTRI variants, the new ECX value is placed in the 32 bits
1018 pointed to by *resV, and the top 96 bits are zeroed. For xSTRM
1019 variants, the result is a 128 bit value and is placed at *resV in
1020 the obvious way.
1021
1022 For all variants, the new OSZACP value is placed at *resOSZACP.
1023
1024 argLV and argRV are the vector args. The caller must prepare a
1025 8-bit mask for each, zmaskL and zmaskR. For ISTRx variants this
1026 must be 1 for each zero byte of of the respective arg. For ESTRx
1027 variants this is derived from the explicit length indication, and
1028 must be 0 in all places except at the bit index corresponding to
1029 the valid length (0 .. 8). If the valid length is 8 then the
1030 mask must be all zeroes. In all cases, bits 31:8 must be zero.
1031
1032 imm8 is the original immediate from the instruction. isSTRM
1033 indicates whether this is a xSTRM or xSTRI variant, which controls
1034 how much of *res is written.
1035
1036 If the given imm8 case can be handled, the return value is True.
1037 If not, False is returned, and neither *res not *resOSZACP are
1038 altered.
1039*/
1040
1041Bool compute_PCMPxSTRx_wide ( /*OUT*/V128* resV,
1042 /*OUT*/UInt* resOSZACP,
1043 V128* argLV, V128* argRV,
1044 UInt zmaskL, UInt zmaskR,
1045 UInt imm8, Bool isxSTRM )
1046{
1047 vassert(imm8 < 0x80);
1048 vassert((zmaskL >> 8) == 0);
1049 vassert((zmaskR >> 8) == 0);
1050
1051 /* Explicitly reject any imm8 values that haven't been validated,
1052 even if they would probably work. Life is too short to have
1053 unvalidated cases in the code base. */
1054 switch (imm8) {
sewardjeead3192014-05-21 14:42:04 +00001055 case 0x01: case 0x03: case 0x09: case 0x0B: case 0x0D:
Elliott Hughesa0664b92017-04-18 17:46:52 -07001056 case 0x13: case 0x19: case 0x1B:
sewardjeead3192014-05-21 14:42:04 +00001057 case 0x39: case 0x3B:
1058 case 0x45: case 0x4B:
sewardj3c3d6d62012-02-16 15:21:08 +00001059 break;
1060 default:
1061 return False;
1062 }
1063
1064 UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format
1065 UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn
1066 UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity
1067 UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask
1068
1069 /*----------------------------------------*/
1070 /*-- strcmp on wide data --*/
1071 /*----------------------------------------*/
1072
1073 if (agg == 2/*equal each, aka strcmp*/
1074 && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1075 Int i;
1076 UShort* argL = (UShort*)argLV;
1077 UShort* argR = (UShort*)argRV;
1078 UInt boolResII = 0;
1079 for (i = 7; i >= 0; i--) {
1080 UShort cL = argL[i];
1081 UShort cR = argR[i];
1082 boolResII = (boolResII << 1) | (cL == cR ? 1 : 0);
1083 }
1084 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
1085 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
1086
1087 // do invalidation, common to all equal-each cases
1088 UInt intRes1
1089 = (boolResII & validL & validR) // if both valid, use cmpres
1090 | (~ (validL | validR)); // if both invalid, force 1
1091 // else force 0
1092 intRes1 &= 0xFF;
1093
1094 // generate I-format output
1095 compute_PCMPxSTRx_gen_output_wide(
1096 resV, resOSZACP,
1097 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1098 );
1099
1100 return True;
1101 }
1102
1103 /*----------------------------------------*/
1104 /*-- set membership on wide data --*/
1105 /*----------------------------------------*/
1106
1107 if (agg == 0/*equal any, aka find chars in a set*/
1108 && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1109 /* argL: the string, argR: charset */
1110 UInt si, ci;
1111 UShort* argL = (UShort*)argLV;
1112 UShort* argR = (UShort*)argRV;
1113 UInt boolRes = 0;
1114 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
1115 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
1116
1117 for (si = 0; si < 8; si++) {
1118 if ((validL & (1 << si)) == 0)
1119 // run off the end of the string.
1120 break;
1121 UInt m = 0;
1122 for (ci = 0; ci < 8; ci++) {
1123 if ((validR & (1 << ci)) == 0) break;
1124 if (argR[ci] == argL[si]) { m = 1; break; }
1125 }
1126 boolRes |= (m << si);
1127 }
1128
1129 // boolRes is "pre-invalidated"
1130 UInt intRes1 = boolRes & 0xFF;
1131
1132 // generate I-format output
1133 compute_PCMPxSTRx_gen_output_wide(
1134 resV, resOSZACP,
1135 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1136 );
1137
1138 return True;
1139 }
1140
1141 /*----------------------------------------*/
1142 /*-- substring search on wide data --*/
1143 /*----------------------------------------*/
1144
1145 if (agg == 3/*equal ordered, aka substring search*/
1146 && (fmt == 1/*uw*/ || fmt == 3/*sw*/)) {
1147
1148 /* argL: haystack, argR: needle */
1149 UInt ni, hi;
1150 UShort* argL = (UShort*)argLV;
1151 UShort* argR = (UShort*)argRV;
1152 UInt boolRes = 0;
1153 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
1154 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
1155 for (hi = 0; hi < 8; hi++) {
sewardj3c3d6d62012-02-16 15:21:08 +00001156 UInt m = 1;
1157 for (ni = 0; ni < 8; ni++) {
1158 if ((validR & (1 << ni)) == 0) break;
1159 UInt i = ni + hi;
1160 if (i >= 8) break;
1161 if (argL[i] != argR[ni]) { m = 0; break; }
1162 }
1163 boolRes |= (m << hi);
weidendo14a55df2012-07-25 09:36:54 +00001164 if ((validL & (1 << hi)) == 0)
1165 // run off the end of the haystack
1166 break;
sewardj3c3d6d62012-02-16 15:21:08 +00001167 }
1168
1169 // boolRes is "pre-invalidated"
1170 UInt intRes1 = boolRes & 0xFF;
1171
1172 // generate I-format output
1173 compute_PCMPxSTRx_gen_output_wide(
1174 resV, resOSZACP,
1175 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1176 );
1177
1178 return True;
1179 }
1180
1181 /*----------------------------------------*/
1182 /*-- ranges, unsigned wide data --*/
1183 /*----------------------------------------*/
1184
1185 if (agg == 1/*ranges*/
1186 && fmt == 1/*uw*/) {
1187
1188 /* argL: string, argR: range-pairs */
1189 UInt ri, si;
1190 UShort* argL = (UShort*)argLV;
1191 UShort* argR = (UShort*)argRV;
1192 UInt boolRes = 0;
1193 UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL))
1194 UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR))
1195 for (si = 0; si < 8; si++) {
1196 if ((validL & (1 << si)) == 0)
1197 // run off the end of the string
1198 break;
1199 UInt m = 0;
1200 for (ri = 0; ri < 8; ri += 2) {
1201 if ((validR & (3 << ri)) != (3 << ri)) break;
1202 if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) {
1203 m = 1; break;
1204 }
1205 }
1206 boolRes |= (m << si);
1207 }
1208
1209 // boolRes is "pre-invalidated"
1210 UInt intRes1 = boolRes & 0xFF;
1211
1212 // generate I-format output
1213 compute_PCMPxSTRx_gen_output_wide(
1214 resV, resOSZACP,
1215 intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM
1216 );
1217
1218 return True;
1219 }
1220
1221 return False;
1222}
1223
1224
sewardj52ff4cc2005-03-26 20:33:38 +00001225/*---------------------------------------------------------------*/
sewardjcef7d3e2009-07-02 12:21:59 +00001226/*--- end guest_generic_x87.c ---*/
sewardj52ff4cc2005-03-26 20:33:38 +00001227/*---------------------------------------------------------------*/