sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 1 | |
| 2 | /*---------------------------------------------------------------*/ |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 3 | /*--- begin guest_generic_x87.c ---*/ |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 4 | /*---------------------------------------------------------------*/ |
| 5 | |
| 6 | /* |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 7 | This file is part of Valgrind, a dynamic binary instrumentation |
| 8 | framework. |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 9 | |
sewardj | e6c53e0 | 2011-10-23 07:33:43 +0000 | [diff] [blame] | 10 | Copyright (C) 2004-2011 OpenWorks LLP |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 11 | info@open-works.net |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 12 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 13 | This program is free software; you can redistribute it and/or |
| 14 | modify it under the terms of the GNU General Public License as |
| 15 | published by the Free Software Foundation; either version 2 of the |
| 16 | License, or (at your option) any later version. |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 17 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 18 | This program is distributed in the hope that it will be useful, but |
| 19 | WITHOUT ANY WARRANTY; without even the implied warranty of |
| 20 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
| 21 | General Public License for more details. |
| 22 | |
| 23 | You should have received a copy of the GNU General Public License |
| 24 | along with this program; if not, write to the Free Software |
| 25 | Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA |
sewardj | 7bd6ffe | 2005-08-03 16:07:36 +0000 | [diff] [blame] | 26 | 02110-1301, USA. |
| 27 | |
sewardj | 752f906 | 2010-05-03 21:38:49 +0000 | [diff] [blame] | 28 | The GNU General Public License is contained in the file COPYING. |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 29 | |
| 30 | Neither the names of the U.S. Department of Energy nor the |
| 31 | University of California nor the names of its contributors may be |
| 32 | used to endorse or promote products derived from this software |
| 33 | without prior written permission. |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 34 | */ |
| 35 | |
| 36 | /* This file contains functions for doing some x87-specific |
| 37 | operations. Both the amd64 and x86 front ends (guests) indirectly |
| 38 | call these functions via guest helper calls. By putting them here, |
| 39 | code duplication is avoided. Some of these functions are tricky |
| 40 | and hard to verify, so there is much to be said for only having one |
| 41 | copy thereof. |
| 42 | */ |
| 43 | |
| 44 | #include "libvex_basictypes.h" |
| 45 | |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 46 | #include "main_util.h" |
| 47 | #include "guest_generic_x87.h" |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 48 | |
| 49 | |
| 50 | /* 80 and 64-bit floating point formats: |
| 51 | |
| 52 | 80-bit: |
| 53 | |
| 54 | S 0 0-------0 zero |
| 55 | S 0 0X------X denormals |
| 56 | S 1-7FFE 1X------X normals (all normals have leading 1) |
| 57 | S 7FFF 10------0 infinity |
| 58 | S 7FFF 10X-----X snan |
| 59 | S 7FFF 11X-----X qnan |
| 60 | |
| 61 | S is the sign bit. For runs X----X, at least one of the Xs must be |
| 62 | nonzero. Exponent is 15 bits, fractional part is 63 bits, and |
| 63 | there is an explicitly represented leading 1, and a sign bit, |
| 64 | giving 80 in total. |
| 65 | |
| 66 | 64-bit avoids the confusion of an explicitly represented leading 1 |
| 67 | and so is simpler: |
| 68 | |
| 69 | S 0 0------0 zero |
| 70 | S 0 X------X denormals |
| 71 | S 1-7FE any normals |
| 72 | S 7FF 0------0 infinity |
| 73 | S 7FF 0X-----X snan |
| 74 | S 7FF 1X-----X qnan |
| 75 | |
| 76 | Exponent is 11 bits, fractional part is 52 bits, and there is a |
| 77 | sign bit, giving 64 in total. |
| 78 | */ |
| 79 | |
| 80 | |
| 81 | static inline UInt read_bit_array ( UChar* arr, UInt n ) |
| 82 | { |
| 83 | UChar c = arr[n >> 3]; |
| 84 | c >>= (n&7); |
| 85 | return c & 1; |
| 86 | } |
| 87 | |
| 88 | static inline void write_bit_array ( UChar* arr, UInt n, UInt b ) |
| 89 | { |
| 90 | UChar c = arr[n >> 3]; |
| 91 | c = toUChar( c & ~(1 << (n&7)) ); |
| 92 | c = toUChar( c | ((b&1) << (n&7)) ); |
| 93 | arr[n >> 3] = c; |
| 94 | } |
| 95 | |
| 96 | /* Convert an IEEE754 double (64-bit) into an x87 extended double |
| 97 | (80-bit), mimicing the hardware fairly closely. Both numbers are |
| 98 | stored little-endian. Limitations, all of which could be fixed, |
| 99 | given some level of hassle: |
| 100 | |
| 101 | * Identity of NaNs is not preserved. |
| 102 | |
| 103 | See comments in the code for more details. |
| 104 | */ |
| 105 | void convert_f64le_to_f80le ( /*IN*/UChar* f64, /*OUT*/UChar* f80 ) |
| 106 | { |
| 107 | Bool mantissaIsZero; |
| 108 | Int bexp, i, j, shift; |
| 109 | UChar sign; |
| 110 | |
| 111 | sign = toUChar( (f64[7] >> 7) & 1 ); |
| 112 | bexp = (f64[7] << 4) | ((f64[6] >> 4) & 0x0F); |
| 113 | bexp &= 0x7FF; |
| 114 | |
| 115 | mantissaIsZero = False; |
| 116 | if (bexp == 0 || bexp == 0x7FF) { |
| 117 | /* We'll need to know whether or not the mantissa (bits 51:0) is |
| 118 | all zeroes in order to handle these cases. So figure it |
| 119 | out. */ |
| 120 | mantissaIsZero |
| 121 | = toBool( |
| 122 | (f64[6] & 0x0F) == 0 |
| 123 | && f64[5] == 0 && f64[4] == 0 && f64[3] == 0 |
| 124 | && f64[2] == 0 && f64[1] == 0 && f64[0] == 0 |
| 125 | ); |
| 126 | } |
| 127 | |
| 128 | /* If the exponent is zero, either we have a zero or a denormal. |
| 129 | Produce a zero. This is a hack in that it forces denormals to |
| 130 | zero. Could do better. */ |
| 131 | if (bexp == 0) { |
| 132 | f80[9] = toUChar( sign << 7 ); |
| 133 | f80[8] = f80[7] = f80[6] = f80[5] = f80[4] |
| 134 | = f80[3] = f80[2] = f80[1] = f80[0] = 0; |
| 135 | |
| 136 | if (mantissaIsZero) |
| 137 | /* It really is zero, so that's all we can do. */ |
| 138 | return; |
| 139 | |
| 140 | /* There is at least one 1-bit in the mantissa. So it's a |
| 141 | potentially denormalised double -- but we can produce a |
| 142 | normalised long double. Count the leading zeroes in the |
| 143 | mantissa so as to decide how much to bump the exponent down |
| 144 | by. Note, this is SLOW. */ |
| 145 | shift = 0; |
| 146 | for (i = 51; i >= 0; i--) { |
| 147 | if (read_bit_array(f64, i)) |
| 148 | break; |
| 149 | shift++; |
| 150 | } |
| 151 | |
| 152 | /* and copy into place as many bits as we can get our hands on. */ |
| 153 | j = 63; |
| 154 | for (i = 51 - shift; i >= 0; i--) { |
| 155 | write_bit_array( f80, j, |
| 156 | read_bit_array( f64, i ) ); |
| 157 | j--; |
| 158 | } |
| 159 | |
| 160 | /* Set the exponent appropriately, and we're done. */ |
| 161 | bexp -= shift; |
| 162 | bexp += (16383 - 1023); |
| 163 | f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); |
| 164 | f80[8] = toUChar( bexp & 0xFF ); |
| 165 | return; |
| 166 | } |
| 167 | |
| 168 | /* If the exponent is 7FF, this is either an Infinity, a SNaN or |
| 169 | QNaN, as determined by examining bits 51:0, thus: |
| 170 | 0 ... 0 Inf |
| 171 | 0X ... X SNaN |
| 172 | 1X ... X QNaN |
| 173 | where at least one of the Xs is not zero. |
| 174 | */ |
| 175 | if (bexp == 0x7FF) { |
| 176 | if (mantissaIsZero) { |
| 177 | /* Produce an appropriately signed infinity: |
| 178 | S 1--1 (15) 1 0--0 (63) |
| 179 | */ |
| 180 | f80[9] = toUChar( (sign << 7) | 0x7F ); |
| 181 | f80[8] = 0xFF; |
| 182 | f80[7] = 0x80; |
| 183 | f80[6] = f80[5] = f80[4] = f80[3] |
| 184 | = f80[2] = f80[1] = f80[0] = 0; |
| 185 | return; |
| 186 | } |
| 187 | /* So it's either a QNaN or SNaN. Distinguish by considering |
| 188 | bit 51. Note, this destroys all the trailing bits |
| 189 | (identity?) of the NaN. IEEE754 doesn't require preserving |
| 190 | these (it only requires that there be one QNaN value and one |
| 191 | SNaN value), but x87 does seem to have some ability to |
| 192 | preserve them. Anyway, here, the NaN's identity is |
| 193 | destroyed. Could be improved. */ |
| 194 | if (f64[6] & 8) { |
| 195 | /* QNaN. Make a QNaN: |
| 196 | S 1--1 (15) 1 1--1 (63) |
| 197 | */ |
| 198 | f80[9] = toUChar( (sign << 7) | 0x7F ); |
| 199 | f80[8] = 0xFF; |
| 200 | f80[7] = 0xFF; |
| 201 | f80[6] = f80[5] = f80[4] = f80[3] |
| 202 | = f80[2] = f80[1] = f80[0] = 0xFF; |
| 203 | } else { |
| 204 | /* SNaN. Make a SNaN: |
| 205 | S 1--1 (15) 0 1--1 (63) |
| 206 | */ |
| 207 | f80[9] = toUChar( (sign << 7) | 0x7F ); |
| 208 | f80[8] = 0xFF; |
| 209 | f80[7] = 0x7F; |
| 210 | f80[6] = f80[5] = f80[4] = f80[3] |
| 211 | = f80[2] = f80[1] = f80[0] = 0xFF; |
| 212 | } |
| 213 | return; |
| 214 | } |
| 215 | |
| 216 | /* It's not a zero, denormal, infinity or nan. So it must be a |
| 217 | normalised number. Rebias the exponent and build the new |
| 218 | number. */ |
| 219 | bexp += (16383 - 1023); |
| 220 | |
| 221 | f80[9] = toUChar( (sign << 7) | ((bexp >> 8) & 0xFF) ); |
| 222 | f80[8] = toUChar( bexp & 0xFF ); |
| 223 | f80[7] = toUChar( (1 << 7) | ((f64[6] << 3) & 0x78) |
| 224 | | ((f64[5] >> 5) & 7) ); |
| 225 | f80[6] = toUChar( ((f64[5] << 3) & 0xF8) | ((f64[4] >> 5) & 7) ); |
| 226 | f80[5] = toUChar( ((f64[4] << 3) & 0xF8) | ((f64[3] >> 5) & 7) ); |
| 227 | f80[4] = toUChar( ((f64[3] << 3) & 0xF8) | ((f64[2] >> 5) & 7) ); |
| 228 | f80[3] = toUChar( ((f64[2] << 3) & 0xF8) | ((f64[1] >> 5) & 7) ); |
| 229 | f80[2] = toUChar( ((f64[1] << 3) & 0xF8) | ((f64[0] >> 5) & 7) ); |
| 230 | f80[1] = toUChar( ((f64[0] << 3) & 0xF8) ); |
| 231 | f80[0] = toUChar( 0 ); |
| 232 | } |
| 233 | |
| 234 | |
| 235 | /* Convert an x87 extended double (80-bit) into an IEEE 754 double |
| 236 | (64-bit), mimicking the hardware fairly closely. Both numbers are |
| 237 | stored little-endian. Limitations, both of which could be fixed, |
| 238 | given some level of hassle: |
| 239 | |
| 240 | * Rounding following truncation could be a bit better. |
| 241 | |
| 242 | * Identity of NaNs is not preserved. |
| 243 | |
| 244 | See comments in the code for more details. |
| 245 | */ |
| 246 | void convert_f80le_to_f64le ( /*IN*/UChar* f80, /*OUT*/UChar* f64 ) |
| 247 | { |
| 248 | Bool isInf; |
| 249 | Int bexp, i, j; |
| 250 | UChar sign; |
| 251 | |
| 252 | sign = toUChar((f80[9] >> 7) & 1); |
| 253 | bexp = (((UInt)f80[9]) << 8) | (UInt)f80[8]; |
| 254 | bexp &= 0x7FFF; |
| 255 | |
| 256 | /* If the exponent is zero, either we have a zero or a denormal. |
| 257 | But an extended precision denormal becomes a double precision |
| 258 | zero, so in either case, just produce the appropriately signed |
| 259 | zero. */ |
| 260 | if (bexp == 0) { |
| 261 | f64[7] = toUChar(sign << 7); |
| 262 | f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; |
| 263 | return; |
| 264 | } |
| 265 | |
| 266 | /* If the exponent is 7FFF, this is either an Infinity, a SNaN or |
| 267 | QNaN, as determined by examining bits 62:0, thus: |
| 268 | 0 ... 0 Inf |
| 269 | 0X ... X SNaN |
| 270 | 1X ... X QNaN |
| 271 | where at least one of the Xs is not zero. |
| 272 | */ |
| 273 | if (bexp == 0x7FFF) { |
| 274 | isInf = toBool( |
| 275 | (f80[7] & 0x7F) == 0 |
| 276 | && f80[6] == 0 && f80[5] == 0 && f80[4] == 0 |
| 277 | && f80[3] == 0 && f80[2] == 0 && f80[1] == 0 |
| 278 | && f80[0] == 0 |
| 279 | ); |
| 280 | if (isInf) { |
| 281 | if (0 == (f80[7] & 0x80)) |
| 282 | goto wierd_NaN; |
| 283 | /* Produce an appropriately signed infinity: |
| 284 | S 1--1 (11) 0--0 (52) |
| 285 | */ |
| 286 | f64[7] = toUChar((sign << 7) | 0x7F); |
| 287 | f64[6] = 0xF0; |
| 288 | f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; |
| 289 | return; |
| 290 | } |
| 291 | /* So it's either a QNaN or SNaN. Distinguish by considering |
| 292 | bit 62. Note, this destroys all the trailing bits |
| 293 | (identity?) of the NaN. IEEE754 doesn't require preserving |
| 294 | these (it only requires that there be one QNaN value and one |
| 295 | SNaN value), but x87 does seem to have some ability to |
| 296 | preserve them. Anyway, here, the NaN's identity is |
| 297 | destroyed. Could be improved. */ |
| 298 | if (f80[8] & 0x40) { |
| 299 | /* QNaN. Make a QNaN: |
| 300 | S 1--1 (11) 1 1--1 (51) |
| 301 | */ |
| 302 | f64[7] = toUChar((sign << 7) | 0x7F); |
| 303 | f64[6] = 0xFF; |
| 304 | f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; |
| 305 | } else { |
| 306 | /* SNaN. Make a SNaN: |
| 307 | S 1--1 (11) 0 1--1 (51) |
| 308 | */ |
| 309 | f64[7] = toUChar((sign << 7) | 0x7F); |
| 310 | f64[6] = 0xF7; |
| 311 | f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0xFF; |
| 312 | } |
| 313 | return; |
| 314 | } |
| 315 | |
| 316 | /* If it's not a Zero, NaN or Inf, and the integer part (bit 62) is |
| 317 | zero, the x87 FPU appears to consider the number denormalised |
| 318 | and converts it to a QNaN. */ |
| 319 | if (0 == (f80[7] & 0x80)) { |
| 320 | wierd_NaN: |
| 321 | /* Strange hardware QNaN: |
| 322 | S 1--1 (11) 1 0--0 (51) |
| 323 | */ |
| 324 | /* On a PIII, these QNaNs always appear with sign==1. I have |
| 325 | no idea why. */ |
| 326 | f64[7] = (1 /*sign*/ << 7) | 0x7F; |
| 327 | f64[6] = 0xF8; |
| 328 | f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; |
| 329 | return; |
| 330 | } |
| 331 | |
| 332 | /* It's not a zero, denormal, infinity or nan. So it must be a |
| 333 | normalised number. Rebias the exponent and consider. */ |
| 334 | bexp -= (16383 - 1023); |
| 335 | if (bexp >= 0x7FF) { |
| 336 | /* It's too big for a double. Construct an infinity. */ |
| 337 | f64[7] = toUChar((sign << 7) | 0x7F); |
| 338 | f64[6] = 0xF0; |
| 339 | f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; |
| 340 | return; |
| 341 | } |
| 342 | |
| 343 | if (bexp <= 0) { |
| 344 | /* It's too small for a normalised double. First construct a |
| 345 | zero and then see if it can be improved into a denormal. */ |
| 346 | f64[7] = toUChar(sign << 7); |
| 347 | f64[6] = f64[5] = f64[4] = f64[3] = f64[2] = f64[1] = f64[0] = 0; |
| 348 | |
| 349 | if (bexp < -52) |
| 350 | /* Too small even for a denormal. */ |
| 351 | return; |
| 352 | |
| 353 | /* Ok, let's make a denormal. Note, this is SLOW. */ |
| 354 | /* Copy bits 63, 62, 61, etc of the src mantissa into the dst, |
| 355 | indexes 52+bexp, 51+bexp, etc, until k+bexp < 0. */ |
| 356 | /* bexp is in range -52 .. 0 inclusive */ |
| 357 | for (i = 63; i >= 0; i--) { |
| 358 | j = i - 12 + bexp; |
| 359 | if (j < 0) break; |
| 360 | /* We shouldn't really call vassert from generated code. */ |
| 361 | vassert(j >= 0 && j < 52); |
| 362 | write_bit_array ( f64, |
| 363 | j, |
| 364 | read_bit_array ( f80, i ) ); |
| 365 | } |
| 366 | /* and now we might have to round ... */ |
| 367 | if (read_bit_array(f80, 10+1 - bexp) == 1) |
| 368 | goto do_rounding; |
| 369 | |
| 370 | return; |
| 371 | } |
| 372 | |
| 373 | /* Ok, it's a normalised number which is representable as a double. |
| 374 | Copy the exponent and mantissa into place. */ |
| 375 | /* |
| 376 | for (i = 0; i < 52; i++) |
| 377 | write_bit_array ( f64, |
| 378 | i, |
| 379 | read_bit_array ( f80, i+11 ) ); |
| 380 | */ |
| 381 | f64[0] = toUChar( (f80[1] >> 3) | (f80[2] << 5) ); |
| 382 | f64[1] = toUChar( (f80[2] >> 3) | (f80[3] << 5) ); |
| 383 | f64[2] = toUChar( (f80[3] >> 3) | (f80[4] << 5) ); |
| 384 | f64[3] = toUChar( (f80[4] >> 3) | (f80[5] << 5) ); |
| 385 | f64[4] = toUChar( (f80[5] >> 3) | (f80[6] << 5) ); |
| 386 | f64[5] = toUChar( (f80[6] >> 3) | (f80[7] << 5) ); |
| 387 | |
| 388 | f64[6] = toUChar( ((bexp << 4) & 0xF0) | ((f80[7] >> 3) & 0x0F) ); |
| 389 | |
| 390 | f64[7] = toUChar( (sign << 7) | ((bexp >> 4) & 0x7F) ); |
| 391 | |
| 392 | /* Now consider any rounding that needs to happen as a result of |
| 393 | truncating the mantissa. */ |
| 394 | if (f80[1] & 4) /* read_bit_array(f80, 10) == 1) */ { |
| 395 | |
| 396 | /* If the bottom bits of f80 are "100 0000 0000", then the |
| 397 | infinitely precise value is deemed to be mid-way between the |
| 398 | two closest representable values. Since we're doing |
| 399 | round-to-nearest (the default mode), in that case it is the |
| 400 | bit immediately above which indicates whether we should round |
| 401 | upwards or not -- if 0, we don't. All that is encapsulated |
| 402 | in the following simple test. */ |
| 403 | if ((f80[1] & 0xF) == 4/*0100b*/ && f80[0] == 0) |
| 404 | return; |
| 405 | |
| 406 | do_rounding: |
| 407 | /* Round upwards. This is a kludge. Once in every 2^24 |
| 408 | roundings (statistically) the bottom three bytes are all 0xFF |
| 409 | and so we don't round at all. Could be improved. */ |
| 410 | if (f64[0] != 0xFF) { |
| 411 | f64[0]++; |
| 412 | } |
| 413 | else |
| 414 | if (f64[0] == 0xFF && f64[1] != 0xFF) { |
| 415 | f64[0] = 0; |
| 416 | f64[1]++; |
| 417 | } |
| 418 | else |
| 419 | if (f64[0] == 0xFF && f64[1] == 0xFF && f64[2] != 0xFF) { |
| 420 | f64[0] = 0; |
| 421 | f64[1] = 0; |
| 422 | f64[2]++; |
| 423 | } |
| 424 | /* else we don't round, but we should. */ |
| 425 | } |
| 426 | } |
| 427 | |
| 428 | |
sewardj | 879cee0 | 2006-03-07 01:15:50 +0000 | [diff] [blame] | 429 | /* CALLED FROM GENERATED CODE: CLEAN HELPER */ |
| 430 | /* Extract the signed significand or exponent component as per |
| 431 | fxtract. Arg and result are doubles travelling under the guise of |
| 432 | ULongs. Returns significand when getExp is zero and exponent |
| 433 | otherwise. */ |
| 434 | ULong x86amd64g_calculate_FXTRACT ( ULong arg, HWord getExp ) |
| 435 | { |
| 436 | ULong uSig, uExp; |
| 437 | /* Long sSig; */ |
| 438 | Int sExp, i; |
| 439 | UInt sign, expExp; |
| 440 | |
| 441 | /* |
| 442 | S 7FF 0------0 infinity |
| 443 | S 7FF 0X-----X snan |
| 444 | S 7FF 1X-----X qnan |
| 445 | */ |
| 446 | const ULong posInf = 0x7FF0000000000000ULL; |
| 447 | const ULong negInf = 0xFFF0000000000000ULL; |
| 448 | const ULong nanMask = 0x7FF0000000000000ULL; |
| 449 | const ULong qNan = 0x7FF8000000000000ULL; |
| 450 | const ULong posZero = 0x0000000000000000ULL; |
| 451 | const ULong negZero = 0x8000000000000000ULL; |
| 452 | const ULong bit51 = 1ULL << 51; |
| 453 | const ULong bit52 = 1ULL << 52; |
| 454 | const ULong sigMask = bit52 - 1; |
| 455 | |
sewardj | 772f6df | 2010-07-29 07:01:29 +0000 | [diff] [blame] | 456 | /* Mimic Core i5 behaviour for special cases. */ |
sewardj | 879cee0 | 2006-03-07 01:15:50 +0000 | [diff] [blame] | 457 | if (arg == posInf) |
| 458 | return getExp ? posInf : posInf; |
| 459 | if (arg == negInf) |
| 460 | return getExp ? posInf : negInf; |
| 461 | if ((arg & nanMask) == nanMask) |
sewardj | 772f6df | 2010-07-29 07:01:29 +0000 | [diff] [blame] | 462 | return qNan | (arg & (1ULL << 63)); |
sewardj | 879cee0 | 2006-03-07 01:15:50 +0000 | [diff] [blame] | 463 | if (arg == posZero) |
| 464 | return getExp ? negInf : posZero; |
| 465 | if (arg == negZero) |
| 466 | return getExp ? negInf : negZero; |
| 467 | |
| 468 | /* Split into sign, exponent and significand. */ |
| 469 | sign = ((UInt)(arg >> 63)) & 1; |
| 470 | |
| 471 | /* Mask off exponent & sign. uSig is in range 0 .. 2^52-1. */ |
| 472 | uSig = arg & sigMask; |
| 473 | |
| 474 | /* Get the exponent. */ |
| 475 | sExp = ((Int)(arg >> 52)) & 0x7FF; |
| 476 | |
| 477 | /* Deal with denormals: if the exponent is zero, then the |
| 478 | significand cannot possibly be zero (negZero/posZero are handled |
| 479 | above). Shift the significand left until bit 51 of it becomes |
| 480 | 1, and decrease the exponent accordingly. |
| 481 | */ |
| 482 | if (sExp == 0) { |
| 483 | for (i = 0; i < 52; i++) { |
| 484 | if (uSig & bit51) |
| 485 | break; |
| 486 | uSig <<= 1; |
| 487 | sExp--; |
| 488 | } |
| 489 | uSig <<= 1; |
| 490 | } else { |
| 491 | /* Add the implied leading-1 in the significand. */ |
| 492 | uSig |= bit52; |
| 493 | } |
| 494 | |
| 495 | /* Roll in the sign. */ |
| 496 | /* sSig = uSig; */ |
| 497 | /* if (sign) sSig =- sSig; */ |
| 498 | |
| 499 | /* Convert sig into a double. This should be an exact conversion. |
| 500 | Then divide by 2^52, which should give a value in the range 1.0 |
| 501 | to 2.0-epsilon, at least for normalised args. */ |
| 502 | /* dSig = (Double)sSig; */ |
| 503 | /* dSig /= 67108864.0; */ /* 2^26 */ |
| 504 | /* dSig /= 67108864.0; */ /* 2^26 */ |
| 505 | uSig &= sigMask; |
| 506 | uSig |= 0x3FF0000000000000ULL; |
| 507 | if (sign) |
| 508 | uSig ^= negZero; |
| 509 | |
| 510 | /* Convert exp into a double. Also an exact conversion. */ |
| 511 | /* dExp = (Double)(sExp - 1023); */ |
| 512 | sExp -= 1023; |
| 513 | if (sExp == 0) { |
| 514 | uExp = 0; |
| 515 | } else { |
| 516 | uExp = sExp < 0 ? -sExp : sExp; |
| 517 | expExp = 0x3FF +52; |
| 518 | /* 1 <= uExp <= 1074 */ |
| 519 | /* Skip first 42 iterations of normalisation loop as we know they |
| 520 | will always happen */ |
| 521 | uExp <<= 42; |
| 522 | expExp -= 42; |
| 523 | for (i = 0; i < 52-42; i++) { |
| 524 | if (uExp & bit52) |
| 525 | break; |
| 526 | uExp <<= 1; |
| 527 | expExp--; |
| 528 | } |
| 529 | uExp &= sigMask; |
| 530 | uExp |= ((ULong)expExp) << 52; |
| 531 | if (sExp < 0) uExp ^= negZero; |
| 532 | } |
| 533 | |
| 534 | return getExp ? uExp : uSig; |
| 535 | } |
| 536 | |
| 537 | |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 538 | |
| 539 | /*---------------------------------------------------------*/ |
| 540 | /*--- SSE4.2 PCMP{E,I}STR{I,M} helpers ---*/ |
| 541 | /*---------------------------------------------------------*/ |
| 542 | |
| 543 | /* We need the definitions for OSZACP eflags/rflags offsets. |
| 544 | #including guest_{amd64,x86}_defs.h causes chaos, so just copy the |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 545 | required values directly. They are not going to change in the |
| 546 | foreseeable future :-) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 547 | */ |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 548 | |
/* Bit positions of the O, S, Z, A, P and C flags, listed from the
   most significant to the least significant. */
#define SHIFT_O  11   /* O flag */
#define SHIFT_S   7   /* S flag */
#define SHIFT_Z   6   /* Z flag */
#define SHIFT_A   4   /* A flag */
#define SHIFT_P   2   /* P flag */
#define SHIFT_C   0   /* C flag */

/* The corresponding single-bit masks, in the same order. */
#define MASK_O   (1 << SHIFT_O)
#define MASK_S   (1 << SHIFT_S)
#define MASK_Z   (1 << SHIFT_Z)
#define MASK_A   (1 << SHIFT_A)
#define MASK_P   (1 << SHIFT_P)
#define MASK_C   (1 << SHIFT_C)
| 562 | |
| 563 | |
| 564 | /* Count leading zeroes, w/ 0-produces-32 semantics, a la Hacker's |
| 565 | Delight. */ |
| 566 | static UInt clz32 ( UInt x ) |
| 567 | { |
| 568 | Int y, m, n; |
| 569 | y = -(x >> 16); |
| 570 | m = (y >> 16) & 16; |
| 571 | n = 16 - m; |
| 572 | x = x >> m; |
| 573 | y = x - 0x100; |
| 574 | m = (y >> 16) & 8; |
| 575 | n = n + m; |
| 576 | x = x << m; |
| 577 | y = x - 0x1000; |
| 578 | m = (y >> 16) & 4; |
| 579 | n = n + m; |
| 580 | x = x << m; |
| 581 | y = x - 0x4000; |
| 582 | m = (y >> 16) & 2; |
| 583 | n = n + m; |
| 584 | x = x << m; |
| 585 | y = x >> 14; |
| 586 | m = y & ~(y >> 1); |
| 587 | return n + 2 - m; |
| 588 | } |
| 589 | |
| 590 | static UInt ctz32 ( UInt x ) |
| 591 | { |
| 592 | return 32 - clz32((~x) & (x-1)); |
| 593 | } |
| 594 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 595 | /* Convert a 4-bit value to a 32-bit value by cloning each bit 8 |
| 596 | times. There's surely a better way to do this, but I don't know |
| 597 | what it is. */ |
| 598 | static UInt bits4_to_bytes4 ( UInt bits4 ) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 599 | { |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 600 | UInt r = 0; |
| 601 | r |= (bits4 & 1) ? 0x000000FF : 0; |
| 602 | r |= (bits4 & 2) ? 0x0000FF00 : 0; |
| 603 | r |= (bits4 & 4) ? 0x00FF0000 : 0; |
| 604 | r |= (bits4 & 8) ? 0xFF000000 : 0; |
| 605 | return r; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 606 | } |
| 607 | |
| 608 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 609 | /* Given partial results from a pcmpXstrX operation (intRes1, |
| 610 | basically), generate an I- or M-format output value, also the new |
| 611 | OSZACP flags. */ |
| 612 | static |
| 613 | void compute_PCMPxSTRx_gen_output (/*OUT*/V128* resV, |
| 614 | /*OUT*/UInt* resOSZACP, |
| 615 | UInt intRes1, |
| 616 | UInt zmaskL, UInt zmaskR, |
| 617 | UInt validL, |
| 618 | UInt pol, UInt idx, |
| 619 | Bool isxSTRM ) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 620 | { |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 621 | vassert((pol >> 2) == 0); |
| 622 | vassert((idx >> 1) == 0); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 623 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 624 | UInt intRes2 = 0; |
| 625 | switch (pol) { |
| 626 | case 0: intRes2 = intRes1; break; // pol + |
| 627 | case 1: intRes2 = ~intRes1; break; // pol - |
| 628 | case 2: intRes2 = intRes1; break; // pol m+ |
| 629 | case 3: intRes2 = intRes1 ^ validL; break; // pol m- |
| 630 | } |
| 631 | intRes2 &= 0xFFFF; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 632 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 633 | if (isxSTRM) { |
| 634 | |
| 635 | // generate M-format output (a bit or byte mask in XMM0) |
| 636 | if (idx) { |
| 637 | resV->w32[0] = bits4_to_bytes4( (intRes2 >> 0) & 0xF ); |
| 638 | resV->w32[1] = bits4_to_bytes4( (intRes2 >> 4) & 0xF ); |
| 639 | resV->w32[2] = bits4_to_bytes4( (intRes2 >> 8) & 0xF ); |
| 640 | resV->w32[3] = bits4_to_bytes4( (intRes2 >> 12) & 0xF ); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 641 | } else { |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 642 | resV->w32[0] = intRes2 & 0xFFFF; |
| 643 | resV->w32[1] = 0; |
| 644 | resV->w32[2] = 0; |
| 645 | resV->w32[3] = 0; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 646 | } |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 647 | |
| 648 | } else { |
| 649 | |
| 650 | // generate I-format output (an index in ECX) |
| 651 | // generate ecx value |
| 652 | UInt newECX = 0; |
| 653 | if (idx) { |
| 654 | // index of ms-1-bit |
| 655 | newECX = intRes2 == 0 ? 16 : (31 - clz32(intRes2)); |
| 656 | } else { |
| 657 | // index of ls-1-bit |
| 658 | newECX = intRes2 == 0 ? 16 : ctz32(intRes2); |
| 659 | } |
| 660 | |
| 661 | resV->w32[0] = newECX; |
| 662 | resV->w32[1] = 0; |
| 663 | resV->w32[2] = 0; |
| 664 | resV->w32[3] = 0; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 665 | |
| 666 | } |
| 667 | |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 668 | // generate new flags, common to all ISTRI and ISTRM cases |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 669 | *resOSZACP // A, P are zero |
| 670 | = ((intRes2 == 0) ? 0 : MASK_C) // C == 0 iff intRes2 == 0 |
| 671 | | ((zmaskL == 0) ? 0 : MASK_Z) // Z == 1 iff any in argL is 0 |
| 672 | | ((zmaskR == 0) ? 0 : MASK_S) // S == 1 iff any in argR is 0 |
| 673 | | ((intRes2 & 1) << SHIFT_O); // O == IntRes2[0] |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 674 | } |
| 675 | |
| 676 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 677 | /* Compute result and new OSZACP flags for all PCMP{E,I}STR{I,M} |
| 678 | variants. |
| 679 | |
| 680 | For xSTRI variants, the new ECX value is placed in the 32 bits |
| 681 | pointed to by *resV, and the top 96 bits are zeroed. For xSTRM |
| 682 | variants, the result is a 128 bit value and is placed at *resV in |
| 683 | the obvious way. |
| 684 | |
| 685 | For all variants, the new OSZACP value is placed at *resOSZACP. |
| 686 | |
| 687 | argLV and argRV are the vector args. The caller must prepare a |
| 688 | 16-bit mask for each, zmaskL and zmaskR. For ISTRx variants this |
| 689 | must be 1 for each zero byte of the respective arg. For ESTRx |
| 690 | variants this is derived from the explicit length indication, and |
| 691 | must be 0 in all places except at the bit index corresponding to |
| 692 | the valid length (0 .. 16). If the valid length is 16 then the |
| 693 | mask must be all zeroes. In all cases, bits 31:16 must be zero. |
| 694 | |
| 695 | imm8 is the original immediate from the instruction. isxSTRM |
| 696 | indicates whether this is a xSTRM or xSTRI variant, which controls |
| 697 | how much of *resV is written. |
| 698 | |
| 699 | If the given imm8 case can be handled, the return value is True. |
| 700 | If not, False is returned, and neither *resV nor *resOSZACP are |
| 701 | altered. |
| 702 | */ |
| 703 | |
| 704 | Bool compute_PCMPxSTRx ( /*OUT*/V128* resV, |
| 705 | /*OUT*/UInt* resOSZACP, |
| 706 | V128* argLV, V128* argRV, |
| 707 | UInt zmaskL, UInt zmaskR, |
| 708 | UInt imm8, Bool isxSTRM ) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 709 | { |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 710 | vassert(imm8 < 0x80); |
| 711 | vassert((zmaskL >> 16) == 0); |
| 712 | vassert((zmaskR >> 16) == 0); |
| 713 | |
| 714 | /* Explicitly reject any imm8 values that haven't been validated, |
| 715 | even if they would probably work. Life is too short to have |
| 716 | unvalidated cases in the code base. */ |
| 717 | switch (imm8) { |
sewardj | d59d92f | 2011-01-17 23:06:16 +0000 | [diff] [blame] | 718 | case 0x00: |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 719 | case 0x02: case 0x08: case 0x0A: case 0x0C: case 0x12: |
sewardj | 94fb5b0 | 2011-10-19 20:08:57 +0000 | [diff] [blame] | 720 | case 0x1A: case 0x38: case 0x3A: case 0x44: case 0x4A: |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 721 | break; |
| 722 | default: |
| 723 | return False; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 724 | } |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 725 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 726 | UInt fmt = (imm8 >> 0) & 3; // imm8[1:0] data format |
| 727 | UInt agg = (imm8 >> 2) & 3; // imm8[3:2] aggregation fn |
| 728 | UInt pol = (imm8 >> 4) & 3; // imm8[5:4] polarity |
| 729 | UInt idx = (imm8 >> 6) & 1; // imm8[6] 1==msb/bytemask |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 730 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 731 | /*----------------------------------------*/ |
| 732 | /*-- strcmp on byte data --*/ |
| 733 | /*----------------------------------------*/ |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 734 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 735 | if (agg == 2/*equal each, aka strcmp*/ |
| 736 | && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) { |
| 737 | Int i; |
| 738 | UChar* argL = (UChar*)argLV; |
| 739 | UChar* argR = (UChar*)argRV; |
| 740 | UInt boolResII = 0; |
| 741 | for (i = 15; i >= 0; i--) { |
| 742 | UChar cL = argL[i]; |
| 743 | UChar cR = argR[i]; |
| 744 | boolResII = (boolResII << 1) | (cL == cR ? 1 : 0); |
| 745 | } |
| 746 | UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) |
| 747 | UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 748 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 749 | // do invalidation, common to all equal-each cases |
| 750 | UInt intRes1 |
| 751 | = (boolResII & validL & validR) // if both valid, use cmpres |
| 752 | | (~ (validL | validR)); // if both invalid, force 1 |
| 753 | // else force 0 |
| 754 | intRes1 &= 0xFFFF; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 755 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 756 | // generate I-format output |
| 757 | compute_PCMPxSTRx_gen_output( |
| 758 | resV, resOSZACP, |
| 759 | intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM |
| 760 | ); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 761 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 762 | return True; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 763 | } |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 764 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 765 | /*----------------------------------------*/ |
| 766 | /*-- set membership on byte data --*/ |
| 767 | /*----------------------------------------*/ |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 768 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 769 | if (agg == 0/*equal any, aka find chars in a set*/ |
| 770 | && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) { |
| 771 | /* argL: the string, argR: charset */ |
| 772 | UInt si, ci; |
| 773 | UChar* argL = (UChar*)argLV; |
| 774 | UChar* argR = (UChar*)argRV; |
| 775 | UInt boolRes = 0; |
| 776 | UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) |
| 777 | UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 778 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 779 | for (si = 0; si < 16; si++) { |
| 780 | if ((validL & (1 << si)) == 0) |
| 781 | // run off the end of the string. |
| 782 | break; |
| 783 | UInt m = 0; |
| 784 | for (ci = 0; ci < 16; ci++) { |
| 785 | if ((validR & (1 << ci)) == 0) break; |
| 786 | if (argR[ci] == argL[si]) { m = 1; break; } |
| 787 | } |
| 788 | boolRes |= (m << si); |
| 789 | } |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 790 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 791 | // boolRes is "pre-invalidated" |
| 792 | UInt intRes1 = boolRes & 0xFFFF; |
| 793 | |
| 794 | // generate I-format output |
| 795 | compute_PCMPxSTRx_gen_output( |
| 796 | resV, resOSZACP, |
| 797 | intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM |
| 798 | ); |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 799 | |
sewardj | acfbd7d | 2010-08-17 22:52:08 +0000 | [diff] [blame] | 800 | return True; |
| 801 | } |
| 802 | |
| 803 | /*----------------------------------------*/ |
| 804 | /*-- substring search on byte data --*/ |
| 805 | /*----------------------------------------*/ |
| 806 | |
| 807 | if (agg == 3/*equal ordered, aka substring search*/ |
| 808 | && (fmt == 0/*ub*/ || fmt == 2/*sb*/)) { |
| 809 | |
| 810 | /* argL: haystack, argR: needle */ |
| 811 | UInt ni, hi; |
| 812 | UChar* argL = (UChar*)argLV; |
| 813 | UChar* argR = (UChar*)argRV; |
| 814 | UInt boolRes = 0; |
| 815 | UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) |
| 816 | UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) |
| 817 | for (hi = 0; hi < 16; hi++) { |
| 818 | if ((validL & (1 << hi)) == 0) |
| 819 | // run off the end of the haystack |
| 820 | break; |
| 821 | UInt m = 1; |
| 822 | for (ni = 0; ni < 16; ni++) { |
| 823 | if ((validR & (1 << ni)) == 0) break; |
| 824 | UInt i = ni + hi; |
| 825 | if (i >= 16) break; |
| 826 | if (argL[i] != argR[ni]) { m = 0; break; } |
| 827 | } |
| 828 | boolRes |= (m << hi); |
| 829 | } |
| 830 | |
| 831 | // boolRes is "pre-invalidated" |
| 832 | UInt intRes1 = boolRes & 0xFFFF; |
| 833 | |
| 834 | // generate I-format output |
| 835 | compute_PCMPxSTRx_gen_output( |
| 836 | resV, resOSZACP, |
| 837 | intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM |
| 838 | ); |
| 839 | |
| 840 | return True; |
| 841 | } |
| 842 | |
| 843 | /*----------------------------------------*/ |
| 844 | /*-- ranges, unsigned byte data --*/ |
| 845 | /*----------------------------------------*/ |
| 846 | |
| 847 | if (agg == 1/*ranges*/ |
| 848 | && fmt == 0/*ub*/) { |
| 849 | |
| 850 | /* argL: string, argR: range-pairs */ |
| 851 | UInt ri, si; |
| 852 | UChar* argL = (UChar*)argLV; |
| 853 | UChar* argR = (UChar*)argRV; |
| 854 | UInt boolRes = 0; |
| 855 | UInt validL = ~(zmaskL | -zmaskL); // not(left(zmaskL)) |
| 856 | UInt validR = ~(zmaskR | -zmaskR); // not(left(zmaskR)) |
| 857 | for (si = 0; si < 16; si++) { |
| 858 | if ((validL & (1 << si)) == 0) |
| 859 | // run off the end of the string |
| 860 | break; |
| 861 | UInt m = 0; |
| 862 | for (ri = 0; ri < 16; ri += 2) { |
| 863 | if ((validR & (3 << ri)) != (3 << ri)) break; |
| 864 | if (argR[ri] <= argL[si] && argL[si] <= argR[ri+1]) { |
| 865 | m = 1; break; |
| 866 | } |
| 867 | } |
| 868 | boolRes |= (m << si); |
| 869 | } |
| 870 | |
| 871 | // boolRes is "pre-invalidated" |
| 872 | UInt intRes1 = boolRes & 0xFFFF; |
| 873 | |
| 874 | // generate I-format output |
| 875 | compute_PCMPxSTRx_gen_output( |
| 876 | resV, resOSZACP, |
| 877 | intRes1, zmaskL, zmaskR, validL, pol, idx, isxSTRM |
| 878 | ); |
| 879 | |
| 880 | return True; |
| 881 | } |
| 882 | |
| 883 | return False; |
sewardj | 0b2d3fe | 2010-08-06 07:59:38 +0000 | [diff] [blame] | 884 | } |
| 885 | |
| 886 | |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 887 | /*---------------------------------------------------------------*/ |
sewardj | cef7d3e | 2009-07-02 12:21:59 +0000 | [diff] [blame] | 888 | /*--- end guest_generic_x87.c ---*/ |
sewardj | 52ff4cc | 2005-03-26 20:33:38 +0000 | [diff] [blame] | 889 | /*---------------------------------------------------------------*/ |