Tony-LunarG | b0b195d | 2015-05-13 15:01:06 -0600 | [diff] [blame] | 1 | /////////////////////////////////////////////////////////////////////////////////////////////////// |
| 2 | // OpenGL Mathematics Copyright (c) 2005 - 2014 G-Truc Creation (www.g-truc.net) |
| 3 | /////////////////////////////////////////////////////////////////////////////////////////////////// |
| 4 | // Created : 2009-05-07 |
| 5 | // Updated : 2009-05-07 |
| 6 | // Licence : This source is under MIT License |
| 7 | // File : glm/gtx/simd_vec4.inl |
| 8 | /////////////////////////////////////////////////////////////////////////////////////////////////// |
| 9 | |
| 10 | namespace glm{ |
| 11 | namespace detail{ |
| 12 | |
// Compile-time integer holder: re-exports the template argument as the
// enumerator `value`, so it can be used where an integral constant
// expression is required (see the shuffle selector in swizzle()).
template <int Value>
struct mask
{
	enum
	{
		value = Value
	};
};
| 18 | |
| 19 | ////////////////////////////////////// |
| 20 | // Implicit basic constructors |
| 21 | |
| 22 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD() |
| 23 | #ifdef GLM_SIMD_ENABLE_DEFAULT_INIT |
| 24 | : Data(_mm_set_ps(0.0f, 0.0f, 0.0f, 0.0f)) |
| 25 | #endif |
| 26 | {} |
| 27 | |
| 28 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(__m128 const & Data) : |
| 29 | Data(Data) |
| 30 | {} |
| 31 | |
| 32 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(fvec4SIMD const & v) : |
| 33 | Data(v.Data) |
| 34 | {} |
| 35 | |
| 36 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec4 const & v) : |
| 37 | Data(_mm_set_ps(v.w, v.z, v.y, v.x)) |
| 38 | {} |
| 39 | |
| 40 | ////////////////////////////////////// |
| 41 | // Explicit basic constructors |
| 42 | |
| 43 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s) : |
| 44 | Data(_mm_set1_ps(s)) |
| 45 | {} |
| 46 | |
| 47 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & x, float const & y, float const & z, float const & w) : |
| 48 | // Data(_mm_setr_ps(x, y, z, w)) |
| 49 | Data(_mm_set_ps(w, z, y, x)) |
| 50 | {} |
| 51 | /* |
| 52 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const v[4]) : |
| 53 | Data(_mm_load_ps(v)) |
| 54 | {} |
| 55 | */ |
| 56 | ////////////////////////////////////// |
| 57 | // Swizzle constructors |
| 58 | |
| 59 | //fvec4SIMD(ref4<float> const & r); |
| 60 | |
| 61 | ////////////////////////////////////// |
| 62 | // Conversion vector constructors |
| 63 | |
| 64 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v, float const & s1, float const & s2) : |
| 65 | Data(_mm_set_ps(s2, s1, v.y, v.x)) |
| 66 | {} |
| 67 | |
| 68 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, vec2 const & v, float const & s2) : |
| 69 | Data(_mm_set_ps(s2, v.y, v.x, s1)) |
| 70 | {} |
| 71 | |
| 72 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s1, float const & s2, vec2 const & v) : |
| 73 | Data(_mm_set_ps(v.y, v.x, s2, s1)) |
| 74 | {} |
| 75 | |
| 76 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec3 const & v, float const & s) : |
| 77 | Data(_mm_set_ps(s, v.z, v.y, v.x)) |
| 78 | {} |
| 79 | |
| 80 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(float const & s, vec3 const & v) : |
| 81 | Data(_mm_set_ps(v.z, v.y, v.x, s)) |
| 82 | {} |
| 83 | |
| 84 | GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(vec2 const & v1, vec2 const & v2) : |
| 85 | Data(_mm_set_ps(v2.y, v2.x, v1.y, v1.x)) |
| 86 | {} |
| 87 | |
| 88 | //GLM_FUNC_QUALIFIER fvec4SIMD::fvec4SIMD(ivec4SIMD const & v) : |
| 89 | // Data(_mm_cvtepi32_ps(v.Data)) |
| 90 | //{} |
| 91 | |
| 92 | ////////////////////////////////////// |
| 93 | // Unary arithmetic operators |
| 94 | |
| 95 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator=(fvec4SIMD const & v) |
| 96 | { |
| 97 | this->Data = v.Data; |
| 98 | return *this; |
| 99 | } |
| 100 | |
| 101 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(float const & s) |
| 102 | { |
| 103 | this->Data = _mm_add_ps(Data, _mm_set_ps1(s)); |
| 104 | return *this; |
| 105 | } |
| 106 | |
| 107 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator+=(fvec4SIMD const & v) |
| 108 | { |
| 109 | this->Data = _mm_add_ps(this->Data , v.Data); |
| 110 | return *this; |
| 111 | } |
| 112 | |
| 113 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(float const & s) |
| 114 | { |
| 115 | this->Data = _mm_sub_ps(Data, _mm_set_ps1(s)); |
| 116 | return *this; |
| 117 | } |
| 118 | |
| 119 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator-=(fvec4SIMD const & v) |
| 120 | { |
| 121 | this->Data = _mm_sub_ps(this->Data , v.Data); |
| 122 | return *this; |
| 123 | } |
| 124 | |
| 125 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(float const & s) |
| 126 | { |
| 127 | this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); |
| 128 | return *this; |
| 129 | } |
| 130 | |
| 131 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator*=(fvec4SIMD const & v) |
| 132 | { |
| 133 | this->Data = _mm_mul_ps(this->Data , v.Data); |
| 134 | return *this; |
| 135 | } |
| 136 | |
| 137 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(float const & s) |
| 138 | { |
| 139 | this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); |
| 140 | return *this; |
| 141 | } |
| 142 | |
| 143 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator/=(fvec4SIMD const & v) |
| 144 | { |
| 145 | this->Data = _mm_div_ps(this->Data , v.Data); |
| 146 | return *this; |
| 147 | } |
| 148 | |
| 149 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator++() |
| 150 | { |
| 151 | this->Data = _mm_add_ps(this->Data , glm::detail::one); |
| 152 | return *this; |
| 153 | } |
| 154 | |
| 155 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::operator--() |
| 156 | { |
| 157 | this->Data = _mm_sub_ps(this->Data, glm::detail::one); |
| 158 | return *this; |
| 159 | } |
| 160 | |
| 161 | ////////////////////////////////////// |
| 162 | // Swizzle operators |
| 163 | |
| 164 | template <comp X, comp Y, comp Z, comp W> |
| 165 | GLM_FUNC_QUALIFIER fvec4SIMD fvec4SIMD::swizzle() const |
| 166 | { |
| 167 | __m128 Data = _mm_shuffle_ps( |
| 168 | this->Data, this->Data, |
| 169 | mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); |
| 170 | return fvec4SIMD(Data); |
| 171 | } |
| 172 | |
| 173 | template <comp X, comp Y, comp Z, comp W> |
| 174 | GLM_FUNC_QUALIFIER fvec4SIMD& fvec4SIMD::swizzle() |
| 175 | { |
| 176 | this->Data = _mm_shuffle_ps( |
| 177 | this->Data, this->Data, |
| 178 | mask<(W << 6) | (Z << 4) | (Y << 2) | (X << 0)>::value); |
| 179 | return *this; |
| 180 | } |
| 181 | |
| 182 | // operator+ |
| 183 | GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v, float s) |
| 184 | { |
| 185 | return fvec4SIMD(_mm_add_ps(v.Data, _mm_set1_ps(s))); |
| 186 | } |
| 187 | |
| 188 | GLM_FUNC_QUALIFIER fvec4SIMD operator+ (float s, fvec4SIMD const & v) |
| 189 | { |
| 190 | return fvec4SIMD(_mm_add_ps(_mm_set1_ps(s), v.Data)); |
| 191 | } |
| 192 | |
| 193 | GLM_FUNC_QUALIFIER fvec4SIMD operator+ (fvec4SIMD const & v1, fvec4SIMD const & v2) |
| 194 | { |
| 195 | return fvec4SIMD(_mm_add_ps(v1.Data, v2.Data)); |
| 196 | } |
| 197 | |
| 198 | //operator- |
| 199 | GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v, float s) |
| 200 | { |
| 201 | return fvec4SIMD(_mm_sub_ps(v.Data, _mm_set1_ps(s))); |
| 202 | } |
| 203 | |
| 204 | GLM_FUNC_QUALIFIER fvec4SIMD operator- (float s, fvec4SIMD const & v) |
| 205 | { |
| 206 | return fvec4SIMD(_mm_sub_ps(_mm_set1_ps(s), v.Data)); |
| 207 | } |
| 208 | |
| 209 | GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v1, fvec4SIMD const & v2) |
| 210 | { |
| 211 | return fvec4SIMD(_mm_sub_ps(v1.Data, v2.Data)); |
| 212 | } |
| 213 | |
| 214 | //operator* |
| 215 | GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, float s) |
| 216 | { |
| 217 | __m128 par0 = v.Data; |
| 218 | __m128 par1 = _mm_set1_ps(s); |
| 219 | return fvec4SIMD(_mm_mul_ps(par0, par1)); |
| 220 | } |
| 221 | |
| 222 | GLM_FUNC_QUALIFIER fvec4SIMD operator* (float s, fvec4SIMD const & v) |
| 223 | { |
| 224 | __m128 par0 = _mm_set1_ps(s); |
| 225 | __m128 par1 = v.Data; |
| 226 | return fvec4SIMD(_mm_mul_ps(par0, par1)); |
| 227 | } |
| 228 | |
| 229 | GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v1, fvec4SIMD const & v2) |
| 230 | { |
| 231 | return fvec4SIMD(_mm_mul_ps(v1.Data, v2.Data)); |
| 232 | } |
| 233 | |
| 234 | //operator/ |
| 235 | GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v, float s) |
| 236 | { |
| 237 | __m128 par0 = v.Data; |
| 238 | __m128 par1 = _mm_set1_ps(s); |
| 239 | return fvec4SIMD(_mm_div_ps(par0, par1)); |
| 240 | } |
| 241 | |
| 242 | GLM_FUNC_QUALIFIER fvec4SIMD operator/ (float s, fvec4SIMD const & v) |
| 243 | { |
| 244 | __m128 par0 = _mm_set1_ps(s); |
| 245 | __m128 par1 = v.Data; |
| 246 | return fvec4SIMD(_mm_div_ps(par0, par1)); |
| 247 | } |
| 248 | |
| 249 | GLM_FUNC_QUALIFIER fvec4SIMD operator/ (fvec4SIMD const & v1, fvec4SIMD const & v2) |
| 250 | { |
| 251 | return fvec4SIMD(_mm_div_ps(v1.Data, v2.Data)); |
| 252 | } |
| 253 | |
| 254 | // Unary constant operators |
| 255 | GLM_FUNC_QUALIFIER fvec4SIMD operator- (fvec4SIMD const & v) |
| 256 | { |
| 257 | return fvec4SIMD(_mm_sub_ps(_mm_setzero_ps(), v.Data)); |
| 258 | } |
| 259 | |
| 260 | GLM_FUNC_QUALIFIER fvec4SIMD operator++ (fvec4SIMD const & v, int) |
| 261 | { |
| 262 | return fvec4SIMD(_mm_add_ps(v.Data, glm::detail::one)); |
| 263 | } |
| 264 | |
| 265 | GLM_FUNC_QUALIFIER fvec4SIMD operator-- (fvec4SIMD const & v, int) |
| 266 | { |
| 267 | return fvec4SIMD(_mm_sub_ps(v.Data, glm::detail::one)); |
| 268 | } |
| 269 | |
| 270 | }//namespace detail |
| 271 | |
| 272 | GLM_FUNC_QUALIFIER vec4 vec4_cast |
| 273 | ( |
| 274 | detail::fvec4SIMD const & x |
| 275 | ) |
| 276 | { |
| 277 | GLM_ALIGN(16) vec4 Result; |
| 278 | _mm_store_ps(&Result[0], x.Data); |
| 279 | return Result; |
| 280 | } |
| 281 | |
| 282 | // Other possible implementation |
| 283 | //float abs(float a) |
| 284 | //{ |
| 285 | // return max(-a, a); |
| 286 | //} |
| 287 | GLM_FUNC_QUALIFIER detail::fvec4SIMD abs |
| 288 | ( |
| 289 | detail::fvec4SIMD const & x |
| 290 | ) |
| 291 | { |
| 292 | return detail::sse_abs_ps(x.Data); |
| 293 | } |
| 294 | |
| 295 | GLM_FUNC_QUALIFIER detail::fvec4SIMD sign |
| 296 | ( |
| 297 | detail::fvec4SIMD const & x |
| 298 | ) |
| 299 | { |
| 300 | return detail::sse_sgn_ps(x.Data); |
| 301 | } |
| 302 | |
| 303 | GLM_FUNC_QUALIFIER detail::fvec4SIMD floor |
| 304 | ( |
| 305 | detail::fvec4SIMD const & x |
| 306 | ) |
| 307 | { |
| 308 | return detail::sse_flr_ps(x.Data); |
| 309 | } |
| 310 | |
| 311 | GLM_FUNC_QUALIFIER detail::fvec4SIMD trunc |
| 312 | ( |
| 313 | detail::fvec4SIMD const & x |
| 314 | ) |
| 315 | { |
| 316 | //return x < 0 ? -floor(-x) : floor(x); |
| 317 | |
| 318 | __m128 Flr0 = detail::sse_flr_ps(_mm_sub_ps(_mm_setzero_ps(), x.Data)); |
| 319 | __m128 Sub0 = _mm_sub_ps(Flr0, x.Data); |
| 320 | __m128 Flr1 = detail::sse_flr_ps(x.Data); |
| 321 | |
| 322 | __m128 Cmp0 = _mm_cmplt_ps(x.Data, glm::detail::zero); |
| 323 | __m128 Cmp1 = _mm_cmpnlt_ps(x.Data, glm::detail::zero); |
| 324 | |
| 325 | __m128 And0 = _mm_and_ps(Sub0, Cmp0); |
| 326 | __m128 And1 = _mm_and_ps(Flr1, Cmp1); |
| 327 | |
| 328 | return _mm_or_ps(And0, And1); |
| 329 | } |
| 330 | |
| 331 | GLM_FUNC_QUALIFIER detail::fvec4SIMD round |
| 332 | ( |
| 333 | detail::fvec4SIMD const & x |
| 334 | ) |
| 335 | { |
| 336 | return detail::sse_rnd_ps(x.Data); |
| 337 | } |
| 338 | |
| 339 | //GLM_FUNC_QUALIFIER detail::fvec4SIMD roundEven |
| 340 | //( |
| 341 | // detail::fvec4SIMD const & x |
| 342 | //) |
| 343 | //{ |
| 344 | |
| 345 | //} |
| 346 | |
| 347 | GLM_FUNC_QUALIFIER detail::fvec4SIMD ceil |
| 348 | ( |
| 349 | detail::fvec4SIMD const & x |
| 350 | ) |
| 351 | { |
| 352 | return detail::sse_ceil_ps(x.Data); |
| 353 | } |
| 354 | |
| 355 | GLM_FUNC_QUALIFIER detail::fvec4SIMD fract |
| 356 | ( |
| 357 | detail::fvec4SIMD const & x |
| 358 | ) |
| 359 | { |
| 360 | return detail::sse_frc_ps(x.Data); |
| 361 | } |
| 362 | |
| 363 | GLM_FUNC_QUALIFIER detail::fvec4SIMD mod |
| 364 | ( |
| 365 | detail::fvec4SIMD const & x, |
| 366 | detail::fvec4SIMD const & y |
| 367 | ) |
| 368 | { |
| 369 | return detail::sse_mod_ps(x.Data, y.Data); |
| 370 | } |
| 371 | |
| 372 | GLM_FUNC_QUALIFIER detail::fvec4SIMD mod |
| 373 | ( |
| 374 | detail::fvec4SIMD const & x, |
| 375 | float const & y |
| 376 | ) |
| 377 | { |
| 378 | return detail::sse_mod_ps(x.Data, _mm_set1_ps(y)); |
| 379 | } |
| 380 | |
| 381 | //GLM_FUNC_QUALIFIER detail::fvec4SIMD modf |
| 382 | //( |
| 383 | // detail::fvec4SIMD const & x, |
| 384 | // detail::fvec4SIMD & i |
| 385 | //) |
| 386 | //{ |
| 387 | |
| 388 | //} |
| 389 | |
| 390 | GLM_FUNC_QUALIFIER detail::fvec4SIMD min |
| 391 | ( |
| 392 | detail::fvec4SIMD const & x, |
| 393 | detail::fvec4SIMD const & y |
| 394 | ) |
| 395 | { |
| 396 | return _mm_min_ps(x.Data, y.Data); |
| 397 | } |
| 398 | |
| 399 | GLM_FUNC_QUALIFIER detail::fvec4SIMD min |
| 400 | ( |
| 401 | detail::fvec4SIMD const & x, |
| 402 | float const & y |
| 403 | ) |
| 404 | { |
| 405 | return _mm_min_ps(x.Data, _mm_set1_ps(y)); |
| 406 | } |
| 407 | |
| 408 | GLM_FUNC_QUALIFIER detail::fvec4SIMD max |
| 409 | ( |
| 410 | detail::fvec4SIMD const & x, |
| 411 | detail::fvec4SIMD const & y |
| 412 | ) |
| 413 | { |
| 414 | return _mm_max_ps(x.Data, y.Data); |
| 415 | } |
| 416 | |
| 417 | GLM_FUNC_QUALIFIER detail::fvec4SIMD max |
| 418 | ( |
| 419 | detail::fvec4SIMD const & x, |
| 420 | float const & y |
| 421 | ) |
| 422 | { |
| 423 | return _mm_max_ps(x.Data, _mm_set1_ps(y)); |
| 424 | } |
| 425 | |
| 426 | GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp |
| 427 | ( |
| 428 | detail::fvec4SIMD const & x, |
| 429 | detail::fvec4SIMD const & minVal, |
| 430 | detail::fvec4SIMD const & maxVal |
| 431 | ) |
| 432 | { |
| 433 | return detail::sse_clp_ps(x.Data, minVal.Data, maxVal.Data); |
| 434 | } |
| 435 | |
| 436 | GLM_FUNC_QUALIFIER detail::fvec4SIMD clamp |
| 437 | ( |
| 438 | detail::fvec4SIMD const & x, |
| 439 | float const & minVal, |
| 440 | float const & maxVal |
| 441 | ) |
| 442 | { |
| 443 | return detail::sse_clp_ps(x.Data, _mm_set1_ps(minVal), _mm_set1_ps(maxVal)); |
| 444 | } |
| 445 | |
| 446 | GLM_FUNC_QUALIFIER detail::fvec4SIMD mix |
| 447 | ( |
| 448 | detail::fvec4SIMD const & x, |
| 449 | detail::fvec4SIMD const & y, |
| 450 | detail::fvec4SIMD const & a |
| 451 | ) |
| 452 | { |
| 453 | __m128 Sub0 = _mm_sub_ps(y.Data, x.Data); |
| 454 | __m128 Mul0 = _mm_mul_ps(a.Data, Sub0); |
| 455 | return _mm_add_ps(x.Data, Mul0); |
| 456 | } |
| 457 | |
| 458 | GLM_FUNC_QUALIFIER detail::fvec4SIMD step |
| 459 | ( |
| 460 | detail::fvec4SIMD const & edge, |
| 461 | detail::fvec4SIMD const & x |
| 462 | ) |
| 463 | { |
| 464 | __m128 cmp0 = _mm_cmpngt_ps(x.Data, edge.Data); |
| 465 | return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); |
| 466 | } |
| 467 | |
| 468 | GLM_FUNC_QUALIFIER detail::fvec4SIMD step |
| 469 | ( |
| 470 | float const & edge, |
| 471 | detail::fvec4SIMD const & x |
| 472 | ) |
| 473 | { |
| 474 | __m128 cmp0 = _mm_cmpngt_ps(x.Data, _mm_set1_ps(edge)); |
| 475 | return _mm_max_ps(_mm_min_ps(cmp0, _mm_setzero_ps()), detail::one); |
| 476 | } |
| 477 | |
| 478 | GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep |
| 479 | ( |
| 480 | detail::fvec4SIMD const & edge0, |
| 481 | detail::fvec4SIMD const & edge1, |
| 482 | detail::fvec4SIMD const & x |
| 483 | ) |
| 484 | { |
| 485 | return detail::sse_ssp_ps(edge0.Data, edge1.Data, x.Data); |
| 486 | } |
| 487 | |
| 488 | GLM_FUNC_QUALIFIER detail::fvec4SIMD smoothstep |
| 489 | ( |
| 490 | float const & edge0, |
| 491 | float const & edge1, |
| 492 | detail::fvec4SIMD const & x |
| 493 | ) |
| 494 | { |
| 495 | return detail::sse_ssp_ps(_mm_set1_ps(edge0), _mm_set1_ps(edge1), x.Data); |
| 496 | } |
| 497 | |
| 498 | //GLM_FUNC_QUALIFIER bvec4 isnan(detail::fvec4SIMD const & x) |
| 499 | //{ |
| 500 | |
| 501 | //} |
| 502 | |
| 503 | //GLM_FUNC_QUALIFIER bvec4 isinf(detail::fvec4SIMD const & x) |
| 504 | //{ |
| 505 | |
| 506 | //} |
| 507 | |
| 508 | //GLM_FUNC_QUALIFIER detail::ivec4SIMD floatBitsToInt |
| 509 | //( |
| 510 | // detail::fvec4SIMD const & value |
| 511 | //) |
| 512 | //{ |
| 513 | |
| 514 | //} |
| 515 | |
| 516 | //GLM_FUNC_QUALIFIER detail::fvec4SIMD intBitsToFloat |
| 517 | //( |
| 518 | // detail::ivec4SIMD const & value |
| 519 | //) |
| 520 | //{ |
| 521 | |
| 522 | //} |
| 523 | |
| 524 | GLM_FUNC_QUALIFIER detail::fvec4SIMD fma |
| 525 | ( |
| 526 | detail::fvec4SIMD const & a, |
| 527 | detail::fvec4SIMD const & b, |
| 528 | detail::fvec4SIMD const & c |
| 529 | ) |
| 530 | { |
| 531 | return _mm_add_ps(_mm_mul_ps(a.Data, b.Data), c.Data); |
| 532 | } |
| 533 | |
| 534 | GLM_FUNC_QUALIFIER float length |
| 535 | ( |
| 536 | detail::fvec4SIMD const & x |
| 537 | ) |
| 538 | { |
| 539 | detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); |
| 540 | detail::fvec4SIMD sqt0 = sqrt(dot0); |
| 541 | float Result = 0; |
| 542 | _mm_store_ss(&Result, sqt0.Data); |
| 543 | return Result; |
| 544 | } |
| 545 | |
| 546 | GLM_FUNC_QUALIFIER float fastLength |
| 547 | ( |
| 548 | detail::fvec4SIMD const & x |
| 549 | ) |
| 550 | { |
| 551 | detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); |
| 552 | detail::fvec4SIMD sqt0 = fastSqrt(dot0); |
| 553 | float Result = 0; |
| 554 | _mm_store_ss(&Result, sqt0.Data); |
| 555 | return Result; |
| 556 | } |
| 557 | |
| 558 | GLM_FUNC_QUALIFIER float niceLength |
| 559 | ( |
| 560 | detail::fvec4SIMD const & x |
| 561 | ) |
| 562 | { |
| 563 | detail::fvec4SIMD dot0 = detail::sse_dot_ss(x.Data, x.Data); |
| 564 | detail::fvec4SIMD sqt0 = niceSqrt(dot0); |
| 565 | float Result = 0; |
| 566 | _mm_store_ss(&Result, sqt0.Data); |
| 567 | return Result; |
| 568 | } |
| 569 | |
| 570 | GLM_FUNC_QUALIFIER detail::fvec4SIMD length4 |
| 571 | ( |
| 572 | detail::fvec4SIMD const & x |
| 573 | ) |
| 574 | { |
| 575 | return sqrt(dot4(x, x)); |
| 576 | } |
| 577 | |
| 578 | GLM_FUNC_QUALIFIER detail::fvec4SIMD fastLength4 |
| 579 | ( |
| 580 | detail::fvec4SIMD const & x |
| 581 | ) |
| 582 | { |
| 583 | return fastSqrt(dot4(x, x)); |
| 584 | } |
| 585 | |
| 586 | GLM_FUNC_QUALIFIER detail::fvec4SIMD niceLength4 |
| 587 | ( |
| 588 | detail::fvec4SIMD const & x |
| 589 | ) |
| 590 | { |
| 591 | return niceSqrt(dot4(x, x)); |
| 592 | } |
| 593 | |
| 594 | GLM_FUNC_QUALIFIER float distance |
| 595 | ( |
| 596 | detail::fvec4SIMD const & p0, |
| 597 | detail::fvec4SIMD const & p1 |
| 598 | ) |
| 599 | { |
| 600 | float Result = 0; |
| 601 | _mm_store_ss(&Result, detail::sse_dst_ps(p0.Data, p1.Data)); |
| 602 | return Result; |
| 603 | } |
| 604 | |
| 605 | GLM_FUNC_QUALIFIER detail::fvec4SIMD distance4 |
| 606 | ( |
| 607 | detail::fvec4SIMD const & p0, |
| 608 | detail::fvec4SIMD const & p1 |
| 609 | ) |
| 610 | { |
| 611 | return detail::sse_dst_ps(p0.Data, p1.Data); |
| 612 | } |
| 613 | |
| 614 | GLM_FUNC_QUALIFIER float dot |
| 615 | ( |
| 616 | detail::fvec4SIMD const & x, |
| 617 | detail::fvec4SIMD const & y |
| 618 | ) |
| 619 | { |
| 620 | float Result = 0; |
| 621 | _mm_store_ss(&Result, detail::sse_dot_ss(x.Data, y.Data)); |
| 622 | return Result; |
| 623 | } |
| 624 | |
| 625 | GLM_FUNC_QUALIFIER detail::fvec4SIMD dot4 |
| 626 | ( |
| 627 | detail::fvec4SIMD const & x, |
| 628 | detail::fvec4SIMD const & y |
| 629 | ) |
| 630 | { |
| 631 | return detail::sse_dot_ps(x.Data, y.Data); |
| 632 | } |
| 633 | |
| 634 | GLM_FUNC_QUALIFIER detail::fvec4SIMD cross |
| 635 | ( |
| 636 | detail::fvec4SIMD const & x, |
| 637 | detail::fvec4SIMD const & y |
| 638 | ) |
| 639 | { |
| 640 | return detail::sse_xpd_ps(x.Data, y.Data); |
| 641 | } |
| 642 | |
| 643 | GLM_FUNC_QUALIFIER detail::fvec4SIMD normalize |
| 644 | ( |
| 645 | detail::fvec4SIMD const & x |
| 646 | ) |
| 647 | { |
| 648 | __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); |
| 649 | __m128 isr0 = inversesqrt(detail::fvec4SIMD(dot0)).Data; |
| 650 | __m128 mul0 = _mm_mul_ps(x.Data, isr0); |
| 651 | return mul0; |
| 652 | } |
| 653 | |
| 654 | GLM_FUNC_QUALIFIER detail::fvec4SIMD fastNormalize |
| 655 | ( |
| 656 | detail::fvec4SIMD const & x |
| 657 | ) |
| 658 | { |
| 659 | __m128 dot0 = detail::sse_dot_ps(x.Data, x.Data); |
| 660 | __m128 isr0 = fastInversesqrt(dot0).Data; |
| 661 | __m128 mul0 = _mm_mul_ps(x.Data, isr0); |
| 662 | return mul0; |
| 663 | } |
| 664 | |
| 665 | GLM_FUNC_QUALIFIER detail::fvec4SIMD faceforward |
| 666 | ( |
| 667 | detail::fvec4SIMD const & N, |
| 668 | detail::fvec4SIMD const & I, |
| 669 | detail::fvec4SIMD const & Nref |
| 670 | ) |
| 671 | { |
| 672 | return detail::sse_ffd_ps(N.Data, I.Data, Nref.Data); |
| 673 | } |
| 674 | |
| 675 | GLM_FUNC_QUALIFIER detail::fvec4SIMD reflect |
| 676 | ( |
| 677 | detail::fvec4SIMD const & I, |
| 678 | detail::fvec4SIMD const & N |
| 679 | ) |
| 680 | { |
| 681 | return detail::sse_rfe_ps(I.Data, N.Data); |
| 682 | } |
| 683 | |
| 684 | GLM_FUNC_QUALIFIER detail::fvec4SIMD refract |
| 685 | ( |
| 686 | detail::fvec4SIMD const & I, |
| 687 | detail::fvec4SIMD const & N, |
| 688 | float const & eta |
| 689 | ) |
| 690 | { |
| 691 | return detail::sse_rfa_ps(I.Data, N.Data, _mm_set1_ps(eta)); |
| 692 | } |
| 693 | |
| 694 | GLM_FUNC_QUALIFIER detail::fvec4SIMD sqrt(detail::fvec4SIMD const & x) |
| 695 | { |
| 696 | return _mm_mul_ps(inversesqrt(x).Data, x.Data); |
| 697 | } |
| 698 | |
| 699 | GLM_FUNC_QUALIFIER detail::fvec4SIMD niceSqrt(detail::fvec4SIMD const & x) |
| 700 | { |
| 701 | return _mm_sqrt_ps(x.Data); |
| 702 | } |
| 703 | |
| 704 | GLM_FUNC_QUALIFIER detail::fvec4SIMD fastSqrt(detail::fvec4SIMD const & x) |
| 705 | { |
| 706 | return _mm_mul_ps(fastInversesqrt(x.Data).Data, x.Data); |
| 707 | } |
| 708 | |
| 709 | // SSE scalar reciprocal sqrt using rsqrt op, plus one Newton-Rhaphson iteration |
| 710 | // By Elan Ruskin, http://assemblyrequired.crashworks.org/ |
| 711 | GLM_FUNC_QUALIFIER detail::fvec4SIMD inversesqrt(detail::fvec4SIMD const & x) |
| 712 | { |
| 713 | GLM_ALIGN(4) static const __m128 three = {3, 3, 3, 3}; // aligned consts for fast load |
| 714 | GLM_ALIGN(4) static const __m128 half = {0.5,0.5,0.5,0.5}; |
| 715 | |
| 716 | __m128 recip = _mm_rsqrt_ps(x.Data); // "estimate" opcode |
| 717 | __m128 halfrecip = _mm_mul_ps(half, recip); |
| 718 | __m128 threeminus_xrr = _mm_sub_ps(three, _mm_mul_ps(x.Data, _mm_mul_ps(recip, recip))); |
| 719 | return _mm_mul_ps(halfrecip, threeminus_xrr); |
| 720 | } |
| 721 | |
| 722 | GLM_FUNC_QUALIFIER detail::fvec4SIMD fastInversesqrt(detail::fvec4SIMD const & x) |
| 723 | { |
| 724 | return _mm_rsqrt_ps(x.Data); |
| 725 | } |
| 726 | |
| 727 | }//namespace glm |