/*
 * There are multiple formulas for calculating arccosine of x:
 * 1) acos(x) = pi/2 + i * ln(i*x + sqrt(1-x^2)) (notice the 'i'...)
 * 2) acos(x) = pi/2 + asin(-x) (asin isn't implemented yet)
 * 3) acos(x) = pi/2 - asin(x) (ditto)
 * 4) acos(x) = 2*atan2(sqrt(1-x), sqrt(1+x))
 * 5) acos(x) = pi/2 - atan2(x, sqrt(1-x^2))
 *
 * Options 1-3 are not currently usable. #5 generates more concise radeonsi
 * bitcode and assembly than #4 (132 vs 134 instructions on radeonsi), but
 * the precision of #4 may be better. The implementation below uses #4.
 */

/*
 * __CLC_CONST(lit) spells a floating-point literal for the current
 * instantiation: when __CLC_FPSIZE is 32 an 'f' suffix is pasted onto the
 * literal, otherwise the literal is left unchanged.
 */
#if __CLC_FPSIZE != 32
#define __CLC_CONST(lit) lit
#else
#define __CLC_CONST(lit) lit ## f
#endif

Aaron Watry | 268beab | 2014-09-10 15:43:29 +0000 | [diff] [blame] | 20 | _CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE acos(__CLC_GENTYPE x) { |
| 21 | return ( |
Jan Vesely | 44e768e | 2015-04-24 19:54:17 +0000 | [diff] [blame] | 22 | (__CLC_GENTYPE) __CLC_CONST(2.0) * atan2( |
| 23 | sqrt((__CLC_GENTYPE) __CLC_CONST(1.0) - x), |
| 24 | sqrt((__CLC_GENTYPE) __CLC_CONST(1.0) + x) |
Aaron Watry | 268beab | 2014-09-10 15:43:29 +0000 | [diff] [blame] | 25 | ) |
| 26 | ); |
| 27 | } |
Jan Vesely | 44e768e | 2015-04-24 19:54:17 +0000 | [diff] [blame] | 28 | |
| 29 | #undef __CLC_CONST |