| /* Copyright (c) 2002-2008 Jean-Marc Valin |
| Copyright (c) 2007-2008 CSIRO |
| Copyright (c) 2007-2009 Xiph.Org Foundation |
| Written by Jean-Marc Valin */ |
| /** |
| @file mathops.h |
| @brief Various math functions |
| */ |
| /* |
| Redistribution and use in source and binary forms, with or without |
| modification, are permitted provided that the following conditions |
| are met: |
| |
| - Redistributions of source code must retain the above copyright |
| notice, this list of conditions and the following disclaimer. |
| |
| - Redistributions in binary form must reproduce the above copyright |
| notice, this list of conditions and the following disclaimer in the |
| documentation and/or other materials provided with the distribution. |
| |
| - Neither the name of the Xiph.org Foundation nor the names of its |
| contributors may be used to endorse or promote products derived from |
| this software without specific prior written permission. |
| |
| THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| */ |
| |
| #ifdef HAVE_CONFIG_H |
| #include "config.h" |
| #endif |
| |
| #include "mathops.h" |
| |
| #ifdef FIXED_POINT |
| |
| celt_word32 frac_div32(celt_word32 a, celt_word32 b) |
| { |
| celt_word16 rcp; |
| celt_word32 result, rem; |
| int shift = 30-celt_ilog2(b); |
| a = SHL32(a,shift); |
| b = SHL32(b,shift); |
| |
| /* 16-bit reciprocal */ |
| rcp = ROUND16(celt_rcp(ROUND16(b,16)),2); |
| result = SHL32(MULT16_32_Q15(rcp, a),1); |
| rem = a-MULT32_32_Q31(result, b); |
| result += SHL32(MULT16_32_Q15(rcp, rem),1); |
| return result; |
| } |
| |
| /** Reciprocal sqrt approximation in the range [0.25,1) (Q16 in, Q14 out) */ |
| celt_word16 celt_rsqrt_norm(celt_word32 x) |
| { |
| celt_word16 n; |
| celt_word16 r; |
| celt_word16 r2; |
| celt_word16 y; |
| /* Range of n is [-16384,32767] ([-0.5,1) in Q15). */ |
| n = x-32768; |
| /* Get a rough initial guess for the root. |
| The optimal minimax quadratic approximation (using relative error) is |
| r = 1.437799046117536+n*(-0.823394375837328+n*0.4096419668459485). |
| Coefficients here, and the final result r, are Q14.*/ |
| r = ADD16(23557, MULT16_16_Q15(n, ADD16(-13490, MULT16_16_Q15(n, 6713)))); |
| /* We want y = x*r*r-1 in Q15, but x is 32-bit Q16 and r is Q14. |
| We can compute the result from n and r using Q15 multiplies with some |
| adjustment, carefully done to avoid overflow. |
| Range of y is [-1564,1594]. */ |
| r2 = MULT16_16_Q15(r, r); |
| y = SHL16(SUB16(ADD16(MULT16_16_Q15(r2, n), r2), 16384), 1); |
| /* Apply a 2nd-order Householder iteration: r += r*y*(y*0.375-0.5). |
| This yields the Q14 reciprocal square root of the Q16 x, with a maximum |
| relative error of 1.04956E-4, a (relative) RMSE of 2.80979E-5, and a |
| peak absolute error of 2.26591/16384. */ |
| return ADD16(r, MULT16_16_Q15(r, MULT16_16_Q15(y, |
| SUB16(MULT16_16_Q15(y, 12288), 16384)))); |
| } |
| |
| /** Sqrt approximation (QX input, QX/2 output) */ |
| celt_word32 celt_sqrt(celt_word32 x) |
| { |
| int k; |
| celt_word16 n; |
| celt_word32 rt; |
| static const celt_word16 C[5] = {23175, 11561, -3011, 1699, -664}; |
| if (x==0) |
| return 0; |
| k = (celt_ilog2(x)>>1)-7; |
| x = VSHR32(x, (k<<1)); |
| n = x-32768; |
| rt = ADD16(C[0], MULT16_16_Q15(n, ADD16(C[1], MULT16_16_Q15(n, ADD16(C[2], |
| MULT16_16_Q15(n, ADD16(C[3], MULT16_16_Q15(n, (C[4]))))))))); |
| rt = VSHR32(rt,7-k); |
| return rt; |
| } |
| |
| #define L1 32767 |
| #define L2 -7651 |
| #define L3 8277 |
| #define L4 -626 |
| |
| static inline celt_word16 _celt_cos_pi_2(celt_word16 x) |
| { |
| celt_word16 x2; |
| |
| x2 = MULT16_16_P15(x,x); |
| return ADD16(1,MIN16(32766,ADD32(SUB16(L1,x2), MULT16_16_P15(x2, ADD32(L2, MULT16_16_P15(x2, ADD32(L3, MULT16_16_P15(L4, x2 |
| )))))))); |
| } |
| |
| #undef L1 |
| #undef L2 |
| #undef L3 |
| #undef L4 |
| |
| celt_word16 celt_cos_norm(celt_word32 x) |
| { |
| x = x&0x0001ffff; |
| if (x>SHL32(EXTEND32(1), 16)) |
| x = SUB32(SHL32(EXTEND32(1), 17),x); |
| if (x&0x00007fff) |
| { |
| if (x<SHL32(EXTEND32(1), 15)) |
| { |
| return _celt_cos_pi_2(EXTRACT16(x)); |
| } else { |
| return NEG32(_celt_cos_pi_2(EXTRACT16(65536-x))); |
| } |
| } else { |
| if (x&0x0000ffff) |
| return 0; |
| else if (x&0x0001ffff) |
| return -32767; |
| else |
| return 32767; |
| } |
| } |
| |
| /** Reciprocal approximation (Q15 input, Q16 output) */ |
| celt_word32 celt_rcp(celt_word32 x) |
| { |
| int i; |
| celt_word16 n; |
| celt_word16 r; |
| celt_assert2(x>0, "celt_rcp() only defined for positive values"); |
| i = celt_ilog2(x); |
| /* n is Q15 with range [0,1). */ |
| n = VSHR32(x,i-15)-32768; |
| /* Start with a linear approximation: |
| r = 1.8823529411764706-0.9411764705882353*n. |
| The coefficients and the result are Q14 in the range [15420,30840].*/ |
| r = ADD16(30840, MULT16_16_Q15(-15420, n)); |
| /* Perform two Newton iterations: |
| r -= r*((r*n)-1.Q15) |
| = r*((r*n)+(r-1.Q15)). */ |
| r = SUB16(r, MULT16_16_Q15(r, |
| ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768)))); |
| /* We subtract an extra 1 in the second iteration to avoid overflow; it also |
| neatly compensates for truncation error in the rest of the process. */ |
| r = SUB16(r, ADD16(1, MULT16_16_Q15(r, |
| ADD16(MULT16_16_Q15(r, n), ADD16(r, -32768))))); |
| /* r is now the Q15 solution to 2/(n+1), with a maximum relative error |
| of 7.05346E-5, a (relative) RMSE of 2.14418E-5, and a peak absolute |
| error of 1.24665/32768. */ |
| return VSHR32(EXTEND32(r),i-16); |
| } |
| |
| #endif |