Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 1 | /* Copyright (c) 2001-2011 Timothy B. Terriberry |
Gregory Maxwell | f40bbf7 | 2009-02-03 20:36:57 -0500 | [diff] [blame] | 2 | */ |
| 3 | /* |
| 4 | Redistribution and use in source and binary forms, with or without |
| 5 | modification, are permitted provided that the following conditions |
| 6 | are met: |
| 7 | |
| 8 | - Redistributions of source code must retain the above copyright |
| 9 | notice, this list of conditions and the following disclaimer. |
| 10 | |
| 11 | - Redistributions in binary form must reproduce the above copyright |
| 12 | notice, this list of conditions and the following disclaimer in the |
| 13 | documentation and/or other materials provided with the distribution. |
| 14 | |
Gregory Maxwell | f40bbf7 | 2009-02-03 20:36:57 -0500 | [diff] [blame] | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
Jean-Marc Valin | cb05e7c | 2012-04-20 16:40:24 -0400 | [diff] [blame] | 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER |
| 19 | OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
Gregory Maxwell | f40bbf7 | 2009-02-03 20:36:57 -0500 | [diff] [blame] | 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
Jean-Marc Valin | 02fa913 | 2008-02-20 12:09:29 +1100 | [diff] [blame] | 28 | #ifdef HAVE_CONFIG_H |
| 29 | #include "config.h" |
| 30 | #endif |
| 31 | |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 32 | #include "entcode.h" |
Timothy B. Terriberry | 1a45886 | 2011-05-04 15:03:30 -0700 | [diff] [blame] | 33 | #include "arch.h" |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 34 | |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 35 | #if !defined(EC_CLZ) |
/*This is a fallback for systems where we don't know how to access
   a BSR or CLZ instruction (see ecintrin.h).
  If you are optimizing Opus on a new platform and it has a native CLZ or
   BSR (e.g. cell, MIPS, x86, etc) then making it available to Opus will be
   an easy performance win.*/
/*Returns the number of bits needed to represent _v.
  The result is 0 for _v==0; otherwise it is one plus the index of the highest
   set bit (1 for _v==1, 2 for _v==2 or 3, ..., 32 when bit 31 is set).
  Only the low 32 bits of _v are examined (the widest mask is 0xFFFF0000).
  _v: The value to measure.
  Return: The bit count described above.*/
int ec_ilog(opus_uint32 _v){
  /*On a Pentium M, this branchless version tested as the fastest on
     1,000,000,000 random 32-bit integers, edging out a similar version with
     branches, and a 256-entry LUT version.*/
  int ret;
  int m;
  /*Any non-zero value needs at least one bit.*/
  ret=!!_v;
  /*Binary search for the highest set bit.
    At each step m is the window size (16, 8, 4, 2) if the upper half of the
     current window is non-zero and 0 otherwise; the value is shifted down by
     m and m is folded into the result.
    ret only ever has bit 0 set before these ORs, so |= acts as an add.*/
  m=!!(_v&0xFFFF0000)<<4;
  _v>>=m;
  ret|=m;
  m=!!(_v&0xFF00)<<3;
  _v>>=m;
  ret|=m;
  m=!!(_v&0xF0)<<2;
  _v>>=m;
  ret|=m;
  m=!!(_v&0xC)<<1;
  _v>>=m;
  ret|=m;
  /*At most two bits remain; bit 1 contributes the final increment.*/
  ret+=!!(_v&0x2);
  return ret;
}
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 63 | #endif |
Timothy B. Terriberry | f13fea7 | 2007-12-11 13:25:57 +1100 | [diff] [blame] | 64 | |
Jean-Marc Valin | 5123175 | 2014-01-29 10:16:53 -0500 | [diff] [blame] | 65 | #if 1 |
| 66 | /* This is a faster version of ec_tell_frac() that takes advantage |
| 67 | of the low (1/8 bit) resolution to use just a linear function |
| 68 | followed by a lookup to determine the exact transition thresholds. */ |
| 69 | opus_uint32 ec_tell_frac(ec_ctx *_this){ |
| 70 | static const unsigned correction[8] = |
| 71 | {35733, 38967, 42495, 46340, |
| 72 | 50535, 55109, 60097, 65535}; |
| 73 | opus_uint32 nbits; |
| 74 | opus_uint32 r; |
| 75 | int l; |
| 76 | unsigned b; |
| 77 | nbits=_this->nbits_total<<BITRES; |
| 78 | l=EC_ILOG(_this->rng); |
| 79 | r=_this->rng>>(l-16); |
| 80 | b = (r>>12)-8; |
| 81 | b += r>correction[b]; |
| 82 | l = (l<<3)+b; |
| 83 | return nbits-l; |
| 84 | } |
| 85 | #else |
Jean-Marc Valin | d77d6a5 | 2011-07-29 17:33:06 -0400 | [diff] [blame] | 86 | opus_uint32 ec_tell_frac(ec_ctx *_this){ |
| 87 | opus_uint32 nbits; |
| 88 | opus_uint32 r; |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 89 | int l; |
| 90 | int i; |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 91 | /*To handle the non-integral number of bits still left in the encoder/decoder |
| 92 | state, we compute the worst-case number of bits of val that must be |
| 93 | encoded to ensure that the value is inside the range for any possible |
| 94 | subsequent bits. |
| 95 | The computation here is independent of val itself (the decoder does not |
| 96 | even track that value), even though the real number of bits used after |
| 97 | ec_enc_done() may be 1 smaller if rng is a power of two and the |
| 98 | corresponding trailing bits of val are all zeros. |
| 99 | If we did try to track that special case, then coding a value with a |
| 100 | probability of 1/(1<<n) might sometimes appear to use more than n bits. |
| 101 | This may help explain the surprising result that a newly initialized |
| 102 | encoder or decoder claims to have used 1 bit.*/ |
| 103 | nbits=_this->nbits_total<<BITRES; |
| 104 | l=EC_ILOG(_this->rng); |
Gregory Maxwell | 75d2780 | 2011-08-30 14:02:41 -0400 | [diff] [blame] | 105 | r=_this->rng>>(l-16); |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 106 | for(i=BITRES;i-->0;){ |
| 107 | int b; |
| 108 | r=r*r>>15; |
| 109 | b=(int)(r>>16); |
| 110 | l=l<<1|b; |
| 111 | r>>=b; |
| 112 | } |
| 113 | return nbits-l; |
| 114 | } |
Jean-Marc Valin | 5123175 | 2014-01-29 10:16:53 -0500 | [diff] [blame] | 115 | #endif |
Jean-Marc Valin | ec5d01c | 2014-01-20 16:32:16 -0500 | [diff] [blame] | 116 | |
| 117 | #ifdef USE_SMALL_DIV_TABLE |
/*Reciprocals of the odd integers: entry i holds 2^32/(2*i+1) rounded down,
   except for i=0, where 2^32 does not fit in 32 bits and 0xFFFFFFFF is stored
   instead.
  Only 128 initializers are listed for the 129 declared elements, so element
   128 is implicitly zero.
  NOTE(review): the declared size is left at 129 so that it keeps matching any
   declaration of this table outside this file; confirm before shrinking it.*/
const opus_uint32 SMALL_DIV_TABLE[129] = {
   0xFFFFFFFF, 0x55555555, 0x33333333, 0x24924924,
   0x1C71C71C, 0x1745D174, 0x13B13B13, 0x11111111,
   0x0F0F0F0F, 0x0D79435E, 0x0C30C30C, 0x0B21642C,
   0x0A3D70A3, 0x097B425E, 0x08D3DCB0, 0x08421084,
   0x07C1F07C, 0x07507507, 0x06EB3E45, 0x06906906,
   0x063E7063, 0x05F417D0, 0x05B05B05, 0x0572620A,
   0x05397829, 0x05050505, 0x04D4873E, 0x04A7904A,
   0x047DC11F, 0x0456C797, 0x04325C53, 0x04104104,
   0x03F03F03, 0x03D22635, 0x03B5CC0E, 0x039B0AD1,
   0x0381C0E0, 0x0369D036, 0x03531DEC, 0x033D91D2,
   0x0329161F, 0x03159721, 0x03030303, 0x02F14990,
   0x02E05C0B, 0x02D02D02, 0x02C0B02C, 0x02B1DA46,
   0x02A3A0FD, 0x0295FAD4, 0x0288DF0C, 0x027C4597,
   0x02702702, 0x02647C69, 0x02593F69, 0x024E6A17,
   0x0243F6F0, 0x0239E0D5, 0x02302302, 0x0226B902,
   0x021D9EAD, 0x0214D021, 0x020C49BA, 0x02040810,
   0x01FC07F0, 0x01F44659, 0x01ECC07B, 0x01E573AC,
   0x01DE5D6E, 0x01D77B65, 0x01D0CB58, 0x01CA4B30,
   0x01C3F8F0, 0x01BDD2B8, 0x01B7D6C3, 0x01B20364,
   0x01AC5701, 0x01A6D01A, 0x01A16D3F, 0x019C2D14,
   0x01970E4F, 0x01920FB4, 0x018D3018, 0x01886E5F,
   0x0183C977, 0x017F405F, 0x017AD220, 0x01767DCE,
   0x01724287, 0x016E1F76, 0x016A13CD, 0x01661EC6,
   0x01623FA7, 0x015E75BB, 0x015AC056, 0x01571ED3,
   0x01539094, 0x01501501, 0x014CAB88, 0x0149539E,
   0x01460CBC, 0x0142D662, 0x013FB013, 0x013C995A,
   0x013991C2, 0x013698DF, 0x0133AE45, 0x0130D190,
   0x012E025C, 0x012B404A, 0x01288B01, 0x0125E227,
   0x01234567, 0x0120B470, 0x011E2EF3, 0x011BB4A4,
   0x01194538, 0x0116E068, 0x011485F0, 0x0112358E,
   0x010FEF01, 0x010DB20A, 0x010B7E6E, 0x010953F3,
   0x01073260, 0x0105197F, 0x0103091B, 0x01010101
};
| 153 | #endif |