Jean-Marc Valin | 8b2ff0d | 2009-10-17 21:40:10 -0400 | [diff] [blame] | 1 | /* Copyright (c) 2001-2008 Timothy B. Terriberry |
| 2 | Copyright (c) 2008-2009 Xiph.Org Foundation */ |
Gregory Maxwell | f40bbf7 | 2009-02-03 20:36:57 -0500 | [diff] [blame] | 3 | /* |
| 4 | Redistribution and use in source and binary forms, with or without |
| 5 | modification, are permitted provided that the following conditions |
| 6 | are met: |
| 7 | |
| 8 | - Redistributions of source code must retain the above copyright |
| 9 | notice, this list of conditions and the following disclaimer. |
| 10 | |
| 11 | - Redistributions in binary form must reproduce the above copyright |
| 12 | notice, this list of conditions and the following disclaimer in the |
| 13 | documentation and/or other materials provided with the distribution. |
| 14 | |
| 15 | - Neither the name of the Xiph.org Foundation nor the names of its |
| 16 | contributors may be used to endorse or promote products derived from |
| 17 | this software without specific prior written permission. |
| 18 | |
| 19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 20 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 23 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 24 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 25 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 26 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 27 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 28 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 29 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 30 | */ |
| 31 | |
Jean-Marc Valin | 02fa913 | 2008-02-20 12:09:29 +1100 | [diff] [blame] | 32 | #ifdef HAVE_CONFIG_H |
| 33 | #include "config.h" |
| 34 | #endif |
| 35 | |
Jean-Marc Valin | 821945d | 2008-04-10 13:24:48 +1000 | [diff] [blame] | 36 | #include "arch.h" |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 37 | #include "entdec.h" |
| 38 | #include "mfrngcod.h" |
| 39 | |
| 40 | |
| 41 | |
Timothy B. Terriberry | f13fea7 | 2007-12-11 13:25:57 +1100 | [diff] [blame] | 42 | /*A range decoder. |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 43 | This is an entropy decoder based upon \cite{Mar79}, which is itself a |
| 44 | rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. |
| 45 | It is very similar to arithmetic encoding, except that encoding is done with |
| 46 | digits in any base, instead of with bits, and so it is faster when using |
| 47 | larger bases (i.e.: a byte). |
| 48 | The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ |
| 49 | is the base, longer than the theoretical optimum, but to my knowledge there |
| 50 | is no published justification for this claim. |
| 51 | This only seems true when using near-infinite precision arithmetic so that |
| 52 | the process is carried out with no rounding errors. |
| 53 | |
| 54 | IBM (the author's employer) never sought to patent the idea, and to my |
| 55 | knowledge the algorithm is unencumbered by any patents, though its |
| 56 | performance is very competitive with proprietary arithmetic coding. |
| 57 | The two are based on very similar ideas, however. |
| 58 | An excellent description of implementation details is available at |
| 59 | http://www.arturocampos.com/ac_range.html |
| 60 | A recent work \cite{MNW98} which proposes several changes to arithmetic |
| 61 | encoding for efficiency actually re-discovers many of the principles |
| 62 | behind range encoding, and presents a good theoretical analysis of them. |
| 63 | |
Timothy B. Terriberry | 8d940a6 | 2008-10-19 14:41:38 -0400 | [diff] [blame] | 64 | End of stream is handled by writing out the smallest number of bits that |
| 65 | ensures that the stream will be correctly decoded regardless of the value of |
| 66 | any subsequent bits. |
| 67 | ec_dec_tell() can be used to determine how many bits were needed to decode |
| 68 | all the symbols thus far; other data can be packed in the remaining bits of |
| 69 | the input buffer. |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 70 | @PHDTHESIS{Pas76, |
| 71 | author="Richard Clark Pasco", |
Timothy B. Terriberry | d710177 | 2007-12-11 13:25:00 +1100 | [diff] [blame] | 72 | title="Source coding algorithms for fast data compression", |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 73 | school="Dept. of Electrical Engineering, Stanford University", |
| 74 | address="Stanford, CA", |
| 75 | month=May, |
| 76 | year=1976 |
| 77 | } |
| 78 | @INPROCEEDINGS{Mar79, |
| 79 | author="Martin, G.N.N.", |
| 80 | title="Range encoding: an algorithm for removing redundancy from a digitised |
| 81 | message", |
| 82 | booktitle="Video & Data Recording Conference", |
| 83 | year=1979, |
| 84 | address="Southampton", |
| 85 | month=Jul |
| 86 | } |
| 87 | @ARTICLE{MNW98, |
| 88 | author="Alistair Moffat and Radford Neal and Ian H. Witten", |
| 89 | title="Arithmetic Coding Revisited", |
| 90 | journal="{ACM} Transactions on Information Systems", |
| 91 | year=1998, |
| 92 | volume=16, |
| 93 | number=3, |
| 94 | pages="256--294", |
| 95 | month=Jul, |
Timothy B. Terriberry | f13fea7 | 2007-12-11 13:25:57 +1100 | [diff] [blame] | 96 | URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 97 | }*/ |
| 98 | |
| 99 | |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 100 | /*Gets the next byte of input. |
| 101 | After all the bytes in the current packet have been consumed, and the extra |
| 102 | end code returned if needed, this function will continue to return zero each |
| 103 | time it is called. |
| 104 | Return: The next byte of input.*/ |
| 105 | static int ec_dec_in(ec_dec *_this){ |
| 106 | int ret; |
| 107 | ret=ec_byte_read1(_this->buf); |
| 108 | if(ret<0){ |
Timothy B. Terriberry | a2fd116 | 2008-01-24 22:28:58 -0500 | [diff] [blame] | 109 | ret=0; |
Timothy B.B Terriberry | d77f61a | 2008-10-19 14:24:53 -0400 | [diff] [blame] | 110 | /*Needed to keep oc_dec_tell() operating correctly.*/ |
tterribe | 7e3293f | 2008-01-23 23:04:43 +0000 | [diff] [blame] | 111 | ec_byte_adv1(_this->buf); |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 112 | } |
tterribe | 7e3293f | 2008-01-23 23:04:43 +0000 | [diff] [blame] | 113 | return ret; |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 114 | } |
| 115 | |
Timothy B. Terriberry | d710177 | 2007-12-11 13:25:00 +1100 | [diff] [blame] | 116 | /*Normalizes the contents of dif and rng so that rng lies entirely in the |
| 117 | high-order symbol.*/ |
Jean-Marc Valin | fd8fda9 | 2008-03-27 09:00:14 +1100 | [diff] [blame] | 118 | static inline void ec_dec_normalize(ec_dec *_this){ |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 119 | /*If the range is too small, rescale it and input some bits.*/ |
| 120 | while(_this->rng<=EC_CODE_BOT){ |
| 121 | int sym; |
| 122 | _this->rng<<=EC_SYM_BITS; |
| 123 | /*Use up the remaining bits from our last symbol.*/ |
| 124 | sym=_this->rem<<EC_CODE_EXTRA&EC_SYM_MAX; |
| 125 | /*Read the next value from the input.*/ |
| 126 | _this->rem=ec_dec_in(_this); |
| 127 | /*Take the rest of the bits we need from this new symbol.*/ |
| 128 | sym|=_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA; |
| 129 | _this->dif=(_this->dif<<EC_SYM_BITS)-sym&EC_CODE_MASK; |
| 130 | /*dif can never be larger than EC_CODE_TOP. |
| 131 | This is equivalent to the slightly more readable: |
| 132 | if(_this->dif>EC_CODE_TOP)_this->dif-=EC_CODE_TOP;*/ |
Timothy B. Terriberry | d710177 | 2007-12-11 13:25:00 +1100 | [diff] [blame] | 133 | _this->dif^=_this->dif&_this->dif-1&EC_CODE_TOP; |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 134 | } |
| 135 | } |
| 136 | |
| 137 | void ec_dec_init(ec_dec *_this,ec_byte_buffer *_buf){ |
| 138 | _this->buf=_buf; |
| 139 | _this->rem=ec_dec_in(_this); |
| 140 | _this->rng=1U<<EC_CODE_EXTRA; |
| 141 | _this->dif=_this->rng-(_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA); |
| 142 | /*Normalize the interval.*/ |
| 143 | ec_dec_normalize(_this); |
Jean-Marc Valin | cef1d6a | 2010-09-02 10:16:56 -0400 | [diff] [blame] | 144 | _this->end_byte=0; /* Required for platforms that have chars > 8 bits */ |
Jean-Marc Valin | c08be44 | 2009-06-17 23:23:46 -0400 | [diff] [blame] | 145 | _this->end_bits_left=0; |
| 146 | _this->nb_end_bits=0; |
Jean-Marc Valin | b1e017f | 2010-07-18 21:20:35 -0400 | [diff] [blame] | 147 | _this->error=0; |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 148 | } |
| 149 | |
| 150 | |
| 151 | unsigned ec_decode(ec_dec *_this,unsigned _ft){ |
| 152 | unsigned s; |
| 153 | _this->nrm=_this->rng/_ft; |
| 154 | s=(unsigned)((_this->dif-1)/_this->nrm); |
| 155 | return _ft-EC_MINI(s+1,_ft); |
| 156 | } |
| 157 | |
Jean-Marc Valin | 949a29b | 2009-07-25 20:16:01 -0400 | [diff] [blame] | 158 | unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ |
Jean-Marc Valin | c2decd3 | 2008-03-22 22:58:45 +1100 | [diff] [blame] | 159 | unsigned s; |
Jean-Marc Valin | 949a29b | 2009-07-25 20:16:01 -0400 | [diff] [blame] | 160 | _this->nrm=_this->rng>>_bits; |
Jean-Marc Valin | c2decd3 | 2008-03-22 22:58:45 +1100 | [diff] [blame] | 161 | s=(unsigned)((_this->dif-1)/_this->nrm); |
Jean-Marc Valin | 949a29b | 2009-07-25 20:16:01 -0400 | [diff] [blame] | 162 | return (1<<_bits)-EC_MINI(s+1,1<<_bits); |
| 163 | } |
| 164 | |
Jean-Marc Valin | aaedf17 | 2010-09-10 20:20:04 -0400 | [diff] [blame] | 165 | unsigned ec_dec_bits(ec_dec *_this,unsigned bits){ |
Jean-Marc Valin | c08be44 | 2009-06-17 23:23:46 -0400 | [diff] [blame] | 166 | unsigned value=0; |
| 167 | int count=0; |
| 168 | _this->nb_end_bits += bits; |
| 169 | while (bits>=_this->end_bits_left) |
| 170 | { |
| 171 | value |= _this->end_byte>>(8-_this->end_bits_left)<<count; |
| 172 | count += _this->end_bits_left; |
| 173 | bits -= _this->end_bits_left; |
| 174 | _this->end_byte=ec_byte_look_at_end(_this->buf); |
| 175 | _this->end_bits_left = 8; |
| 176 | } |
| 177 | value |= ((_this->end_byte>>(8-_this->end_bits_left))&((1<<bits)-1))<<count; |
| 178 | _this->end_bits_left -= bits; |
| 179 | return value; |
Jean-Marc Valin | c2decd3 | 2008-03-22 22:58:45 +1100 | [diff] [blame] | 180 | } |
| 181 | |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 182 | void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ |
| 183 | ec_uint32 s; |
Jean-Marc Valin | 821945d | 2008-04-10 13:24:48 +1000 | [diff] [blame] | 184 | s=IMUL32(_this->nrm,(_ft-_fh)); |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 185 | _this->dif-=s; |
Jean-Marc Valin | 821945d | 2008-04-10 13:24:48 +1000 | [diff] [blame] | 186 | _this->rng=_fl>0?IMUL32(_this->nrm,(_fh-_fl)):_this->rng-s; |
Jean-Marc Valin | 9c3e22c | 2007-12-07 22:25:31 +1100 | [diff] [blame] | 187 | ec_dec_normalize(_this); |
| 188 | } |
| 189 | |
Timothy B. Terriberry | 43e9406 | 2010-05-29 23:02:33 -0400 | [diff] [blame] | 190 | /*The probability of having a "one" is given in 1/65536.*/ |
| 191 | int ec_dec_bit_prob(ec_dec *_this,unsigned _prob){ |
Timothy B. Terriberry | 299747e | 2010-05-29 22:47:37 -0400 | [diff] [blame] | 192 | ec_uint32 r; |
| 193 | ec_uint32 s; |
| 194 | ec_uint32 d; |
| 195 | int val; |
| 196 | r=_this->rng; |
| 197 | d=_this->dif; |
Timothy B. Terriberry | 43e9406 | 2010-05-29 23:02:33 -0400 | [diff] [blame] | 198 | s=(r>>16)*_prob; |
Timothy B. Terriberry | 299747e | 2010-05-29 22:47:37 -0400 | [diff] [blame] | 199 | val=d<=s; |
| 200 | if(!val)_this->dif=d-s; |
| 201 | _this->rng=val?s:r-s; |
| 202 | ec_dec_normalize(_this); |
| 203 | return val; |
| 204 | } |
| 205 | |
Jean-Marc Valin | 531f2ae | 2010-08-02 09:01:28 -0400 | [diff] [blame] | 206 | ec_uint32 ec_dec_tell(ec_dec *_this,int _b){ |
tterribe | 3eff11d | 2008-01-11 05:51:49 +0000 | [diff] [blame] | 207 | ec_uint32 r; |
| 208 | int l; |
Jean-Marc Valin | 531f2ae | 2010-08-02 09:01:28 -0400 | [diff] [blame] | 209 | ec_uint32 nbits; |
Timothy B.B Terriberry | d77f61a | 2008-10-19 14:24:53 -0400 | [diff] [blame] | 210 | nbits=(ec_byte_bytes(_this->buf)-(EC_CODE_BITS+EC_SYM_BITS-1)/EC_SYM_BITS)* |
| 211 | EC_SYM_BITS; |
Timothy B. Terriberry | 8d940a6 | 2008-10-19 14:41:38 -0400 | [diff] [blame] | 212 | /*To handle the non-integral number of bits still left in the decoder state, |
tterribe | 3eff11d | 2008-01-11 05:51:49 +0000 | [diff] [blame] | 213 | we compute the number of bits of low that must be encoded to ensure that |
Timothy B. Terriberry | 8d940a6 | 2008-10-19 14:41:38 -0400 | [diff] [blame] | 214 | the value is inside the range for any possible subsequent bits.*/ |
Jean-Marc Valin | c08be44 | 2009-06-17 23:23:46 -0400 | [diff] [blame] | 215 | nbits+=EC_CODE_BITS+1+_this->nb_end_bits; |
tterribe | 3eff11d | 2008-01-11 05:51:49 +0000 | [diff] [blame] | 216 | nbits<<=_b; |
| 217 | l=EC_ILOG(_this->rng); |
| 218 | r=_this->rng>>l-16; |
| 219 | while(_b-->0){ |
| 220 | int b; |
| 221 | r=r*r>>15; |
| 222 | b=(int)(r>>16); |
| 223 | l=l<<1|b; |
| 224 | r>>=b; |
| 225 | } |
| 226 | return nbits-l; |
| 227 | } |