Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 1 | /* Copyright (c) 2001-2011 Timothy B. Terriberry |
Jean-Marc Valin | 8b2ff0d | 2009-10-17 21:40:10 -0400 | [diff] [blame] | 2 | Copyright (c) 2008-2009 Xiph.Org Foundation */ |
Gregory Maxwell | f40bbf7 | 2009-02-03 20:36:57 -0500 | [diff] [blame] | 3 | /* |
| 4 | Redistribution and use in source and binary forms, with or without |
| 5 | modification, are permitted provided that the following conditions |
| 6 | are met: |
| 7 | |
| 8 | - Redistributions of source code must retain the above copyright |
| 9 | notice, this list of conditions and the following disclaimer. |
| 10 | |
| 11 | - Redistributions in binary form must reproduce the above copyright |
| 12 | notice, this list of conditions and the following disclaimer in the |
| 13 | documentation and/or other materials provided with the distribution. |
| 14 | |
Gregory Maxwell | f40bbf7 | 2009-02-03 20:36:57 -0500 | [diff] [blame] | 15 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS |
| 16 | ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT |
| 17 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR |
| 18 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR |
| 19 | CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, |
| 20 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, |
| 21 | PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR |
| 22 | PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 23 | LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 24 | NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS |
| 25 | SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 26 | */ |
| 27 | |
Jean-Marc Valin | 02fa913 | 2008-02-20 12:09:29 +1100 | [diff] [blame] | 28 | #ifdef HAVE_CONFIG_H |
| 29 | #include "config.h" |
| 30 | #endif |
| 31 | |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 32 | #include <stddef.h> |
Jean-Marc Valin | a1bb9c7 | 2008-04-28 17:30:26 +1000 | [diff] [blame] | 33 | #include "os_support.h" |
Timothy B. Terriberry | 0268a99 | 2008-12-20 22:12:18 -0500 | [diff] [blame] | 34 | #include "arch.h" |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 35 | #include "entdec.h" |
| 36 | #include "mfrngcod.h" |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 37 | |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 38 | |
| 39 | |
| 40 | /*A range decoder. |
| 41 | This is an entropy decoder based upon \cite{Mar79}, which is itself a |
| 42 | rediscovery of the FIFO arithmetic code introduced by \cite{Pas76}. |
| 43 | It is very similar to arithmetic encoding, except that encoding is done with |
| 44 | digits in any base, instead of with bits, and so it is faster when using |
| 45 | larger bases (i.e.: a byte). |
| 46 | The author claims an average waste of $\frac{1}{2}\log_b(2b)$ bits, where $b$ |
| 47 | is the base, longer than the theoretical optimum, but to my knowledge there |
| 48 | is no published justification for this claim. |
| 49 | This only seems true when using near-infinite precision arithmetic so that |
| 50 | the process is carried out with no rounding errors. |
| 51 | |
| 52 | IBM (the author's employer) never sought to patent the idea, and to my |
| 53 | knowledge the algorithm is unencumbered by any patents, though its |
| 54 | performance is very competitive with proprietary arithmetic coding. |
| 55 | The two are based on very similar ideas, however. |
| 56 | An excellent description of implementation details is available at |
| 57 | http://www.arturocampos.com/ac_range.html |
| 58 | A recent work \cite{MNW98} which proposes several changes to arithmetic |
| 59 | encoding for efficiency actually re-discovers many of the principles |
| 60 | behind range encoding, and presents a good theoretical analysis of them. |
| 61 | |
| 62 | End of stream is handled by writing out the smallest number of bits that |
| 63 | ensures that the stream will be correctly decoded regardless of the value of |
| 64 | any subsequent bits. |
| 65 | ec_tell() can be used to determine how many bits were needed to decode |
| 66 | all the symbols thus far; other data can be packed in the remaining bits of |
| 67 | the input buffer. |
| 68 | @PHDTHESIS{Pas76, |
| 69 | author="Richard Clark Pasco", |
| 70 | title="Source coding algorithms for fast data compression", |
| 71 | school="Dept. of Electrical Engineering, Stanford University", |
| 72 | address="Stanford, CA", |
| 73 | month=May, |
| 74 | year=1976 |
| 75 | } |
| 76 | @INPROCEEDINGS{Mar79, |
| 77 | author="Martin, G.N.N.", |
| 78 | title="Range encoding: an algorithm for removing redundancy from a digitised |
| 79 | message", |
| 80 | booktitle="Video & Data Recording Conference", |
| 81 | year=1979, |
| 82 | address="Southampton", |
| 83 | month=Jul |
| 84 | } |
| 85 | @ARTICLE{MNW98, |
| 86 | author="Alistair Moffat and Radford Neal and Ian H. Witten", |
| 87 | title="Arithmetic Coding Revisited", |
| 88 | journal="{ACM} Transactions on Information Systems", |
| 89 | year=1998, |
| 90 | volume=16, |
| 91 | number=3, |
| 92 | pages="256--294", |
| 93 | month=Jul, |
| 94 | URL="http://www.stanford.edu/class/ee398/handouts/papers/Moffat98ArithmCoding.pdf" |
| 95 | }*/ |
| 96 | |
| 97 | |
| 98 | |
| 99 | static int ec_read_byte(ec_dec *_this){ |
| 100 | return _this->offs<_this->storage?_this->buf[_this->offs++]:0; |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 101 | } |
| 102 | |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 103 | static int ec_read_byte_from_end(ec_dec *_this){ |
| 104 | return _this->end_offs<_this->storage? |
| 105 | _this->buf[_this->storage-++(_this->end_offs)]:0; |
Jean-Marc Valin | c08be44 | 2009-06-17 23:23:46 -0400 | [diff] [blame] | 106 | } |
| 107 | |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 108 | |
| 109 | /*Normalizes the contents of val and rng so that rng lies entirely in the |
| 110 | high-order symbol.*/ |
| 111 | static void ec_dec_normalize(ec_dec *_this){ |
| 112 | /*If the range is too small, rescale it and input some bits.*/ |
| 113 | while(_this->rng<=EC_CODE_BOT){ |
| 114 | int sym; |
| 115 | _this->nbits_total+=EC_SYM_BITS; |
| 116 | _this->rng<<=EC_SYM_BITS; |
| 117 | /*Use up the remaining bits from our last symbol.*/ |
| 118 | sym=_this->rem; |
| 119 | /*Read the next value from the input.*/ |
| 120 | _this->rem=ec_read_byte(_this); |
| 121 | /*Take the rest of the bits we need from this new symbol.*/ |
| 122 | sym=(sym<<EC_SYM_BITS|_this->rem)>>EC_SYM_BITS-EC_CODE_EXTRA; |
| 123 | /*And subtract them from val, capped to be less than EC_CODE_TOP.*/ |
| 124 | _this->val=(_this->val<<EC_SYM_BITS)+(EC_SYM_MAX&~sym)&EC_CODE_TOP-1; |
| 125 | } |
| 126 | } |
| 127 | |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 128 | void ec_dec_init(ec_dec *_this,unsigned char *_buf,celt_uint32 _storage){ |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 129 | _this->buf=_buf; |
| 130 | _this->storage=_storage; |
| 131 | _this->end_offs=0; |
| 132 | _this->end_window=0; |
| 133 | _this->nend_bits=0; |
| 134 | _this->offs=0; |
| 135 | _this->rng=1U<<EC_CODE_EXTRA; |
| 136 | _this->rem=ec_read_byte(_this); |
| 137 | _this->val=_this->rng-1-(_this->rem>>EC_SYM_BITS-EC_CODE_EXTRA); |
| 138 | _this->error=0; |
| 139 | /*Normalize the interval.*/ |
| 140 | ec_dec_normalize(_this); |
| 141 | /*This is the offset from which ec_tell() will subtract partial bits. |
| 142 | This must be after the initial ec_dec_normalize(), or you will have to |
| 143 | compensate for the bits that are read there.*/ |
| 144 | _this->nbits_total=EC_CODE_BITS+1; |
| 145 | } |
| 146 | |
| 147 | |
| 148 | unsigned ec_decode(ec_dec *_this,unsigned _ft){ |
| 149 | unsigned s; |
| 150 | _this->ext=_this->rng/_ft; |
| 151 | s=(unsigned)(_this->val/_this->ext); |
| 152 | return _ft-EC_MINI(s+1,_ft); |
| 153 | } |
| 154 | |
| 155 | unsigned ec_decode_bin(ec_dec *_this,unsigned _bits){ |
| 156 | unsigned s; |
| 157 | _this->ext=_this->rng>>_bits; |
| 158 | s=(unsigned)(_this->val/_this->ext); |
| 159 | return (1<<_bits)-EC_MINI(s+1,1<<_bits); |
| 160 | } |
| 161 | |
| 162 | void ec_dec_update(ec_dec *_this,unsigned _fl,unsigned _fh,unsigned _ft){ |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 163 | celt_uint32 s; |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 164 | s=IMUL32(_this->ext,_ft-_fh); |
| 165 | _this->val-=s; |
| 166 | _this->rng=_fl>0?IMUL32(_this->ext,_fh-_fl):_this->rng-s; |
| 167 | ec_dec_normalize(_this); |
| 168 | } |
| 169 | |
| 170 | /*The probability of having a "one" is 1/(1<<_logp).*/ |
| 171 | int ec_dec_bit_logp(ec_dec *_this,unsigned _logp){ |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 172 | celt_uint32 r; |
| 173 | celt_uint32 d; |
| 174 | celt_uint32 s; |
| 175 | int ret; |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 176 | r=_this->rng; |
| 177 | d=_this->val; |
| 178 | s=r>>_logp; |
| 179 | ret=d<s; |
| 180 | if(!ret)_this->val=d-s; |
| 181 | _this->rng=ret?s:r-s; |
| 182 | ec_dec_normalize(_this); |
| 183 | return ret; |
| 184 | } |
| 185 | |
| 186 | int ec_dec_icdf(ec_dec *_this,const unsigned char *_icdf,unsigned _ftb){ |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 187 | celt_uint32 r; |
| 188 | celt_uint32 d; |
| 189 | celt_uint32 s; |
| 190 | celt_uint32 t; |
| 191 | int ret; |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 192 | s=_this->rng; |
| 193 | d=_this->val; |
| 194 | r=s>>_ftb; |
| 195 | ret=-1; |
| 196 | do{ |
| 197 | t=s; |
| 198 | s=IMUL32(r,_icdf[++ret]); |
| 199 | } |
| 200 | while(d<s); |
| 201 | _this->val=d-s; |
| 202 | _this->rng=t-s; |
| 203 | ec_dec_normalize(_this); |
| 204 | return ret; |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 205 | } |
| 206 | |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 207 | celt_uint32 ec_dec_uint(ec_dec *_this,celt_uint32 _ft){ |
Timothy B. Terriberry | 30df6cf | 2010-12-21 08:42:26 -0800 | [diff] [blame] | 208 | unsigned ft; |
| 209 | unsigned s; |
| 210 | int ftb; |
Timothy B. Terriberry | 0268a99 | 2008-12-20 22:12:18 -0500 | [diff] [blame] | 211 | /*In order to optimize EC_ILOG(), it is undefined for the value 0.*/ |
| 212 | celt_assert(_ft>1); |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 213 | _ft--; |
| 214 | ftb=EC_ILOG(_ft); |
Timothy B. Terriberry | 30df6cf | 2010-12-21 08:42:26 -0800 | [diff] [blame] | 215 | if(ftb>EC_UINT_BITS){ |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 216 | celt_uint32 t; |
Timothy B. Terriberry | 30df6cf | 2010-12-21 08:42:26 -0800 | [diff] [blame] | 217 | ftb-=EC_UINT_BITS; |
Timothy B. Terriberry | f13fea7 | 2007-12-11 13:25:57 +1100 | [diff] [blame] | 218 | ft=(unsigned)(_ft>>ftb)+1; |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 219 | s=ec_decode(_this,ft); |
| 220 | ec_dec_update(_this,s,s+1,ft); |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 221 | t=(celt_uint32)s<<ftb|ec_dec_bits(_this,ftb); |
Timothy B. Terriberry | 30df6cf | 2010-12-21 08:42:26 -0800 | [diff] [blame] | 222 | if(t<=_ft)return t; |
| 223 | _this->error=1; |
| 224 | return _ft; |
| 225 | } |
| 226 | else{ |
Jean-Marc Valin | dc767f6 | 2008-03-22 22:23:58 +1100 | [diff] [blame] | 227 | _ft++; |
| 228 | s=ec_decode(_this,(unsigned)_ft); |
| 229 | ec_dec_update(_this,s,s+1,(unsigned)_ft); |
Jean-Marc Valin | e0aa9d1 | 2010-10-17 16:25:56 -0400 | [diff] [blame] | 230 | return s; |
Timothy B. Terriberry | 2ec8d9e | 2007-12-06 15:09:53 +1100 | [diff] [blame] | 231 | } |
Timothy B. Terriberry | f13fea7 | 2007-12-11 13:25:57 +1100 | [diff] [blame] | 232 | } |
Jean-Marc Valin | b1e017f | 2010-07-18 21:20:35 -0400 | [diff] [blame] | 233 | |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 234 | celt_uint32 ec_dec_bits(ec_dec *_this,unsigned _bits){ |
| 235 | ec_window window; |
| 236 | int available; |
| 237 | celt_uint32 ret; |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 238 | window=_this->end_window; |
| 239 | available=_this->nend_bits; |
| 240 | if(available<_bits){ |
| 241 | do{ |
| 242 | window|=(ec_window)ec_read_byte_from_end(_this)<<available; |
| 243 | available+=EC_SYM_BITS; |
| 244 | } |
| 245 | while(available<=EC_WINDOW_SIZE-EC_SYM_BITS); |
| 246 | } |
Timothy B. Terriberry | 9bac8c1 | 2011-03-02 16:24:32 -0800 | [diff] [blame] | 247 | ret=(celt_uint32)window&((celt_uint32)1<<_bits)-1; |
Timothy B. Terriberry | a093f4d | 2011-02-03 14:22:15 -0800 | [diff] [blame] | 248 | window>>=_bits; |
| 249 | available-=_bits; |
| 250 | _this->end_window=window; |
| 251 | _this->nend_bits=available; |
| 252 | _this->nbits_total+=_bits; |
| 253 | return ret; |
Jean-Marc Valin | b1e017f | 2010-07-18 21:20:35 -0400 | [diff] [blame] | 254 | } |