Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Licensed to the Apache Software Foundation (ASF) under one |
| 3 | * or more contributor license agreements. See the NOTICE file |
| 4 | * distributed with this work for additional information |
| 5 | * regarding copyright ownership. The ASF licenses this file |
| 6 | * to you under the Apache License, Version 2.0 (the |
| 7 | * "License"); you may not use this file except in compliance |
| 8 | * with the License. You may obtain a copy of the License at |
| 9 | * |
| 10 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 11 | * |
| 12 | * Unless required by applicable law or agreed to in writing, |
| 13 | * software distributed under the License is distributed on an |
| 14 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| 15 | * KIND, either express or implied. See the License for the |
| 16 | * specific language governing permissions and limitations |
| 17 | * under the License. |
| 18 | */ |
| 19 | package org.apache.commons.compress.compressors.bzip2; |
| 20 | |
| 21 | import java.io.IOException; |
| 22 | import java.io.OutputStream; |
| 23 | |
| 24 | import org.apache.commons.compress.compressors.CompressorOutputStream; |
| 25 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 26 | /** |
Christian Grobmeier | c04eecc | 2009-04-13 17:24:58 +0000 | [diff] [blame] | 27 | * An output stream that compresses into the BZip2 format into another stream. |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 28 | * |
| 29 | * <p> |
| 30 | * The compression requires large amounts of memory. Thus you should call the |
| 31 | * {@link #close() close()} method as soon as possible, to force |
Sebastian Bazley | 04fa434 | 2009-03-27 19:19:44 +0000 | [diff] [blame] | 32 | * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory. |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 33 | * </p> |
| 34 | * |
| 35 | * <p> You can shrink the amount of allocated memory and maybe raise |
| 36 | * the compression speed by choosing a lower blocksize, which in turn |
| 37 | * may cause a lower compression ratio. You can avoid unnecessary |
| 38 | * memory allocation by avoiding using a blocksize which is bigger |
| 39 | * than the size of the input. </p> |
| 40 | * |
| 41 | * <p> You can compute the memory usage for compressing by the |
| 42 | * following formula: </p> |
| 43 | * |
| 44 | * <pre> |
| 45 | * <code>400k + (9 * blocksize)</code>. |
| 46 | * </pre> |
| 47 | * |
| 48 | * <p> To get the memory required for decompression by {@link |
Sebastian Bazley | 04fa434 | 2009-03-27 19:19:44 +0000 | [diff] [blame] | 49 | * BZip2CompressorInputStream} use </p> |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 50 | * |
| 51 | * <pre> |
| 52 | * <code>65k + (5 * blocksize)</code>. |
| 53 | * </pre> |
| 54 | * |
Stefan Bodewig | 45e51c2 | 2013-12-22 07:03:43 +0000 | [diff] [blame^] | 55 | * <table width="100%" border="1" summary="Memory usage by blocksize"> |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 56 | * <tr> |
| 57 | * <th colspan="3">Memory usage by blocksize</th> |
| 58 | * </tr> |
| 59 | * <tr> |
| 60 | * <th align="right">Blocksize</th> <th align="right">Compression<br> |
| 61 | * memory usage</th> <th align="right">Decompression<br> |
| 62 | * memory usage</th> |
| 63 | * </tr> |
| 64 | * <tr> |
| 65 | * <td align="right">100k</td> |
| 66 | * <td align="right">1300k</td> |
| 67 | * <td align="right">565k</td> |
| 68 | * </tr> |
| 69 | * <tr> |
| 70 | * <td align="right">200k</td> |
| 71 | * <td align="right">2200k</td> |
| 72 | * <td align="right">1065k</td> |
| 73 | * </tr> |
| 74 | * <tr> |
| 75 | * <td align="right">300k</td> |
| 76 | * <td align="right">3100k</td> |
| 77 | * <td align="right">1565k</td> |
| 78 | * </tr> |
| 79 | * <tr> |
| 80 | * <td align="right">400k</td> |
| 81 | * <td align="right">4000k</td> |
| 82 | * <td align="right">2065k</td> |
| 83 | * </tr> |
| 84 | * <tr> |
| 85 | * <td align="right">500k</td> |
| 86 | * <td align="right">4900k</td> |
| 87 | * <td align="right">2565k</td> |
| 88 | * </tr> |
| 89 | * <tr> |
| 90 | * <td align="right">600k</td> |
| 91 | * <td align="right">5800k</td> |
| 92 | * <td align="right">3065k</td> |
| 93 | * </tr> |
| 94 | * <tr> |
| 95 | * <td align="right">700k</td> |
| 96 | * <td align="right">6700k</td> |
| 97 | * <td align="right">3565k</td> |
| 98 | * </tr> |
| 99 | * <tr> |
| 100 | * <td align="right">800k</td> |
| 101 | * <td align="right">7600k</td> |
| 102 | * <td align="right">4065k</td> |
| 103 | * </tr> |
| 104 | * <tr> |
| 105 | * <td align="right">900k</td> |
| 106 | * <td align="right">8500k</td> |
| 107 | * <td align="right">4565k</td> |
| 108 | * </tr> |
| 109 | * </table> |
| 110 | * |
| 111 | * <p> |
Sebastian Bazley | 04fa434 | 2009-03-27 19:19:44 +0000 | [diff] [blame] | 112 | * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 113 | * bzipped input is smaller than one block. |
| 114 | * </p> |
| 115 | * |
| 116 | * <p> |
| 117 | * Instances of this class are not threadsafe. |
| 118 | * </p> |
| 119 | * |
| 120 | * <p> |
| 121 | * TODO: Update to BZip2 1.0.1 |
| 122 | * </p> |
Sebastian Bazley | 99870ef | 2009-03-28 00:04:36 +0000 | [diff] [blame] | 123 | * @NotThreadSafe |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 124 | */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 125 | public class BZip2CompressorOutputStream extends CompressorOutputStream |
| 126 | implements BZip2Constants { |
| 127 | |
| 128 | /** |
| 129 | * The minimum supported blocksize <tt> == 1</tt>. |
| 130 | */ |
| 131 | public static final int MIN_BLOCKSIZE = 1; |
| 132 | |
| 133 | /** |
| 134 | * The maximum supported blocksize <tt> == 9</tt>. |
| 135 | */ |
| 136 | public static final int MAX_BLOCKSIZE = 9; |
| 137 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 138 | private static final int GREATER_ICOST = 15; |
| 139 | private static final int LESSER_ICOST = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 140 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 141 | private static void hbMakeCodeLengths(final byte[] len, final int[] freq, |
| 142 | final Data dat, final int alphaSize, |
| 143 | final int maxLen) { |
| 144 | /* |
| 145 | * Nodes and heap entries run from 1. Entry 0 for both the heap and |
| 146 | * nodes is a sentinel. |
| 147 | */ |
| 148 | final int[] heap = dat.heap; |
| 149 | final int[] weight = dat.weight; |
| 150 | final int[] parent = dat.parent; |
| 151 | |
| 152 | for (int i = alphaSize; --i >= 0;) { |
| 153 | weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8; |
| 154 | } |
| 155 | |
| 156 | for (boolean tooLong = true; tooLong;) { |
| 157 | tooLong = false; |
| 158 | |
| 159 | int nNodes = alphaSize; |
| 160 | int nHeap = 0; |
| 161 | heap[0] = 0; |
| 162 | weight[0] = 0; |
| 163 | parent[0] = -2; |
| 164 | |
| 165 | for (int i = 1; i <= alphaSize; i++) { |
| 166 | parent[i] = -1; |
| 167 | nHeap++; |
| 168 | heap[nHeap] = i; |
| 169 | |
| 170 | int zz = nHeap; |
| 171 | int tmp = heap[zz]; |
| 172 | while (weight[tmp] < weight[heap[zz >> 1]]) { |
| 173 | heap[zz] = heap[zz >> 1]; |
| 174 | zz >>= 1; |
| 175 | } |
| 176 | heap[zz] = tmp; |
| 177 | } |
| 178 | |
| 179 | while (nHeap > 1) { |
| 180 | int n1 = heap[1]; |
| 181 | heap[1] = heap[nHeap]; |
| 182 | nHeap--; |
| 183 | |
| 184 | int yy = 0; |
| 185 | int zz = 1; |
| 186 | int tmp = heap[1]; |
| 187 | |
| 188 | while (true) { |
| 189 | yy = zz << 1; |
| 190 | |
| 191 | if (yy > nHeap) { |
| 192 | break; |
| 193 | } |
| 194 | |
| 195 | if ((yy < nHeap) |
| 196 | && (weight[heap[yy + 1]] < weight[heap[yy]])) { |
| 197 | yy++; |
| 198 | } |
| 199 | |
| 200 | if (weight[tmp] < weight[heap[yy]]) { |
| 201 | break; |
| 202 | } |
| 203 | |
| 204 | heap[zz] = heap[yy]; |
| 205 | zz = yy; |
| 206 | } |
| 207 | |
| 208 | heap[zz] = tmp; |
| 209 | |
| 210 | int n2 = heap[1]; |
| 211 | heap[1] = heap[nHeap]; |
| 212 | nHeap--; |
| 213 | |
| 214 | yy = 0; |
| 215 | zz = 1; |
| 216 | tmp = heap[1]; |
| 217 | |
| 218 | while (true) { |
| 219 | yy = zz << 1; |
| 220 | |
| 221 | if (yy > nHeap) { |
| 222 | break; |
| 223 | } |
| 224 | |
| 225 | if ((yy < nHeap) |
| 226 | && (weight[heap[yy + 1]] < weight[heap[yy]])) { |
| 227 | yy++; |
| 228 | } |
| 229 | |
| 230 | if (weight[tmp] < weight[heap[yy]]) { |
| 231 | break; |
| 232 | } |
| 233 | |
| 234 | heap[zz] = heap[yy]; |
| 235 | zz = yy; |
| 236 | } |
| 237 | |
| 238 | heap[zz] = tmp; |
| 239 | nNodes++; |
| 240 | parent[n1] = parent[n2] = nNodes; |
| 241 | |
| 242 | final int weight_n1 = weight[n1]; |
| 243 | final int weight_n2 = weight[n2]; |
| 244 | weight[nNodes] = ((weight_n1 & 0xffffff00) |
| 245 | + (weight_n2 & 0xffffff00)) |
| 246 | | (1 + (((weight_n1 & 0x000000ff) |
| 247 | > (weight_n2 & 0x000000ff)) |
| 248 | ? (weight_n1 & 0x000000ff) |
| 249 | : (weight_n2 & 0x000000ff))); |
| 250 | |
| 251 | parent[nNodes] = -1; |
| 252 | nHeap++; |
| 253 | heap[nHeap] = nNodes; |
| 254 | |
| 255 | tmp = 0; |
| 256 | zz = nHeap; |
| 257 | tmp = heap[zz]; |
| 258 | final int weight_tmp = weight[tmp]; |
| 259 | while (weight_tmp < weight[heap[zz >> 1]]) { |
| 260 | heap[zz] = heap[zz >> 1]; |
| 261 | zz >>= 1; |
| 262 | } |
| 263 | heap[zz] = tmp; |
| 264 | |
| 265 | } |
| 266 | |
| 267 | for (int i = 1; i <= alphaSize; i++) { |
| 268 | int j = 0; |
| 269 | int k = i; |
| 270 | |
| 271 | for (int parent_k; (parent_k = parent[k]) >= 0;) { |
| 272 | k = parent_k; |
| 273 | j++; |
| 274 | } |
| 275 | |
| 276 | len[i - 1] = (byte) j; |
| 277 | if (j > maxLen) { |
| 278 | tooLong = true; |
| 279 | } |
| 280 | } |
| 281 | |
| 282 | if (tooLong) { |
| 283 | for (int i = 1; i < alphaSize; i++) { |
| 284 | int j = weight[i] >> 8; |
| 285 | j = 1 + (j >> 1); |
| 286 | weight[i] = j << 8; |
| 287 | } |
| 288 | } |
| 289 | } |
| 290 | } |
| 291 | |
| 292 | /** |
| 293 | * Index of the last char in the block, so the block size == last + 1. |
| 294 | */ |
Stefan Bodewig | 41f4a20 | 2009-03-20 15:42:37 +0000 | [diff] [blame] | 295 | private int last; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 296 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 297 | /** |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 298 | * Always: in the range 0 .. 9. The current block size is 100000 * this |
| 299 | * number. |
| 300 | */ |
| 301 | private final int blockSize100k; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 302 | |
Stefan Bodewig | 41f4a20 | 2009-03-20 15:42:37 +0000 | [diff] [blame] | 303 | private int bsBuff; |
| 304 | private int bsLive; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 305 | private final CRC crc = new CRC(); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 306 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 307 | private int nInUse; |
| 308 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 309 | private int nMTF; |
| 310 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 311 | private int currentChar = -1; |
| 312 | private int runLength = 0; |
| 313 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 314 | private int blockCRC; |
| 315 | private int combinedCRC; |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 316 | private final int allowableBlockSize; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 317 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 318 | /** |
| 319 | * All memory intensive stuff. |
| 320 | */ |
| 321 | private Data data; |
Stefan Bodewig | 6e95697 | 2012-05-01 07:33:09 +0000 | [diff] [blame] | 322 | private BlockSort blockSorter; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 323 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 324 | private OutputStream out; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 325 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 326 | /** |
| 327 | * Chooses a blocksize based on the given length of the data to compress. |
| 328 | * |
| 329 | * @return The blocksize, between {@link #MIN_BLOCKSIZE} and |
| 330 | * {@link #MAX_BLOCKSIZE} both inclusive. For a negative |
| 331 | * <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt> |
| 332 | * always. |
| 333 | * |
| 334 | * @param inputLength |
| 335 | * The length of the data which will be compressed by |
Stefan Bodewig | 7684a4b | 2012-05-21 04:24:20 +0000 | [diff] [blame] | 336 | * <tt>BZip2CompressorOutputStream</tt>. |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 337 | */ |
| 338 | public static int chooseBlockSize(long inputLength) { |
| 339 | return (inputLength > 0) ? (int) Math |
| 340 | .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 341 | } |
| 342 | |
| 343 | /** |
Stefan Bodewig | 7684a4b | 2012-05-21 04:24:20 +0000 | [diff] [blame] | 344 | * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 345 | * |
Christian Grobmeier | cf64557 | 2009-04-13 15:20:24 +0000 | [diff] [blame] | 346 | * @param out |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 347 | * the destination stream. |
| 348 | * |
| 349 | * @throws IOException |
| 350 | * if an I/O error occurs in the specified stream. |
| 351 | * @throws NullPointerException |
| 352 | * if <code>out == null</code>. |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 353 | */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 354 | public BZip2CompressorOutputStream(final OutputStream out) |
| 355 | throws IOException { |
| 356 | this(out, MAX_BLOCKSIZE); |
| 357 | } |
| 358 | |
| 359 | /** |
Stefan Bodewig | 7684a4b | 2012-05-21 04:24:20 +0000 | [diff] [blame] | 360 | * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize. |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 361 | * |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 362 | * @param out |
| 363 | * the destination stream. |
| 364 | * @param blockSize |
| 365 | * the blockSize as 100k units. |
| 366 | * |
| 367 | * @throws IOException |
| 368 | * if an I/O error occurs in the specified stream. |
| 369 | * @throws IllegalArgumentException |
Stefan Bodewig | 45e51c2 | 2013-12-22 07:03:43 +0000 | [diff] [blame^] | 370 | * if <code>(blockSize < 1) || (blockSize > 9)</code>. |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 371 | * @throws NullPointerException |
| 372 | * if <code>out == null</code>. |
| 373 | * |
| 374 | * @see #MIN_BLOCKSIZE |
| 375 | * @see #MAX_BLOCKSIZE |
| 376 | */ |
Emmanuel Bourg | dab23f7 | 2013-08-07 14:15:45 +0000 | [diff] [blame] | 377 | public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 378 | if (blockSize < 1) { |
Emmanuel Bourg | dab23f7 | 2013-08-07 14:15:45 +0000 | [diff] [blame] | 379 | throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1"); |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 380 | } |
| 381 | if (blockSize > 9) { |
Emmanuel Bourg | dab23f7 | 2013-08-07 14:15:45 +0000 | [diff] [blame] | 382 | throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9"); |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 383 | } |
| 384 | |
| 385 | this.blockSize100k = blockSize; |
| 386 | this.out = out; |
Sebastian Bazley | b58c039 | 2012-05-21 10:46:07 +0000 | [diff] [blame] | 387 | |
| 388 | /* 20 is just a paranoia constant */ |
| 389 | this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 390 | init(); |
| 391 | } |
| 392 | |
Stefan Bodewig | 46628ef | 2011-08-06 16:30:40 +0000 | [diff] [blame] | 393 | @Override |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 394 | public void write(final int b) throws IOException { |
| 395 | if (this.out != null) { |
| 396 | write0(b); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 397 | } else { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 398 | throw new IOException("closed"); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 399 | } |
| 400 | } |
| 401 | |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 402 | /** |
| 403 | * Writes the current byte to the buffer, run-length encoding it |
| 404 | * if it has been repeated at least four times (the first step |
| 405 | * RLEs sequences of four identical bytes). |
| 406 | * |
| 407 | * <p>Flushes the current block before writing data if it is |
| 408 | * full.</p> |
| 409 | * |
| 410 | * <p>"write to the buffer" means adding to data.buffer starting |
| 411 | * two steps "after" this.last - initially starting at index 1 |
| 412 | * (not 0) - and updating this.last to point to the last index |
| 413 | * written minus 1.</p> |
| 414 | */ |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 415 | private void writeRun() throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 416 | final int lastShadow = this.last; |
| 417 | |
| 418 | if (lastShadow < this.allowableBlockSize) { |
| 419 | final int currentCharShadow = this.currentChar; |
| 420 | final Data dataShadow = this.data; |
| 421 | dataShadow.inUse[currentCharShadow] = true; |
| 422 | final byte ch = (byte) currentCharShadow; |
| 423 | |
| 424 | int runLengthShadow = this.runLength; |
| 425 | this.crc.updateCRC(currentCharShadow, runLengthShadow); |
| 426 | |
| 427 | switch (runLengthShadow) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 428 | case 1: |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 429 | dataShadow.block[lastShadow + 2] = ch; |
| 430 | this.last = lastShadow + 1; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 431 | break; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 432 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 433 | case 2: |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 434 | dataShadow.block[lastShadow + 2] = ch; |
| 435 | dataShadow.block[lastShadow + 3] = ch; |
| 436 | this.last = lastShadow + 2; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 437 | break; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 438 | |
| 439 | case 3: { |
| 440 | final byte[] block = dataShadow.block; |
| 441 | block[lastShadow + 2] = ch; |
| 442 | block[lastShadow + 3] = ch; |
| 443 | block[lastShadow + 4] = ch; |
| 444 | this.last = lastShadow + 3; |
| 445 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 446 | break; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 447 | |
| 448 | default: { |
| 449 | runLengthShadow -= 4; |
| 450 | dataShadow.inUse[runLengthShadow] = true; |
| 451 | final byte[] block = dataShadow.block; |
| 452 | block[lastShadow + 2] = ch; |
| 453 | block[lastShadow + 3] = ch; |
| 454 | block[lastShadow + 4] = ch; |
| 455 | block[lastShadow + 5] = ch; |
| 456 | block[lastShadow + 6] = (byte) runLengthShadow; |
| 457 | this.last = lastShadow + 5; |
| 458 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 459 | break; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 460 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 461 | } |
| 462 | } else { |
| 463 | endBlock(); |
| 464 | initBlock(); |
| 465 | writeRun(); |
| 466 | } |
| 467 | } |
| 468 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 469 | /** |
| 470 | * Overriden to close the stream. |
| 471 | */ |
Stefan Bodewig | 46628ef | 2011-08-06 16:30:40 +0000 | [diff] [blame] | 472 | @Override |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 473 | protected void finalize() throws Throwable { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 474 | finish(); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 475 | super.finalize(); |
| 476 | } |
| 477 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 478 | |
| 479 | public void finish() throws IOException { |
| 480 | if (out != null) { |
| 481 | try { |
| 482 | if (this.runLength > 0) { |
| 483 | writeRun(); |
| 484 | } |
| 485 | this.currentChar = -1; |
| 486 | endBlock(); |
| 487 | endCompression(); |
| 488 | } finally { |
| 489 | this.out = null; |
| 490 | this.data = null; |
Stefan Bodewig | 6e95697 | 2012-05-01 07:33:09 +0000 | [diff] [blame] | 491 | this.blockSorter = null; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 492 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 493 | } |
Torsten Curdt | 7b879e5 | 2009-01-12 11:31:06 +0000 | [diff] [blame] | 494 | } |
Stefan Bodewig | 3f9bcc6 | 2009-02-10 14:20:05 +0000 | [diff] [blame] | 495 | |
Stefan Bodewig | 46628ef | 2011-08-06 16:30:40 +0000 | [diff] [blame] | 496 | @Override |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 497 | public void close() throws IOException { |
| 498 | if (out != null) { |
| 499 | OutputStream outShadow = this.out; |
| 500 | finish(); |
| 501 | outShadow.close(); |
Torsten Curdt | 7b879e5 | 2009-01-12 11:31:06 +0000 | [diff] [blame] | 502 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 503 | } |
| 504 | |
Stefan Bodewig | 46628ef | 2011-08-06 16:30:40 +0000 | [diff] [blame] | 505 | @Override |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 506 | public void flush() throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 507 | OutputStream outShadow = this.out; |
| 508 | if (outShadow != null) { |
| 509 | outShadow.flush(); |
| 510 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 511 | } |
| 512 | |
Christian Grobmeier | cf64557 | 2009-04-13 15:20:24 +0000 | [diff] [blame] | 513 | /** |
| 514 | * Writes magic bytes like BZ on the first position of the stream |
| 515 | * and bytes indiciating the file-format, which is |
| 516 | * huffmanised, followed by a digit indicating blockSize100k. |
| 517 | * @throws IOException if the magic bytes could not been written |
| 518 | */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 519 | private void init() throws IOException { |
Christian Grobmeier | cf64557 | 2009-04-13 15:20:24 +0000 | [diff] [blame] | 520 | bsPutUByte('B'); |
| 521 | bsPutUByte('Z'); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 522 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 523 | this.data = new Data(this.blockSize100k); |
Stefan Bodewig | 6e95697 | 2012-05-01 07:33:09 +0000 | [diff] [blame] | 524 | this.blockSorter = new BlockSort(this.data); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 525 | |
Christian Grobmeier | cf64557 | 2009-04-13 15:20:24 +0000 | [diff] [blame] | 526 | // huffmanised magic bytes |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 527 | bsPutUByte('h'); |
| 528 | bsPutUByte('0' + this.blockSize100k); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 529 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 530 | this.combinedCRC = 0; |
| 531 | initBlock(); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 532 | } |
| 533 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 534 | private void initBlock() { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 535 | // blockNo++; |
| 536 | this.crc.initialiseCRC(); |
| 537 | this.last = -1; |
| 538 | // ch = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 539 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 540 | boolean[] inUse = this.data.inUse; |
| 541 | for (int i = 256; --i >= 0;) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 542 | inUse[i] = false; |
| 543 | } |
Stefan Bodewig | 7684a4b | 2012-05-21 04:24:20 +0000 | [diff] [blame] | 544 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 545 | } |
| 546 | |
| 547 | private void endBlock() throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 548 | this.blockCRC = this.crc.getFinalCRC(); |
| 549 | this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31); |
| 550 | this.combinedCRC ^= this.blockCRC; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 551 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 552 | // empty block at end of file |
| 553 | if (this.last == -1) { |
Stefan Bodewig | 3f9bcc6 | 2009-02-10 14:20:05 +0000 | [diff] [blame] | 554 | return; |
Torsten Curdt | c199737 | 2009-01-08 11:13:50 +0000 | [diff] [blame] | 555 | } |
Stefan Bodewig | 3f9bcc6 | 2009-02-10 14:20:05 +0000 | [diff] [blame] | 556 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 557 | /* sort the block and establish posn of original string */ |
Stefan Bodewig | 6ced422 | 2012-05-20 18:10:46 +0000 | [diff] [blame] | 558 | blockSort(); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 559 | |
| 560 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 561 | * A 6-byte block header, the value chosen arbitrarily as 0x314159265359 |
| 562 | * :-). A 32 bit value does not really give a strong enough guarantee |
| 563 | * that the value will not appear by chance in the compressed |
| 564 | * datastream. Worst-case probability of this event, for a 900k block, |
| 565 | * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 |
| 566 | * bits. For a compressed file of size 100Gb -- about 100000 blocks -- |
| 567 | * only a 48-bit marker will do. NB: normal compression/ decompression |
| 568 | * donot rely on these statistical properties. They are only important |
| 569 | * when trying to recover blocks from damaged files. |
| 570 | */ |
| 571 | bsPutUByte(0x31); |
| 572 | bsPutUByte(0x41); |
| 573 | bsPutUByte(0x59); |
| 574 | bsPutUByte(0x26); |
| 575 | bsPutUByte(0x53); |
| 576 | bsPutUByte(0x59); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 577 | |
| 578 | /* Now the block's CRC, so it is in a known place. */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 579 | bsPutInt(this.blockCRC); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 580 | |
Stefan Bodewig | 6ced422 | 2012-05-20 18:10:46 +0000 | [diff] [blame] | 581 | /* Now a single bit indicating no randomisation. */ |
| 582 | bsW(1, 0); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 583 | |
| 584 | /* Finally, block's contents proper. */ |
| 585 | moveToFrontCodeAndSend(); |
| 586 | } |
| 587 | |
| 588 | private void endCompression() throws IOException { |
| 589 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 590 | * Now another magic 48-bit number, 0x177245385090, to indicate the end |
| 591 | * of the last block. (sqrt(pi), if you want to know. I did want to use |
| 592 | * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me |
| 593 | * to feel statistically comfortable. Call me paranoid.) |
| 594 | */ |
| 595 | bsPutUByte(0x17); |
| 596 | bsPutUByte(0x72); |
| 597 | bsPutUByte(0x45); |
| 598 | bsPutUByte(0x38); |
| 599 | bsPutUByte(0x50); |
| 600 | bsPutUByte(0x90); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 601 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 602 | bsPutInt(this.combinedCRC); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 603 | bsFinishedWithStream(); |
| 604 | } |
| 605 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 606 | /** |
| 607 | * Returns the blocksize parameter specified at construction time. |
| 608 | */ |
| 609 | public final int getBlockSize() { |
| 610 | return this.blockSize100k; |
| 611 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 612 | |
Stefan Bodewig | 46628ef | 2011-08-06 16:30:40 +0000 | [diff] [blame] | 613 | @Override |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 614 | public void write(final byte[] buf, int offs, final int len) |
| 615 | throws IOException { |
| 616 | if (offs < 0) { |
| 617 | throw new IndexOutOfBoundsException("offs(" + offs + ") < 0."); |
| 618 | } |
| 619 | if (len < 0) { |
| 620 | throw new IndexOutOfBoundsException("len(" + len + ") < 0."); |
| 621 | } |
| 622 | if (offs + len > buf.length) { |
| 623 | throw new IndexOutOfBoundsException("offs(" + offs + ") + len(" |
| 624 | + len + ") > buf.length(" |
| 625 | + buf.length + ")."); |
| 626 | } |
| 627 | if (this.out == null) { |
| 628 | throw new IOException("stream closed"); |
| 629 | } |
| 630 | |
| 631 | for (int hi = offs + len; offs < hi;) { |
| 632 | write0(buf[offs++]); |
| 633 | } |
| 634 | } |
| 635 | |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 636 | /** |
| 637 | * Keeps track of the last bytes written and implicitly performs |
| 638 | * run-length encoding as the first step of the bzip2 algorithm. |
| 639 | */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 640 | private void write0(int b) throws IOException { |
| 641 | if (this.currentChar != -1) { |
| 642 | b &= 0xff; |
| 643 | if (this.currentChar == b) { |
| 644 | if (++this.runLength > 254) { |
| 645 | writeRun(); |
| 646 | this.currentChar = -1; |
| 647 | this.runLength = 0; |
| 648 | } |
| 649 | // else nothing to do |
| 650 | } else { |
| 651 | writeRun(); |
| 652 | this.runLength = 1; |
| 653 | this.currentChar = b; |
| 654 | } |
| 655 | } else { |
| 656 | this.currentChar = b & 0xff; |
| 657 | this.runLength++; |
| 658 | } |
| 659 | } |
| 660 | |
| 661 | private static void hbAssignCodes(final int[] code, final byte[] length, |
| 662 | final int minLen, final int maxLen, |
| 663 | final int alphaSize) { |
| 664 | int vec = 0; |
| 665 | for (int n = minLen; n <= maxLen; n++) { |
| 666 | for (int i = 0; i < alphaSize; i++) { |
| 667 | if ((length[i] & 0xff) == n) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 668 | code[i] = vec; |
| 669 | vec++; |
| 670 | } |
Torsten Curdt | c199737 | 2009-01-08 11:13:50 +0000 | [diff] [blame] | 671 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 672 | vec <<= 1; |
| 673 | } |
| 674 | } |
| 675 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 676 | private void bsFinishedWithStream() throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 677 | while (this.bsLive > 0) { |
| 678 | int ch = this.bsBuff >> 24; |
| 679 | this.out.write(ch); // write 8-bit |
| 680 | this.bsBuff <<= 8; |
| 681 | this.bsLive -= 8; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 682 | } |
| 683 | } |
| 684 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 685 | private void bsW(final int n, final int v) throws IOException { |
| 686 | final OutputStream outShadow = this.out; |
| 687 | int bsLiveShadow = this.bsLive; |
| 688 | int bsBuffShadow = this.bsBuff; |
| 689 | |
| 690 | while (bsLiveShadow >= 8) { |
| 691 | outShadow.write(bsBuffShadow >> 24); // write 8-bit |
| 692 | bsBuffShadow <<= 8; |
| 693 | bsLiveShadow -= 8; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 694 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 695 | |
| 696 | this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n)); |
| 697 | this.bsLive = bsLiveShadow + n; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 698 | } |
| 699 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 700 | private void bsPutUByte(final int c) throws IOException { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 701 | bsW(8, c); |
| 702 | } |
| 703 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 704 | private void bsPutInt(final int u) throws IOException { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 705 | bsW(8, (u >> 24) & 0xff); |
| 706 | bsW(8, (u >> 16) & 0xff); |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 707 | bsW(8, (u >> 8) & 0xff); |
| 708 | bsW(8, u & 0xff); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 709 | } |
| 710 | |
| 711 | private void sendMTFValues() throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 712 | final byte[][] len = this.data.sendMTFValues_len; |
| 713 | final int alphaSize = this.nInUse + 2; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 714 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 715 | for (int t = N_GROUPS; --t >= 0;) { |
| 716 | byte[] len_t = len[t]; |
| 717 | for (int v = alphaSize; --v >= 0;) { |
| 718 | len_t[v] = GREATER_ICOST; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 719 | } |
| 720 | } |
| 721 | |
| 722 | /* Decide how many coding tables to use */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 723 | // assert (this.nMTF > 0) : this.nMTF; |
| 724 | final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3 |
| 725 | : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 726 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 727 | /* Generate an initial set of coding tables */ |
| 728 | sendMTFValues0(nGroups, alphaSize); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 729 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 730 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 731 | * Iterate up to N_ITERS times to improve the tables. |
| 732 | */ |
| 733 | final int nSelectors = sendMTFValues1(nGroups, alphaSize); |
| 734 | |
| 735 | /* Compute MTF values for the selectors. */ |
| 736 | sendMTFValues2(nGroups, nSelectors); |
| 737 | |
| 738 | /* Assign actual codes for the tables. */ |
| 739 | sendMTFValues3(nGroups, alphaSize); |
| 740 | |
| 741 | /* Transmit the mapping table. */ |
| 742 | sendMTFValues4(); |
| 743 | |
| 744 | /* Now the selectors. */ |
| 745 | sendMTFValues5(nGroups, nSelectors); |
| 746 | |
| 747 | /* Now the coding tables. */ |
| 748 | sendMTFValues6(nGroups, alphaSize); |
| 749 | |
| 750 | /* And finally, the block data proper */ |
Stefan Bodewig | 58c56fc | 2011-07-23 12:41:55 +0000 | [diff] [blame] | 751 | sendMTFValues7(); |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 752 | } |
| 753 | |
| 754 | private void sendMTFValues0(final int nGroups, final int alphaSize) { |
| 755 | final byte[][] len = this.data.sendMTFValues_len; |
| 756 | final int[] mtfFreq = this.data.mtfFreq; |
| 757 | |
| 758 | int remF = this.nMTF; |
| 759 | int gs = 0; |
| 760 | |
| 761 | for (int nPart = nGroups; nPart > 0; nPart--) { |
| 762 | final int tFreq = remF / nPart; |
| 763 | int ge = gs - 1; |
| 764 | int aFreq = 0; |
| 765 | |
| 766 | for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) { |
| 767 | aFreq += mtfFreq[++ge]; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 768 | } |
| 769 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 770 | if ((ge > gs) && (nPart != nGroups) && (nPart != 1) |
| 771 | && (((nGroups - nPart) & 1) != 0)) { |
| 772 | aFreq -= mtfFreq[ge--]; |
| 773 | } |
| 774 | |
| 775 | final byte[] len_np = len[nPart - 1]; |
| 776 | for (int v = alphaSize; --v >= 0;) { |
| 777 | if ((v >= gs) && (v <= ge)) { |
| 778 | len_np[v] = LESSER_ICOST; |
| 779 | } else { |
| 780 | len_np[v] = GREATER_ICOST; |
| 781 | } |
| 782 | } |
| 783 | |
| 784 | gs = ge + 1; |
| 785 | remF -= aFreq; |
| 786 | } |
| 787 | } |
| 788 | |
| 789 | private int sendMTFValues1(final int nGroups, final int alphaSize) { |
| 790 | final Data dataShadow = this.data; |
| 791 | final int[][] rfreq = dataShadow.sendMTFValues_rfreq; |
| 792 | final int[] fave = dataShadow.sendMTFValues_fave; |
| 793 | final short[] cost = dataShadow.sendMTFValues_cost; |
| 794 | final char[] sfmap = dataShadow.sfmap; |
| 795 | final byte[] selector = dataShadow.selector; |
| 796 | final byte[][] len = dataShadow.sendMTFValues_len; |
| 797 | final byte[] len_0 = len[0]; |
| 798 | final byte[] len_1 = len[1]; |
| 799 | final byte[] len_2 = len[2]; |
| 800 | final byte[] len_3 = len[3]; |
| 801 | final byte[] len_4 = len[4]; |
| 802 | final byte[] len_5 = len[5]; |
| 803 | final int nMTFShadow = this.nMTF; |
| 804 | |
| 805 | int nSelectors = 0; |
| 806 | |
| 807 | for (int iter = 0; iter < N_ITERS; iter++) { |
| 808 | for (int t = nGroups; --t >= 0;) { |
| 809 | fave[t] = 0; |
| 810 | int[] rfreqt = rfreq[t]; |
| 811 | for (int i = alphaSize; --i >= 0;) { |
| 812 | rfreqt[i] = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 813 | } |
| 814 | } |
| 815 | |
| 816 | nSelectors = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 817 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 818 | for (int gs = 0; gs < this.nMTF;) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 819 | /* Set group start & end marks. */ |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 820 | |
| 821 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 822 | * Calculate the cost of this group as coded by each of the |
| 823 | * coding tables. |
| 824 | */ |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 825 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 826 | final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); |
| 827 | |
| 828 | if (nGroups == N_GROUPS) { |
| 829 | // unrolled version of the else-block |
| 830 | |
| 831 | short cost0 = 0; |
| 832 | short cost1 = 0; |
| 833 | short cost2 = 0; |
| 834 | short cost3 = 0; |
| 835 | short cost4 = 0; |
| 836 | short cost5 = 0; |
| 837 | |
| 838 | for (int i = gs; i <= ge; i++) { |
| 839 | final int icv = sfmap[i]; |
| 840 | cost0 += len_0[icv] & 0xff; |
| 841 | cost1 += len_1[icv] & 0xff; |
| 842 | cost2 += len_2[icv] & 0xff; |
| 843 | cost3 += len_3[icv] & 0xff; |
| 844 | cost4 += len_4[icv] & 0xff; |
| 845 | cost5 += len_5[icv] & 0xff; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 846 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 847 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 848 | cost[0] = cost0; |
| 849 | cost[1] = cost1; |
| 850 | cost[2] = cost2; |
| 851 | cost[3] = cost3; |
| 852 | cost[4] = cost4; |
| 853 | cost[5] = cost5; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 854 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 855 | } else { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 856 | for (int t = nGroups; --t >= 0;) { |
| 857 | cost[t] = 0; |
| 858 | } |
| 859 | |
| 860 | for (int i = gs; i <= ge; i++) { |
| 861 | final int icv = sfmap[i]; |
| 862 | for (int t = nGroups; --t >= 0;) { |
| 863 | cost[t] += len[t][icv] & 0xff; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 864 | } |
| 865 | } |
| 866 | } |
| 867 | |
| 868 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 869 | * Find the coding table which is best for this group, and |
| 870 | * record its identity in the selector table. |
| 871 | */ |
| 872 | int bt = -1; |
| 873 | for (int t = nGroups, bc = 999999999; --t >= 0;) { |
| 874 | final int cost_t = cost[t]; |
| 875 | if (cost_t < bc) { |
| 876 | bc = cost_t; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 877 | bt = t; |
| 878 | } |
Torsten Curdt | c199737 | 2009-01-08 11:13:50 +0000 | [diff] [blame] | 879 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 880 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 881 | fave[bt]++; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 882 | selector[nSelectors] = (byte) bt; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 883 | nSelectors++; |
| 884 | |
| 885 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 886 | * Increment the symbol frequencies for the selected table. |
| 887 | */ |
| 888 | final int[] rfreq_bt = rfreq[bt]; |
| 889 | for (int i = gs; i <= ge; i++) { |
| 890 | rfreq_bt[sfmap[i]]++; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 891 | } |
| 892 | |
| 893 | gs = ge + 1; |
| 894 | } |
| 895 | |
| 896 | /* |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 897 | * Recompute the tables based on the accumulated frequencies. |
| 898 | */ |
| 899 | for (int t = 0; t < nGroups; t++) { |
| 900 | hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 901 | } |
| 902 | } |
| 903 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 904 | return nSelectors; |
| 905 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 906 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 907 | private void sendMTFValues2(final int nGroups, final int nSelectors) { |
| 908 | // assert (nGroups < 8) : nGroups; |
| 909 | |
| 910 | final Data dataShadow = this.data; |
| 911 | byte[] pos = dataShadow.sendMTFValues2_pos; |
| 912 | |
| 913 | for (int i = nGroups; --i >= 0;) { |
| 914 | pos[i] = (byte) i; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 915 | } |
| 916 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 917 | for (int i = 0; i < nSelectors; i++) { |
| 918 | final byte ll_i = dataShadow.selector[i]; |
| 919 | byte tmp = pos[0]; |
| 920 | int j = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 921 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 922 | while (ll_i != tmp) { |
| 923 | j++; |
| 924 | byte tmp2 = tmp; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 925 | tmp = pos[j]; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 926 | pos[j] = tmp2; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 927 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 928 | |
| 929 | pos[0] = tmp; |
| 930 | dataShadow.selectorMtf[i] = (byte) j; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 931 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 932 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 933 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 934 | private void sendMTFValues3(final int nGroups, final int alphaSize) { |
| 935 | int[][] code = this.data.sendMTFValues_code; |
| 936 | byte[][] len = this.data.sendMTFValues_len; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 937 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 938 | for (int t = 0; t < nGroups; t++) { |
| 939 | int minLen = 32; |
| 940 | int maxLen = 0; |
| 941 | final byte[] len_t = len[t]; |
| 942 | for (int i = alphaSize; --i >= 0;) { |
| 943 | final int l = len_t[i] & 0xff; |
| 944 | if (l > maxLen) { |
| 945 | maxLen = l; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 946 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 947 | if (l < minLen) { |
| 948 | minLen = l; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 949 | } |
| 950 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 951 | |
| 952 | // assert (maxLen <= 20) : maxLen; |
| 953 | // assert (minLen >= 1) : minLen; |
| 954 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 955 | hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize); |
| 956 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 957 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 958 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 959 | private void sendMTFValues4() throws IOException { |
| 960 | final boolean[] inUse = this.data.inUse; |
| 961 | final boolean[] inUse16 = this.data.sentMTFValues4_inUse16; |
| 962 | |
| 963 | for (int i = 16; --i >= 0;) { |
| 964 | inUse16[i] = false; |
| 965 | final int i16 = i * 16; |
| 966 | for (int j = 16; --j >= 0;) { |
| 967 | if (inUse[i16 + j]) { |
| 968 | inUse16[i] = true; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 969 | } |
| 970 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 971 | } |
| 972 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 973 | for (int i = 0; i < 16; i++) { |
| 974 | bsW(1, inUse16[i] ? 1 : 0); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 975 | } |
| 976 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 977 | final OutputStream outShadow = this.out; |
| 978 | int bsLiveShadow = this.bsLive; |
| 979 | int bsBuffShadow = this.bsBuff; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 980 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 981 | for (int i = 0; i < 16; i++) { |
| 982 | if (inUse16[i]) { |
| 983 | final int i16 = i * 16; |
| 984 | for (int j = 0; j < 16; j++) { |
| 985 | // inlined: bsW(1, inUse[i16 + j] ? 1 : 0); |
| 986 | while (bsLiveShadow >= 8) { |
| 987 | outShadow.write(bsBuffShadow >> 24); // write 8-bit |
| 988 | bsBuffShadow <<= 8; |
| 989 | bsLiveShadow -= 8; |
| 990 | } |
| 991 | if (inUse[i16 + j]) { |
| 992 | bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); |
| 993 | } |
| 994 | bsLiveShadow++; |
| 995 | } |
| 996 | } |
| 997 | } |
| 998 | |
| 999 | this.bsBuff = bsBuffShadow; |
| 1000 | this.bsLive = bsLiveShadow; |
| 1001 | } |
| 1002 | |
| 1003 | private void sendMTFValues5(final int nGroups, final int nSelectors) |
| 1004 | throws IOException { |
| 1005 | bsW(3, nGroups); |
| 1006 | bsW(15, nSelectors); |
| 1007 | |
| 1008 | final OutputStream outShadow = this.out; |
| 1009 | final byte[] selectorMtf = this.data.selectorMtf; |
| 1010 | |
| 1011 | int bsLiveShadow = this.bsLive; |
| 1012 | int bsBuffShadow = this.bsBuff; |
| 1013 | |
| 1014 | for (int i = 0; i < nSelectors; i++) { |
| 1015 | for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) { |
| 1016 | // inlined: bsW(1, 1); |
| 1017 | while (bsLiveShadow >= 8) { |
| 1018 | outShadow.write(bsBuffShadow >> 24); |
| 1019 | bsBuffShadow <<= 8; |
| 1020 | bsLiveShadow -= 8; |
| 1021 | } |
| 1022 | bsBuffShadow |= 1 << (32 - bsLiveShadow - 1); |
| 1023 | bsLiveShadow++; |
| 1024 | } |
| 1025 | |
| 1026 | // inlined: bsW(1, 0); |
| 1027 | while (bsLiveShadow >= 8) { |
| 1028 | outShadow.write(bsBuffShadow >> 24); |
| 1029 | bsBuffShadow <<= 8; |
| 1030 | bsLiveShadow -= 8; |
| 1031 | } |
| 1032 | // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); |
| 1033 | bsLiveShadow++; |
| 1034 | } |
| 1035 | |
| 1036 | this.bsBuff = bsBuffShadow; |
| 1037 | this.bsLive = bsLiveShadow; |
| 1038 | } |
| 1039 | |
| 1040 | private void sendMTFValues6(final int nGroups, final int alphaSize) |
| 1041 | throws IOException { |
| 1042 | final byte[][] len = this.data.sendMTFValues_len; |
| 1043 | final OutputStream outShadow = this.out; |
| 1044 | |
| 1045 | int bsLiveShadow = this.bsLive; |
| 1046 | int bsBuffShadow = this.bsBuff; |
| 1047 | |
| 1048 | for (int t = 0; t < nGroups; t++) { |
| 1049 | byte[] len_t = len[t]; |
| 1050 | int curr = len_t[0] & 0xff; |
| 1051 | |
| 1052 | // inlined: bsW(5, curr); |
| 1053 | while (bsLiveShadow >= 8) { |
| 1054 | outShadow.write(bsBuffShadow >> 24); // write 8-bit |
| 1055 | bsBuffShadow <<= 8; |
| 1056 | bsLiveShadow -= 8; |
| 1057 | } |
| 1058 | bsBuffShadow |= curr << (32 - bsLiveShadow - 5); |
| 1059 | bsLiveShadow += 5; |
| 1060 | |
| 1061 | for (int i = 0; i < alphaSize; i++) { |
| 1062 | int lti = len_t[i] & 0xff; |
| 1063 | while (curr < lti) { |
| 1064 | // inlined: bsW(2, 2); |
| 1065 | while (bsLiveShadow >= 8) { |
| 1066 | outShadow.write(bsBuffShadow >> 24); // write 8-bit |
| 1067 | bsBuffShadow <<= 8; |
| 1068 | bsLiveShadow -= 8; |
| 1069 | } |
| 1070 | bsBuffShadow |= 2 << (32 - bsLiveShadow - 2); |
| 1071 | bsLiveShadow += 2; |
| 1072 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1073 | curr++; /* 10 */ |
| 1074 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1075 | |
| 1076 | while (curr > lti) { |
| 1077 | // inlined: bsW(2, 3); |
| 1078 | while (bsLiveShadow >= 8) { |
| 1079 | outShadow.write(bsBuffShadow >> 24); // write 8-bit |
| 1080 | bsBuffShadow <<= 8; |
| 1081 | bsLiveShadow -= 8; |
| 1082 | } |
| 1083 | bsBuffShadow |= 3 << (32 - bsLiveShadow - 2); |
| 1084 | bsLiveShadow += 2; |
| 1085 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1086 | curr--; /* 11 */ |
| 1087 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1088 | |
| 1089 | // inlined: bsW(1, 0); |
| 1090 | while (bsLiveShadow >= 8) { |
| 1091 | outShadow.write(bsBuffShadow >> 24); // write 8-bit |
| 1092 | bsBuffShadow <<= 8; |
| 1093 | bsLiveShadow -= 8; |
| 1094 | } |
| 1095 | // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1); |
| 1096 | bsLiveShadow++; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1097 | } |
| 1098 | } |
| 1099 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1100 | this.bsBuff = bsBuffShadow; |
| 1101 | this.bsLive = bsLiveShadow; |
| 1102 | } |
| 1103 | |
Stefan Bodewig | 58c56fc | 2011-07-23 12:41:55 +0000 | [diff] [blame] | 1104 | private void sendMTFValues7() throws IOException { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1105 | final Data dataShadow = this.data; |
| 1106 | final byte[][] len = dataShadow.sendMTFValues_len; |
| 1107 | final int[][] code = dataShadow.sendMTFValues_code; |
| 1108 | final OutputStream outShadow = this.out; |
| 1109 | final byte[] selector = dataShadow.selector; |
| 1110 | final char[] sfmap = dataShadow.sfmap; |
| 1111 | final int nMTFShadow = this.nMTF; |
| 1112 | |
| 1113 | int selCtr = 0; |
| 1114 | |
| 1115 | int bsLiveShadow = this.bsLive; |
| 1116 | int bsBuffShadow = this.bsBuff; |
| 1117 | |
| 1118 | for (int gs = 0; gs < nMTFShadow;) { |
| 1119 | final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1); |
| 1120 | final int selector_selCtr = selector[selCtr] & 0xff; |
| 1121 | final int[] code_selCtr = code[selector_selCtr]; |
| 1122 | final byte[] len_selCtr = len[selector_selCtr]; |
| 1123 | |
| 1124 | while (gs <= ge) { |
| 1125 | final int sfmap_i = sfmap[gs]; |
| 1126 | |
| 1127 | // |
| 1128 | // inlined: bsW(len_selCtr[sfmap_i] & 0xff, |
| 1129 | // code_selCtr[sfmap_i]); |
| 1130 | // |
| 1131 | while (bsLiveShadow >= 8) { |
| 1132 | outShadow.write(bsBuffShadow >> 24); |
| 1133 | bsBuffShadow <<= 8; |
| 1134 | bsLiveShadow -= 8; |
| 1135 | } |
| 1136 | final int n = len_selCtr[sfmap_i] & 0xFF; |
| 1137 | bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n); |
| 1138 | bsLiveShadow += n; |
| 1139 | |
| 1140 | gs++; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1141 | } |
| 1142 | |
| 1143 | gs = ge + 1; |
| 1144 | selCtr++; |
| 1145 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1146 | |
| 1147 | this.bsBuff = bsBuffShadow; |
| 1148 | this.bsLive = bsLiveShadow; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1149 | } |
| 1150 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1151 | private void moveToFrontCodeAndSend() throws IOException { |
Stefan Bodewig | b06f7b4 | 2012-05-01 07:06:25 +0000 | [diff] [blame] | 1152 | bsW(24, this.data.origPtr); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1153 | generateMTFValues(); |
| 1154 | sendMTFValues(); |
| 1155 | } |
| 1156 | |
Stefan Bodewig | 6ced422 | 2012-05-20 18:10:46 +0000 | [diff] [blame] | 1157 | private void blockSort() { |
| 1158 | blockSorter.blockSort(data, last); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1159 | } |
| 1160 | |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 1161 | /* |
| 1162 | * Performs Move-To-Front on the Burrows-Wheeler transformed |
| 1163 | * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB |
| 1164 | * run-length-encoded form. |
| 1165 | * |
| 1166 | * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p> |
| 1167 | */ |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1168 | private void generateMTFValues() { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1169 | final int lastShadow = this.last; |
| 1170 | final Data dataShadow = this.data; |
| 1171 | final boolean[] inUse = dataShadow.inUse; |
| 1172 | final byte[] block = dataShadow.block; |
| 1173 | final int[] fmap = dataShadow.fmap; |
| 1174 | final char[] sfmap = dataShadow.sfmap; |
| 1175 | final int[] mtfFreq = dataShadow.mtfFreq; |
| 1176 | final byte[] unseqToSeq = dataShadow.unseqToSeq; |
| 1177 | final byte[] yy = dataShadow.generateMTFValues_yy; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1178 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1179 | // make maps |
| 1180 | int nInUseShadow = 0; |
| 1181 | for (int i = 0; i < 256; i++) { |
| 1182 | if (inUse[i]) { |
| 1183 | unseqToSeq[i] = (byte) nInUseShadow; |
| 1184 | nInUseShadow++; |
| 1185 | } |
| 1186 | } |
| 1187 | this.nInUse = nInUseShadow; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1188 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1189 | final int eob = nInUseShadow + 1; |
| 1190 | |
| 1191 | for (int i = eob; i >= 0; i--) { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1192 | mtfFreq[i] = 0; |
| 1193 | } |
| 1194 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1195 | for (int i = nInUseShadow; --i >= 0;) { |
| 1196 | yy[i] = (byte) i; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1197 | } |
| 1198 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1199 | int wr = 0; |
| 1200 | int zPend = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1201 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1202 | for (int i = 0; i <= lastShadow; i++) { |
| 1203 | final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff]; |
| 1204 | byte tmp = yy[0]; |
| 1205 | int j = 0; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1206 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1207 | while (ll_i != tmp) { |
| 1208 | j++; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1209 | byte tmp2 = tmp; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1210 | tmp = yy[j]; |
| 1211 | yy[j] = tmp2; |
Torsten Curdt | c199737 | 2009-01-08 11:13:50 +0000 | [diff] [blame] | 1212 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1213 | yy[0] = tmp; |
| 1214 | |
| 1215 | if (j == 0) { |
| 1216 | zPend++; |
| 1217 | } else { |
| 1218 | if (zPend > 0) { |
| 1219 | zPend--; |
| 1220 | while (true) { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1221 | if ((zPend & 1) == 0) { |
| 1222 | sfmap[wr] = RUNA; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1223 | wr++; |
| 1224 | mtfFreq[RUNA]++; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1225 | } else { |
| 1226 | sfmap[wr] = RUNB; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1227 | wr++; |
| 1228 | mtfFreq[RUNB]++; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1229 | } |
| 1230 | |
| 1231 | if (zPend >= 2) { |
| 1232 | zPend = (zPend - 2) >> 1; |
| 1233 | } else { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1234 | break; |
Torsten Curdt | c199737 | 2009-01-08 11:13:50 +0000 | [diff] [blame] | 1235 | } |
Torsten Curdt | c199737 | 2009-01-08 11:13:50 +0000 | [diff] [blame] | 1236 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1237 | zPend = 0; |
| 1238 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1239 | sfmap[wr] = (char) (j + 1); |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1240 | wr++; |
| 1241 | mtfFreq[j + 1]++; |
| 1242 | } |
| 1243 | } |
| 1244 | |
| 1245 | if (zPend > 0) { |
| 1246 | zPend--; |
| 1247 | while (true) { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1248 | if ((zPend & 1) == 0) { |
| 1249 | sfmap[wr] = RUNA; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1250 | wr++; |
| 1251 | mtfFreq[RUNA]++; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1252 | } else { |
| 1253 | sfmap[wr] = RUNB; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1254 | wr++; |
| 1255 | mtfFreq[RUNB]++; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1256 | } |
| 1257 | |
| 1258 | if (zPend >= 2) { |
| 1259 | zPend = (zPend - 2) >> 1; |
| 1260 | } else { |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1261 | break; |
| 1262 | } |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1263 | } |
| 1264 | } |
| 1265 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1266 | sfmap[wr] = (char) eob; |
| 1267 | mtfFreq[eob]++; |
| 1268 | this.nMTF = wr + 1; |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1269 | } |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1270 | |
Stefan Bodewig | b06f7b4 | 2012-05-01 07:06:25 +0000 | [diff] [blame] | 1271 | static final class Data extends Object { |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1272 | |
| 1273 | // with blockSize 900k |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 1274 | /* maps unsigned byte => "does it occur in block" */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1275 | final boolean[] inUse = new boolean[256]; // 256 byte |
| 1276 | final byte[] unseqToSeq = new byte[256]; // 256 byte |
| 1277 | final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte |
| 1278 | final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte |
| 1279 | final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte |
| 1280 | |
| 1281 | final byte[] generateMTFValues_yy = new byte[256]; // 256 byte |
| 1282 | final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548 |
| 1283 | // byte |
| 1284 | final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 |
| 1285 | // byte |
| 1286 | final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte |
| 1287 | final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte |
| 1288 | final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192 |
| 1289 | // byte |
| 1290 | final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte |
| 1291 | final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte |
| 1292 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1293 | final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte |
| 1294 | final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte |
| 1295 | final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte |
| 1296 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1297 | // ------------ |
| 1298 | // 333408 byte |
| 1299 | |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 1300 | /* holds the RLEd block of original data starting at index 1. |
| 1301 | * After sorting the last byte added to the buffer is at index |
| 1302 | * 0. */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1303 | final byte[] block; // 900021 byte |
Stefan Bodewig | 1ce57d9 | 2012-05-03 16:07:38 +0000 | [diff] [blame] | 1304 | /* maps index in Burrows-Wheeler transformed block => index of |
| 1305 | * byte in original block */ |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1306 | final int[] fmap; // 3600000 byte |
| 1307 | final char[] sfmap; // 3600000 byte |
| 1308 | // ------------ |
| 1309 | // 8433529 byte |
| 1310 | // ============ |
| 1311 | |
| 1312 | /** |
Stefan Bodewig | 020c03d | 2012-05-12 05:00:30 +0000 | [diff] [blame] | 1313 | * Index of original line in Burrows-Wheeler table. |
| 1314 | * |
| 1315 | * <p>This is the index in fmap that points to the last byte |
| 1316 | * of the original data.</p> |
Stefan Bodewig | b06f7b4 | 2012-05-01 07:06:25 +0000 | [diff] [blame] | 1317 | */ |
| 1318 | int origPtr; |
| 1319 | |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1320 | Data(int blockSize100k) { |
Sebastian Bazley | df7f45f | 2009-04-15 00:33:02 +0000 | [diff] [blame] | 1321 | final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1322 | this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)]; |
| 1323 | this.fmap = new int[n]; |
| 1324 | this.sfmap = new char[2 * n]; |
Stefan Bodewig | b1bfbcb | 2009-03-27 14:14:07 +0000 | [diff] [blame] | 1325 | } |
| 1326 | |
| 1327 | } |
| 1328 | |
Torsten Curdt | ca16539 | 2008-07-10 10:17:44 +0000 | [diff] [blame] | 1329 | } |