blob: a5eedcf04a357e396a6dff8cce501790b6024901 [file] [log] [blame]
Torsten Curdtca165392008-07-10 10:17:44 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19package org.apache.commons.compress.compressors.bzip2;
20
21import java.io.IOException;
22import java.io.OutputStream;
23
24import org.apache.commons.compress.compressors.CompressorOutputStream;
25
Torsten Curdtca165392008-07-10 10:17:44 +000026/**
Christian Grobmeierc04eecc2009-04-13 17:24:58 +000027 * An output stream that compresses into the BZip2 format into another stream.
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +000028 *
29 * <p>
30 * The compression requires large amounts of memory. Thus you should call the
31 * {@link #close() close()} method as soon as possible, to force
Sebastian Bazley04fa4342009-03-27 19:19:44 +000032 * <tt>BZip2CompressorOutputStream</tt> to release the allocated memory.
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +000033 * </p>
34 *
35 * <p> You can shrink the amount of allocated memory and maybe raise
36 * the compression speed by choosing a lower blocksize, which in turn
37 * may cause a lower compression ratio. You can avoid unnecessary
38 * memory allocation by avoiding using a blocksize which is bigger
39 * than the size of the input. </p>
40 *
41 * <p> You can compute the memory usage for compressing by the
42 * following formula: </p>
43 *
44 * <pre>
45 * &lt;code&gt;400k + (9 * blocksize)&lt;/code&gt;.
46 * </pre>
47 *
48 * <p> To get the memory required for decompression by {@link
Sebastian Bazley04fa4342009-03-27 19:19:44 +000049 * BZip2CompressorInputStream} use </p>
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +000050 *
51 * <pre>
52 * &lt;code&gt;65k + (5 * blocksize)&lt;/code&gt;.
53 * </pre>
54 *
Stefan Bodewig45e51c22013-12-22 07:03:43 +000055 * <table width="100%" border="1" summary="Memory usage by blocksize">
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +000056 * <tr>
57 * <th colspan="3">Memory usage by blocksize</th>
58 * </tr>
59 * <tr>
60 * <th align="right">Blocksize</th> <th align="right">Compression<br>
61 * memory usage</th> <th align="right">Decompression<br>
62 * memory usage</th>
63 * </tr>
64 * <tr>
65 * <td align="right">100k</td>
66 * <td align="right">1300k</td>
67 * <td align="right">565k</td>
68 * </tr>
69 * <tr>
70 * <td align="right">200k</td>
71 * <td align="right">2200k</td>
72 * <td align="right">1065k</td>
73 * </tr>
74 * <tr>
75 * <td align="right">300k</td>
76 * <td align="right">3100k</td>
77 * <td align="right">1565k</td>
78 * </tr>
79 * <tr>
80 * <td align="right">400k</td>
81 * <td align="right">4000k</td>
82 * <td align="right">2065k</td>
83 * </tr>
84 * <tr>
85 * <td align="right">500k</td>
86 * <td align="right">4900k</td>
87 * <td align="right">2565k</td>
88 * </tr>
89 * <tr>
90 * <td align="right">600k</td>
91 * <td align="right">5800k</td>
92 * <td align="right">3065k</td>
93 * </tr>
94 * <tr>
95 * <td align="right">700k</td>
96 * <td align="right">6700k</td>
97 * <td align="right">3565k</td>
98 * </tr>
99 * <tr>
100 * <td align="right">800k</td>
101 * <td align="right">7600k</td>
102 * <td align="right">4065k</td>
103 * </tr>
104 * <tr>
105 * <td align="right">900k</td>
106 * <td align="right">8500k</td>
107 * <td align="right">4565k</td>
108 * </tr>
109 * </table>
110 *
111 * <p>
Sebastian Bazley04fa4342009-03-27 19:19:44 +0000112 * For decompression <tt>BZip2CompressorInputStream</tt> allocates less memory if the
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000113 * bzipped input is smaller than one block.
114 * </p>
115 *
116 * <p>
117 * Instances of this class are not threadsafe.
118 * </p>
119 *
120 * <p>
121 * TODO: Update to BZip2 1.0.1
122 * </p>
Sebastian Bazley99870ef2009-03-28 00:04:36 +0000123 * @NotThreadSafe
Torsten Curdtca165392008-07-10 10:17:44 +0000124 */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000125public class BZip2CompressorOutputStream extends CompressorOutputStream
126 implements BZip2Constants {
127
128 /**
129 * The minimum supported blocksize <tt> == 1</tt>.
130 */
131 public static final int MIN_BLOCKSIZE = 1;
132
133 /**
134 * The maximum supported blocksize <tt> == 9</tt>.
135 */
136 public static final int MAX_BLOCKSIZE = 9;
137
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000138 private static final int GREATER_ICOST = 15;
139 private static final int LESSER_ICOST = 0;
Torsten Curdtca165392008-07-10 10:17:44 +0000140
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000141 private static void hbMakeCodeLengths(final byte[] len, final int[] freq,
142 final Data dat, final int alphaSize,
143 final int maxLen) {
144 /*
145 * Nodes and heap entries run from 1. Entry 0 for both the heap and
146 * nodes is a sentinel.
147 */
148 final int[] heap = dat.heap;
149 final int[] weight = dat.weight;
150 final int[] parent = dat.parent;
151
152 for (int i = alphaSize; --i >= 0;) {
153 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
154 }
155
156 for (boolean tooLong = true; tooLong;) {
157 tooLong = false;
158
159 int nNodes = alphaSize;
160 int nHeap = 0;
161 heap[0] = 0;
162 weight[0] = 0;
163 parent[0] = -2;
164
165 for (int i = 1; i <= alphaSize; i++) {
166 parent[i] = -1;
167 nHeap++;
168 heap[nHeap] = i;
169
170 int zz = nHeap;
171 int tmp = heap[zz];
172 while (weight[tmp] < weight[heap[zz >> 1]]) {
173 heap[zz] = heap[zz >> 1];
174 zz >>= 1;
175 }
176 heap[zz] = tmp;
177 }
178
179 while (nHeap > 1) {
180 int n1 = heap[1];
181 heap[1] = heap[nHeap];
182 nHeap--;
183
184 int yy = 0;
185 int zz = 1;
186 int tmp = heap[1];
187
188 while (true) {
189 yy = zz << 1;
190
191 if (yy > nHeap) {
192 break;
193 }
194
195 if ((yy < nHeap)
196 && (weight[heap[yy + 1]] < weight[heap[yy]])) {
197 yy++;
198 }
199
200 if (weight[tmp] < weight[heap[yy]]) {
201 break;
202 }
203
204 heap[zz] = heap[yy];
205 zz = yy;
206 }
207
208 heap[zz] = tmp;
209
210 int n2 = heap[1];
211 heap[1] = heap[nHeap];
212 nHeap--;
213
214 yy = 0;
215 zz = 1;
216 tmp = heap[1];
217
218 while (true) {
219 yy = zz << 1;
220
221 if (yy > nHeap) {
222 break;
223 }
224
225 if ((yy < nHeap)
226 && (weight[heap[yy + 1]] < weight[heap[yy]])) {
227 yy++;
228 }
229
230 if (weight[tmp] < weight[heap[yy]]) {
231 break;
232 }
233
234 heap[zz] = heap[yy];
235 zz = yy;
236 }
237
238 heap[zz] = tmp;
239 nNodes++;
240 parent[n1] = parent[n2] = nNodes;
241
242 final int weight_n1 = weight[n1];
243 final int weight_n2 = weight[n2];
244 weight[nNodes] = ((weight_n1 & 0xffffff00)
245 + (weight_n2 & 0xffffff00))
246 | (1 + (((weight_n1 & 0x000000ff)
247 > (weight_n2 & 0x000000ff))
248 ? (weight_n1 & 0x000000ff)
249 : (weight_n2 & 0x000000ff)));
250
251 parent[nNodes] = -1;
252 nHeap++;
253 heap[nHeap] = nNodes;
254
255 tmp = 0;
256 zz = nHeap;
257 tmp = heap[zz];
258 final int weight_tmp = weight[tmp];
259 while (weight_tmp < weight[heap[zz >> 1]]) {
260 heap[zz] = heap[zz >> 1];
261 zz >>= 1;
262 }
263 heap[zz] = tmp;
264
265 }
266
267 for (int i = 1; i <= alphaSize; i++) {
268 int j = 0;
269 int k = i;
270
271 for (int parent_k; (parent_k = parent[k]) >= 0;) {
272 k = parent_k;
273 j++;
274 }
275
276 len[i - 1] = (byte) j;
277 if (j > maxLen) {
278 tooLong = true;
279 }
280 }
281
282 if (tooLong) {
283 for (int i = 1; i < alphaSize; i++) {
284 int j = weight[i] >> 8;
285 j = 1 + (j >> 1);
286 weight[i] = j << 8;
287 }
288 }
289 }
290 }
291
292 /**
293 * Index of the last char in the block, so the block size == last + 1.
294 */
Stefan Bodewig41f4a202009-03-20 15:42:37 +0000295 private int last;
Torsten Curdtca165392008-07-10 10:17:44 +0000296
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000297 /**
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000298 * Always: in the range 0 .. 9. The current block size is 100000 * this
299 * number.
300 */
301 private final int blockSize100k;
Torsten Curdtca165392008-07-10 10:17:44 +0000302
Stefan Bodewig41f4a202009-03-20 15:42:37 +0000303 private int bsBuff;
304 private int bsLive;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000305 private final CRC crc = new CRC();
Torsten Curdtca165392008-07-10 10:17:44 +0000306
Torsten Curdtca165392008-07-10 10:17:44 +0000307 private int nInUse;
308
Torsten Curdtca165392008-07-10 10:17:44 +0000309 private int nMTF;
310
Torsten Curdtca165392008-07-10 10:17:44 +0000311 private int currentChar = -1;
312 private int runLength = 0;
313
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000314 private int blockCRC;
315 private int combinedCRC;
Stefan Bodewig1ce57d92012-05-03 16:07:38 +0000316 private final int allowableBlockSize;
Torsten Curdtca165392008-07-10 10:17:44 +0000317
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000318 /**
319 * All memory intensive stuff.
320 */
321 private Data data;
Stefan Bodewig6e956972012-05-01 07:33:09 +0000322 private BlockSort blockSorter;
Torsten Curdtca165392008-07-10 10:17:44 +0000323
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000324 private OutputStream out;
Torsten Curdtca165392008-07-10 10:17:44 +0000325
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000326 /**
327 * Chooses a blocksize based on the given length of the data to compress.
328 *
329 * @return The blocksize, between {@link #MIN_BLOCKSIZE} and
330 * {@link #MAX_BLOCKSIZE} both inclusive. For a negative
331 * <tt>inputLength</tt> this method returns <tt>MAX_BLOCKSIZE</tt>
332 * always.
333 *
334 * @param inputLength
335 * The length of the data which will be compressed by
Stefan Bodewig7684a4b2012-05-21 04:24:20 +0000336 * <tt>BZip2CompressorOutputStream</tt>.
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000337 */
338 public static int chooseBlockSize(long inputLength) {
339 return (inputLength > 0) ? (int) Math
340 .min((inputLength / 132000) + 1, 9) : MAX_BLOCKSIZE;
Torsten Curdtca165392008-07-10 10:17:44 +0000341 }
342
343 /**
Stefan Bodewig7684a4b2012-05-21 04:24:20 +0000344 * Constructs a new <tt>BZip2CompressorOutputStream</tt> with a blocksize of 900k.
Torsten Curdtca165392008-07-10 10:17:44 +0000345 *
Christian Grobmeiercf645572009-04-13 15:20:24 +0000346 * @param out
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000347 * the destination stream.
348 *
349 * @throws IOException
350 * if an I/O error occurs in the specified stream.
351 * @throws NullPointerException
352 * if <code>out == null</code>.
Torsten Curdtca165392008-07-10 10:17:44 +0000353 */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000354 public BZip2CompressorOutputStream(final OutputStream out)
355 throws IOException {
356 this(out, MAX_BLOCKSIZE);
357 }
358
359 /**
Stefan Bodewig7684a4b2012-05-21 04:24:20 +0000360 * Constructs a new <tt>BZip2CompressorOutputStream</tt> with specified blocksize.
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000361 *
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000362 * @param out
363 * the destination stream.
364 * @param blockSize
365 * the blockSize as 100k units.
366 *
367 * @throws IOException
368 * if an I/O error occurs in the specified stream.
369 * @throws IllegalArgumentException
Stefan Bodewig45e51c22013-12-22 07:03:43 +0000370 * if <code>(blockSize &lt; 1) || (blockSize &gt; 9)</code>.
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000371 * @throws NullPointerException
372 * if <code>out == null</code>.
373 *
374 * @see #MIN_BLOCKSIZE
375 * @see #MAX_BLOCKSIZE
376 */
Emmanuel Bourgdab23f72013-08-07 14:15:45 +0000377 public BZip2CompressorOutputStream(final OutputStream out, final int blockSize) throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000378 if (blockSize < 1) {
Emmanuel Bourgdab23f72013-08-07 14:15:45 +0000379 throw new IllegalArgumentException("blockSize(" + blockSize + ") < 1");
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000380 }
381 if (blockSize > 9) {
Emmanuel Bourgdab23f72013-08-07 14:15:45 +0000382 throw new IllegalArgumentException("blockSize(" + blockSize + ") > 9");
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000383 }
384
385 this.blockSize100k = blockSize;
386 this.out = out;
Sebastian Bazleyb58c0392012-05-21 10:46:07 +0000387
388 /* 20 is just a paranoia constant */
389 this.allowableBlockSize = (this.blockSize100k * BZip2Constants.BASEBLOCKSIZE) - 20;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000390 init();
391 }
392
Stefan Bodewig46628ef2011-08-06 16:30:40 +0000393 @Override
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000394 public void write(final int b) throws IOException {
395 if (this.out != null) {
396 write0(b);
Torsten Curdtca165392008-07-10 10:17:44 +0000397 } else {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000398 throw new IOException("closed");
Torsten Curdtca165392008-07-10 10:17:44 +0000399 }
400 }
401
Stefan Bodewig1ce57d92012-05-03 16:07:38 +0000402 /**
403 * Writes the current byte to the buffer, run-length encoding it
404 * if it has been repeated at least four times (the first step
405 * RLEs sequences of four identical bytes).
406 *
407 * <p>Flushes the current block before writing data if it is
408 * full.</p>
409 *
410 * <p>"write to the buffer" means adding to data.buffer starting
411 * two steps "after" this.last - initially starting at index 1
412 * (not 0) - and updating this.last to point to the last index
413 * written minus 1.</p>
414 */
Torsten Curdtca165392008-07-10 10:17:44 +0000415 private void writeRun() throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000416 final int lastShadow = this.last;
417
418 if (lastShadow < this.allowableBlockSize) {
419 final int currentCharShadow = this.currentChar;
420 final Data dataShadow = this.data;
421 dataShadow.inUse[currentCharShadow] = true;
422 final byte ch = (byte) currentCharShadow;
423
424 int runLengthShadow = this.runLength;
425 this.crc.updateCRC(currentCharShadow, runLengthShadow);
426
427 switch (runLengthShadow) {
Torsten Curdtca165392008-07-10 10:17:44 +0000428 case 1:
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000429 dataShadow.block[lastShadow + 2] = ch;
430 this.last = lastShadow + 1;
Torsten Curdtca165392008-07-10 10:17:44 +0000431 break;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000432
Torsten Curdtca165392008-07-10 10:17:44 +0000433 case 2:
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000434 dataShadow.block[lastShadow + 2] = ch;
435 dataShadow.block[lastShadow + 3] = ch;
436 this.last = lastShadow + 2;
Torsten Curdtca165392008-07-10 10:17:44 +0000437 break;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000438
439 case 3: {
440 final byte[] block = dataShadow.block;
441 block[lastShadow + 2] = ch;
442 block[lastShadow + 3] = ch;
443 block[lastShadow + 4] = ch;
444 this.last = lastShadow + 3;
445 }
Torsten Curdtca165392008-07-10 10:17:44 +0000446 break;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000447
448 default: {
449 runLengthShadow -= 4;
450 dataShadow.inUse[runLengthShadow] = true;
451 final byte[] block = dataShadow.block;
452 block[lastShadow + 2] = ch;
453 block[lastShadow + 3] = ch;
454 block[lastShadow + 4] = ch;
455 block[lastShadow + 5] = ch;
456 block[lastShadow + 6] = (byte) runLengthShadow;
457 this.last = lastShadow + 5;
458 }
Torsten Curdtca165392008-07-10 10:17:44 +0000459 break;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000460
Torsten Curdtca165392008-07-10 10:17:44 +0000461 }
462 } else {
463 endBlock();
464 initBlock();
465 writeRun();
466 }
467 }
468
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000469 /**
470 * Overriden to close the stream.
471 */
Stefan Bodewig46628ef2011-08-06 16:30:40 +0000472 @Override
Torsten Curdtca165392008-07-10 10:17:44 +0000473 protected void finalize() throws Throwable {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000474 finish();
Torsten Curdtca165392008-07-10 10:17:44 +0000475 super.finalize();
476 }
477
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000478
479 public void finish() throws IOException {
480 if (out != null) {
481 try {
482 if (this.runLength > 0) {
483 writeRun();
484 }
485 this.currentChar = -1;
486 endBlock();
487 endCompression();
488 } finally {
489 this.out = null;
490 this.data = null;
Stefan Bodewig6e956972012-05-01 07:33:09 +0000491 this.blockSorter = null;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000492 }
Torsten Curdtca165392008-07-10 10:17:44 +0000493 }
Torsten Curdt7b879e52009-01-12 11:31:06 +0000494 }
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000495
Stefan Bodewig46628ef2011-08-06 16:30:40 +0000496 @Override
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000497 public void close() throws IOException {
498 if (out != null) {
499 OutputStream outShadow = this.out;
500 finish();
501 outShadow.close();
Torsten Curdt7b879e52009-01-12 11:31:06 +0000502 }
Torsten Curdtca165392008-07-10 10:17:44 +0000503 }
504
Stefan Bodewig46628ef2011-08-06 16:30:40 +0000505 @Override
Torsten Curdtca165392008-07-10 10:17:44 +0000506 public void flush() throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000507 OutputStream outShadow = this.out;
508 if (outShadow != null) {
509 outShadow.flush();
510 }
Torsten Curdtca165392008-07-10 10:17:44 +0000511 }
512
Christian Grobmeiercf645572009-04-13 15:20:24 +0000513 /**
514 * Writes magic bytes like BZ on the first position of the stream
515 * and bytes indiciating the file-format, which is
516 * huffmanised, followed by a digit indicating blockSize100k.
517 * @throws IOException if the magic bytes could not been written
518 */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000519 private void init() throws IOException {
Christian Grobmeiercf645572009-04-13 15:20:24 +0000520 bsPutUByte('B');
521 bsPutUByte('Z');
Torsten Curdtca165392008-07-10 10:17:44 +0000522
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000523 this.data = new Data(this.blockSize100k);
Stefan Bodewig6e956972012-05-01 07:33:09 +0000524 this.blockSorter = new BlockSort(this.data);
Torsten Curdtca165392008-07-10 10:17:44 +0000525
Christian Grobmeiercf645572009-04-13 15:20:24 +0000526 // huffmanised magic bytes
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000527 bsPutUByte('h');
528 bsPutUByte('0' + this.blockSize100k);
Torsten Curdtca165392008-07-10 10:17:44 +0000529
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000530 this.combinedCRC = 0;
531 initBlock();
Torsten Curdtca165392008-07-10 10:17:44 +0000532 }
533
Torsten Curdtca165392008-07-10 10:17:44 +0000534 private void initBlock() {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000535 // blockNo++;
536 this.crc.initialiseCRC();
537 this.last = -1;
538 // ch = 0;
Torsten Curdtca165392008-07-10 10:17:44 +0000539
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000540 boolean[] inUse = this.data.inUse;
541 for (int i = 256; --i >= 0;) {
Torsten Curdtca165392008-07-10 10:17:44 +0000542 inUse[i] = false;
543 }
Stefan Bodewig7684a4b2012-05-21 04:24:20 +0000544
Torsten Curdtca165392008-07-10 10:17:44 +0000545 }
546
547 private void endBlock() throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000548 this.blockCRC = this.crc.getFinalCRC();
549 this.combinedCRC = (this.combinedCRC << 1) | (this.combinedCRC >>> 31);
550 this.combinedCRC ^= this.blockCRC;
Torsten Curdtca165392008-07-10 10:17:44 +0000551
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000552 // empty block at end of file
553 if (this.last == -1) {
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000554 return;
Torsten Curdtc1997372009-01-08 11:13:50 +0000555 }
Stefan Bodewig3f9bcc62009-02-10 14:20:05 +0000556
Torsten Curdtca165392008-07-10 10:17:44 +0000557 /* sort the block and establish posn of original string */
Stefan Bodewig6ced4222012-05-20 18:10:46 +0000558 blockSort();
Torsten Curdtca165392008-07-10 10:17:44 +0000559
560 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000561 * A 6-byte block header, the value chosen arbitrarily as 0x314159265359
562 * :-). A 32 bit value does not really give a strong enough guarantee
563 * that the value will not appear by chance in the compressed
564 * datastream. Worst-case probability of this event, for a 900k block,
565 * is about 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48
566 * bits. For a compressed file of size 100Gb -- about 100000 blocks --
567 * only a 48-bit marker will do. NB: normal compression/ decompression
568 * donot rely on these statistical properties. They are only important
569 * when trying to recover blocks from damaged files.
570 */
571 bsPutUByte(0x31);
572 bsPutUByte(0x41);
573 bsPutUByte(0x59);
574 bsPutUByte(0x26);
575 bsPutUByte(0x53);
576 bsPutUByte(0x59);
Torsten Curdtca165392008-07-10 10:17:44 +0000577
578 /* Now the block's CRC, so it is in a known place. */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000579 bsPutInt(this.blockCRC);
Torsten Curdtca165392008-07-10 10:17:44 +0000580
Stefan Bodewig6ced4222012-05-20 18:10:46 +0000581 /* Now a single bit indicating no randomisation. */
582 bsW(1, 0);
Torsten Curdtca165392008-07-10 10:17:44 +0000583
584 /* Finally, block's contents proper. */
585 moveToFrontCodeAndSend();
586 }
587
588 private void endCompression() throws IOException {
589 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000590 * Now another magic 48-bit number, 0x177245385090, to indicate the end
591 * of the last block. (sqrt(pi), if you want to know. I did want to use
592 * e, but it contains too much repetition -- 27 18 28 18 28 46 -- for me
593 * to feel statistically comfortable. Call me paranoid.)
594 */
595 bsPutUByte(0x17);
596 bsPutUByte(0x72);
597 bsPutUByte(0x45);
598 bsPutUByte(0x38);
599 bsPutUByte(0x50);
600 bsPutUByte(0x90);
Torsten Curdtca165392008-07-10 10:17:44 +0000601
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000602 bsPutInt(this.combinedCRC);
Torsten Curdtca165392008-07-10 10:17:44 +0000603 bsFinishedWithStream();
604 }
605
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000606 /**
607 * Returns the blocksize parameter specified at construction time.
608 */
609 public final int getBlockSize() {
610 return this.blockSize100k;
611 }
Torsten Curdtca165392008-07-10 10:17:44 +0000612
Stefan Bodewig46628ef2011-08-06 16:30:40 +0000613 @Override
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000614 public void write(final byte[] buf, int offs, final int len)
615 throws IOException {
616 if (offs < 0) {
617 throw new IndexOutOfBoundsException("offs(" + offs + ") < 0.");
618 }
619 if (len < 0) {
620 throw new IndexOutOfBoundsException("len(" + len + ") < 0.");
621 }
622 if (offs + len > buf.length) {
623 throw new IndexOutOfBoundsException("offs(" + offs + ") + len("
624 + len + ") > buf.length("
625 + buf.length + ").");
626 }
627 if (this.out == null) {
628 throw new IOException("stream closed");
629 }
630
631 for (int hi = offs + len; offs < hi;) {
632 write0(buf[offs++]);
633 }
634 }
635
Stefan Bodewig1ce57d92012-05-03 16:07:38 +0000636 /**
637 * Keeps track of the last bytes written and implicitly performs
638 * run-length encoding as the first step of the bzip2 algorithm.
639 */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000640 private void write0(int b) throws IOException {
641 if (this.currentChar != -1) {
642 b &= 0xff;
643 if (this.currentChar == b) {
644 if (++this.runLength > 254) {
645 writeRun();
646 this.currentChar = -1;
647 this.runLength = 0;
648 }
649 // else nothing to do
650 } else {
651 writeRun();
652 this.runLength = 1;
653 this.currentChar = b;
654 }
655 } else {
656 this.currentChar = b & 0xff;
657 this.runLength++;
658 }
659 }
660
661 private static void hbAssignCodes(final int[] code, final byte[] length,
662 final int minLen, final int maxLen,
663 final int alphaSize) {
664 int vec = 0;
665 for (int n = minLen; n <= maxLen; n++) {
666 for (int i = 0; i < alphaSize; i++) {
667 if ((length[i] & 0xff) == n) {
Torsten Curdtca165392008-07-10 10:17:44 +0000668 code[i] = vec;
669 vec++;
670 }
Torsten Curdtc1997372009-01-08 11:13:50 +0000671 }
Torsten Curdtca165392008-07-10 10:17:44 +0000672 vec <<= 1;
673 }
674 }
675
Torsten Curdtca165392008-07-10 10:17:44 +0000676 private void bsFinishedWithStream() throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000677 while (this.bsLive > 0) {
678 int ch = this.bsBuff >> 24;
679 this.out.write(ch); // write 8-bit
680 this.bsBuff <<= 8;
681 this.bsLive -= 8;
Torsten Curdtca165392008-07-10 10:17:44 +0000682 }
683 }
684
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000685 private void bsW(final int n, final int v) throws IOException {
686 final OutputStream outShadow = this.out;
687 int bsLiveShadow = this.bsLive;
688 int bsBuffShadow = this.bsBuff;
689
690 while (bsLiveShadow >= 8) {
691 outShadow.write(bsBuffShadow >> 24); // write 8-bit
692 bsBuffShadow <<= 8;
693 bsLiveShadow -= 8;
Torsten Curdtca165392008-07-10 10:17:44 +0000694 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000695
696 this.bsBuff = bsBuffShadow | (v << (32 - bsLiveShadow - n));
697 this.bsLive = bsLiveShadow + n;
Torsten Curdtca165392008-07-10 10:17:44 +0000698 }
699
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000700 private void bsPutUByte(final int c) throws IOException {
Torsten Curdtca165392008-07-10 10:17:44 +0000701 bsW(8, c);
702 }
703
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000704 private void bsPutInt(final int u) throws IOException {
Torsten Curdtca165392008-07-10 10:17:44 +0000705 bsW(8, (u >> 24) & 0xff);
706 bsW(8, (u >> 16) & 0xff);
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000707 bsW(8, (u >> 8) & 0xff);
708 bsW(8, u & 0xff);
Torsten Curdtca165392008-07-10 10:17:44 +0000709 }
710
711 private void sendMTFValues() throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000712 final byte[][] len = this.data.sendMTFValues_len;
713 final int alphaSize = this.nInUse + 2;
Torsten Curdtca165392008-07-10 10:17:44 +0000714
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000715 for (int t = N_GROUPS; --t >= 0;) {
716 byte[] len_t = len[t];
717 for (int v = alphaSize; --v >= 0;) {
718 len_t[v] = GREATER_ICOST;
Torsten Curdtca165392008-07-10 10:17:44 +0000719 }
720 }
721
722 /* Decide how many coding tables to use */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000723 // assert (this.nMTF > 0) : this.nMTF;
724 final int nGroups = (this.nMTF < 200) ? 2 : (this.nMTF < 600) ? 3
725 : (this.nMTF < 1200) ? 4 : (this.nMTF < 2400) ? 5 : 6;
Torsten Curdtca165392008-07-10 10:17:44 +0000726
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000727 /* Generate an initial set of coding tables */
728 sendMTFValues0(nGroups, alphaSize);
Torsten Curdtca165392008-07-10 10:17:44 +0000729
Torsten Curdtca165392008-07-10 10:17:44 +0000730 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000731 * Iterate up to N_ITERS times to improve the tables.
732 */
733 final int nSelectors = sendMTFValues1(nGroups, alphaSize);
734
735 /* Compute MTF values for the selectors. */
736 sendMTFValues2(nGroups, nSelectors);
737
738 /* Assign actual codes for the tables. */
739 sendMTFValues3(nGroups, alphaSize);
740
741 /* Transmit the mapping table. */
742 sendMTFValues4();
743
744 /* Now the selectors. */
745 sendMTFValues5(nGroups, nSelectors);
746
747 /* Now the coding tables. */
748 sendMTFValues6(nGroups, alphaSize);
749
750 /* And finally, the block data proper */
Stefan Bodewig58c56fc2011-07-23 12:41:55 +0000751 sendMTFValues7();
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000752 }
753
754 private void sendMTFValues0(final int nGroups, final int alphaSize) {
755 final byte[][] len = this.data.sendMTFValues_len;
756 final int[] mtfFreq = this.data.mtfFreq;
757
758 int remF = this.nMTF;
759 int gs = 0;
760
761 for (int nPart = nGroups; nPart > 0; nPart--) {
762 final int tFreq = remF / nPart;
763 int ge = gs - 1;
764 int aFreq = 0;
765
766 for (final int a = alphaSize - 1; (aFreq < tFreq) && (ge < a);) {
767 aFreq += mtfFreq[++ge];
Torsten Curdtca165392008-07-10 10:17:44 +0000768 }
769
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000770 if ((ge > gs) && (nPart != nGroups) && (nPart != 1)
771 && (((nGroups - nPart) & 1) != 0)) {
772 aFreq -= mtfFreq[ge--];
773 }
774
775 final byte[] len_np = len[nPart - 1];
776 for (int v = alphaSize; --v >= 0;) {
777 if ((v >= gs) && (v <= ge)) {
778 len_np[v] = LESSER_ICOST;
779 } else {
780 len_np[v] = GREATER_ICOST;
781 }
782 }
783
784 gs = ge + 1;
785 remF -= aFreq;
786 }
787 }
788
789 private int sendMTFValues1(final int nGroups, final int alphaSize) {
790 final Data dataShadow = this.data;
791 final int[][] rfreq = dataShadow.sendMTFValues_rfreq;
792 final int[] fave = dataShadow.sendMTFValues_fave;
793 final short[] cost = dataShadow.sendMTFValues_cost;
794 final char[] sfmap = dataShadow.sfmap;
795 final byte[] selector = dataShadow.selector;
796 final byte[][] len = dataShadow.sendMTFValues_len;
797 final byte[] len_0 = len[0];
798 final byte[] len_1 = len[1];
799 final byte[] len_2 = len[2];
800 final byte[] len_3 = len[3];
801 final byte[] len_4 = len[4];
802 final byte[] len_5 = len[5];
803 final int nMTFShadow = this.nMTF;
804
805 int nSelectors = 0;
806
807 for (int iter = 0; iter < N_ITERS; iter++) {
808 for (int t = nGroups; --t >= 0;) {
809 fave[t] = 0;
810 int[] rfreqt = rfreq[t];
811 for (int i = alphaSize; --i >= 0;) {
812 rfreqt[i] = 0;
Torsten Curdtca165392008-07-10 10:17:44 +0000813 }
814 }
815
816 nSelectors = 0;
Torsten Curdtca165392008-07-10 10:17:44 +0000817
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000818 for (int gs = 0; gs < this.nMTF;) {
Torsten Curdtca165392008-07-10 10:17:44 +0000819 /* Set group start & end marks. */
Torsten Curdtca165392008-07-10 10:17:44 +0000820
821 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000822 * Calculate the cost of this group as coded by each of the
823 * coding tables.
824 */
Torsten Curdtca165392008-07-10 10:17:44 +0000825
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000826 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
827
828 if (nGroups == N_GROUPS) {
829 // unrolled version of the else-block
830
831 short cost0 = 0;
832 short cost1 = 0;
833 short cost2 = 0;
834 short cost3 = 0;
835 short cost4 = 0;
836 short cost5 = 0;
837
838 for (int i = gs; i <= ge; i++) {
839 final int icv = sfmap[i];
840 cost0 += len_0[icv] & 0xff;
841 cost1 += len_1[icv] & 0xff;
842 cost2 += len_2[icv] & 0xff;
843 cost3 += len_3[icv] & 0xff;
844 cost4 += len_4[icv] & 0xff;
845 cost5 += len_5[icv] & 0xff;
Torsten Curdtca165392008-07-10 10:17:44 +0000846 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000847
Torsten Curdtca165392008-07-10 10:17:44 +0000848 cost[0] = cost0;
849 cost[1] = cost1;
850 cost[2] = cost2;
851 cost[3] = cost3;
852 cost[4] = cost4;
853 cost[5] = cost5;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000854
Torsten Curdtca165392008-07-10 10:17:44 +0000855 } else {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000856 for (int t = nGroups; --t >= 0;) {
857 cost[t] = 0;
858 }
859
860 for (int i = gs; i <= ge; i++) {
861 final int icv = sfmap[i];
862 for (int t = nGroups; --t >= 0;) {
863 cost[t] += len[t][icv] & 0xff;
Torsten Curdtca165392008-07-10 10:17:44 +0000864 }
865 }
866 }
867
868 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000869 * Find the coding table which is best for this group, and
870 * record its identity in the selector table.
871 */
872 int bt = -1;
873 for (int t = nGroups, bc = 999999999; --t >= 0;) {
874 final int cost_t = cost[t];
875 if (cost_t < bc) {
876 bc = cost_t;
Torsten Curdtca165392008-07-10 10:17:44 +0000877 bt = t;
878 }
Torsten Curdtc1997372009-01-08 11:13:50 +0000879 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000880
Torsten Curdtca165392008-07-10 10:17:44 +0000881 fave[bt]++;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000882 selector[nSelectors] = (byte) bt;
Torsten Curdtca165392008-07-10 10:17:44 +0000883 nSelectors++;
884
885 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000886 * Increment the symbol frequencies for the selected table.
887 */
888 final int[] rfreq_bt = rfreq[bt];
889 for (int i = gs; i <= ge; i++) {
890 rfreq_bt[sfmap[i]]++;
Torsten Curdtca165392008-07-10 10:17:44 +0000891 }
892
893 gs = ge + 1;
894 }
895
896 /*
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000897 * Recompute the tables based on the accumulated frequencies.
898 */
899 for (int t = 0; t < nGroups; t++) {
900 hbMakeCodeLengths(len[t], rfreq[t], this.data, alphaSize, 20);
Torsten Curdtca165392008-07-10 10:17:44 +0000901 }
902 }
903
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000904 return nSelectors;
905 }
Torsten Curdtca165392008-07-10 10:17:44 +0000906
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000907 private void sendMTFValues2(final int nGroups, final int nSelectors) {
908 // assert (nGroups < 8) : nGroups;
909
910 final Data dataShadow = this.data;
911 byte[] pos = dataShadow.sendMTFValues2_pos;
912
913 for (int i = nGroups; --i >= 0;) {
914 pos[i] = (byte) i;
Torsten Curdtca165392008-07-10 10:17:44 +0000915 }
916
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000917 for (int i = 0; i < nSelectors; i++) {
918 final byte ll_i = dataShadow.selector[i];
919 byte tmp = pos[0];
920 int j = 0;
Torsten Curdtca165392008-07-10 10:17:44 +0000921
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000922 while (ll_i != tmp) {
923 j++;
924 byte tmp2 = tmp;
Torsten Curdtca165392008-07-10 10:17:44 +0000925 tmp = pos[j];
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000926 pos[j] = tmp2;
Torsten Curdtca165392008-07-10 10:17:44 +0000927 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000928
929 pos[0] = tmp;
930 dataShadow.selectorMtf[i] = (byte) j;
Torsten Curdtca165392008-07-10 10:17:44 +0000931 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000932 }
Torsten Curdtca165392008-07-10 10:17:44 +0000933
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000934 private void sendMTFValues3(final int nGroups, final int alphaSize) {
935 int[][] code = this.data.sendMTFValues_code;
936 byte[][] len = this.data.sendMTFValues_len;
Torsten Curdtca165392008-07-10 10:17:44 +0000937
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000938 for (int t = 0; t < nGroups; t++) {
939 int minLen = 32;
940 int maxLen = 0;
941 final byte[] len_t = len[t];
942 for (int i = alphaSize; --i >= 0;) {
943 final int l = len_t[i] & 0xff;
944 if (l > maxLen) {
945 maxLen = l;
Torsten Curdtca165392008-07-10 10:17:44 +0000946 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000947 if (l < minLen) {
948 minLen = l;
Torsten Curdtca165392008-07-10 10:17:44 +0000949 }
950 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000951
952 // assert (maxLen <= 20) : maxLen;
953 // assert (minLen >= 1) : minLen;
954
Torsten Curdtca165392008-07-10 10:17:44 +0000955 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
956 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000957 }
Torsten Curdtca165392008-07-10 10:17:44 +0000958
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000959 private void sendMTFValues4() throws IOException {
960 final boolean[] inUse = this.data.inUse;
961 final boolean[] inUse16 = this.data.sentMTFValues4_inUse16;
962
963 for (int i = 16; --i >= 0;) {
964 inUse16[i] = false;
965 final int i16 = i * 16;
966 for (int j = 16; --j >= 0;) {
967 if (inUse[i16 + j]) {
968 inUse16[i] = true;
Torsten Curdtca165392008-07-10 10:17:44 +0000969 }
970 }
Torsten Curdtca165392008-07-10 10:17:44 +0000971 }
972
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000973 for (int i = 0; i < 16; i++) {
974 bsW(1, inUse16[i] ? 1 : 0);
Torsten Curdtca165392008-07-10 10:17:44 +0000975 }
976
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000977 final OutputStream outShadow = this.out;
978 int bsLiveShadow = this.bsLive;
979 int bsBuffShadow = this.bsBuff;
Torsten Curdtca165392008-07-10 10:17:44 +0000980
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +0000981 for (int i = 0; i < 16; i++) {
982 if (inUse16[i]) {
983 final int i16 = i * 16;
984 for (int j = 0; j < 16; j++) {
985 // inlined: bsW(1, inUse[i16 + j] ? 1 : 0);
986 while (bsLiveShadow >= 8) {
987 outShadow.write(bsBuffShadow >> 24); // write 8-bit
988 bsBuffShadow <<= 8;
989 bsLiveShadow -= 8;
990 }
991 if (inUse[i16 + j]) {
992 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
993 }
994 bsLiveShadow++;
995 }
996 }
997 }
998
999 this.bsBuff = bsBuffShadow;
1000 this.bsLive = bsLiveShadow;
1001 }
1002
1003 private void sendMTFValues5(final int nGroups, final int nSelectors)
1004 throws IOException {
1005 bsW(3, nGroups);
1006 bsW(15, nSelectors);
1007
1008 final OutputStream outShadow = this.out;
1009 final byte[] selectorMtf = this.data.selectorMtf;
1010
1011 int bsLiveShadow = this.bsLive;
1012 int bsBuffShadow = this.bsBuff;
1013
1014 for (int i = 0; i < nSelectors; i++) {
1015 for (int j = 0, hj = selectorMtf[i] & 0xff; j < hj; j++) {
1016 // inlined: bsW(1, 1);
1017 while (bsLiveShadow >= 8) {
1018 outShadow.write(bsBuffShadow >> 24);
1019 bsBuffShadow <<= 8;
1020 bsLiveShadow -= 8;
1021 }
1022 bsBuffShadow |= 1 << (32 - bsLiveShadow - 1);
1023 bsLiveShadow++;
1024 }
1025
1026 // inlined: bsW(1, 0);
1027 while (bsLiveShadow >= 8) {
1028 outShadow.write(bsBuffShadow >> 24);
1029 bsBuffShadow <<= 8;
1030 bsLiveShadow -= 8;
1031 }
1032 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1033 bsLiveShadow++;
1034 }
1035
1036 this.bsBuff = bsBuffShadow;
1037 this.bsLive = bsLiveShadow;
1038 }
1039
1040 private void sendMTFValues6(final int nGroups, final int alphaSize)
1041 throws IOException {
1042 final byte[][] len = this.data.sendMTFValues_len;
1043 final OutputStream outShadow = this.out;
1044
1045 int bsLiveShadow = this.bsLive;
1046 int bsBuffShadow = this.bsBuff;
1047
1048 for (int t = 0; t < nGroups; t++) {
1049 byte[] len_t = len[t];
1050 int curr = len_t[0] & 0xff;
1051
1052 // inlined: bsW(5, curr);
1053 while (bsLiveShadow >= 8) {
1054 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1055 bsBuffShadow <<= 8;
1056 bsLiveShadow -= 8;
1057 }
1058 bsBuffShadow |= curr << (32 - bsLiveShadow - 5);
1059 bsLiveShadow += 5;
1060
1061 for (int i = 0; i < alphaSize; i++) {
1062 int lti = len_t[i] & 0xff;
1063 while (curr < lti) {
1064 // inlined: bsW(2, 2);
1065 while (bsLiveShadow >= 8) {
1066 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1067 bsBuffShadow <<= 8;
1068 bsLiveShadow -= 8;
1069 }
1070 bsBuffShadow |= 2 << (32 - bsLiveShadow - 2);
1071 bsLiveShadow += 2;
1072
Torsten Curdtca165392008-07-10 10:17:44 +00001073 curr++; /* 10 */
1074 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001075
1076 while (curr > lti) {
1077 // inlined: bsW(2, 3);
1078 while (bsLiveShadow >= 8) {
1079 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1080 bsBuffShadow <<= 8;
1081 bsLiveShadow -= 8;
1082 }
1083 bsBuffShadow |= 3 << (32 - bsLiveShadow - 2);
1084 bsLiveShadow += 2;
1085
Torsten Curdtca165392008-07-10 10:17:44 +00001086 curr--; /* 11 */
1087 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001088
1089 // inlined: bsW(1, 0);
1090 while (bsLiveShadow >= 8) {
1091 outShadow.write(bsBuffShadow >> 24); // write 8-bit
1092 bsBuffShadow <<= 8;
1093 bsLiveShadow -= 8;
1094 }
1095 // bsBuffShadow |= 0 << (32 - bsLiveShadow - 1);
1096 bsLiveShadow++;
Torsten Curdtca165392008-07-10 10:17:44 +00001097 }
1098 }
1099
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001100 this.bsBuff = bsBuffShadow;
1101 this.bsLive = bsLiveShadow;
1102 }
1103
Stefan Bodewig58c56fc2011-07-23 12:41:55 +00001104 private void sendMTFValues7() throws IOException {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001105 final Data dataShadow = this.data;
1106 final byte[][] len = dataShadow.sendMTFValues_len;
1107 final int[][] code = dataShadow.sendMTFValues_code;
1108 final OutputStream outShadow = this.out;
1109 final byte[] selector = dataShadow.selector;
1110 final char[] sfmap = dataShadow.sfmap;
1111 final int nMTFShadow = this.nMTF;
1112
1113 int selCtr = 0;
1114
1115 int bsLiveShadow = this.bsLive;
1116 int bsBuffShadow = this.bsBuff;
1117
1118 for (int gs = 0; gs < nMTFShadow;) {
1119 final int ge = Math.min(gs + G_SIZE - 1, nMTFShadow - 1);
1120 final int selector_selCtr = selector[selCtr] & 0xff;
1121 final int[] code_selCtr = code[selector_selCtr];
1122 final byte[] len_selCtr = len[selector_selCtr];
1123
1124 while (gs <= ge) {
1125 final int sfmap_i = sfmap[gs];
1126
1127 //
1128 // inlined: bsW(len_selCtr[sfmap_i] & 0xff,
1129 // code_selCtr[sfmap_i]);
1130 //
1131 while (bsLiveShadow >= 8) {
1132 outShadow.write(bsBuffShadow >> 24);
1133 bsBuffShadow <<= 8;
1134 bsLiveShadow -= 8;
1135 }
1136 final int n = len_selCtr[sfmap_i] & 0xFF;
1137 bsBuffShadow |= code_selCtr[sfmap_i] << (32 - bsLiveShadow - n);
1138 bsLiveShadow += n;
1139
1140 gs++;
Torsten Curdtca165392008-07-10 10:17:44 +00001141 }
1142
1143 gs = ge + 1;
1144 selCtr++;
1145 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001146
1147 this.bsBuff = bsBuffShadow;
1148 this.bsLive = bsLiveShadow;
Torsten Curdtca165392008-07-10 10:17:44 +00001149 }
1150
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001151 private void moveToFrontCodeAndSend() throws IOException {
Stefan Bodewigb06f7b42012-05-01 07:06:25 +00001152 bsW(24, this.data.origPtr);
Torsten Curdtca165392008-07-10 10:17:44 +00001153 generateMTFValues();
1154 sendMTFValues();
1155 }
1156
Stefan Bodewig6ced4222012-05-20 18:10:46 +00001157 private void blockSort() {
1158 blockSorter.blockSort(data, last);
Torsten Curdtca165392008-07-10 10:17:44 +00001159 }
1160
Stefan Bodewig1ce57d92012-05-03 16:07:38 +00001161 /*
1162 * Performs Move-To-Front on the Burrows-Wheeler transformed
1163 * buffer, storing the MTFed data in data.sfmap in RUNA/RUNB
1164 * run-length-encoded form.
1165 *
1166 * <p>Keeps track of byte frequencies in data.mtfFreq at the same time.</p>
1167 */
Torsten Curdtca165392008-07-10 10:17:44 +00001168 private void generateMTFValues() {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001169 final int lastShadow = this.last;
1170 final Data dataShadow = this.data;
1171 final boolean[] inUse = dataShadow.inUse;
1172 final byte[] block = dataShadow.block;
1173 final int[] fmap = dataShadow.fmap;
1174 final char[] sfmap = dataShadow.sfmap;
1175 final int[] mtfFreq = dataShadow.mtfFreq;
1176 final byte[] unseqToSeq = dataShadow.unseqToSeq;
1177 final byte[] yy = dataShadow.generateMTFValues_yy;
Torsten Curdtca165392008-07-10 10:17:44 +00001178
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001179 // make maps
1180 int nInUseShadow = 0;
1181 for (int i = 0; i < 256; i++) {
1182 if (inUse[i]) {
1183 unseqToSeq[i] = (byte) nInUseShadow;
1184 nInUseShadow++;
1185 }
1186 }
1187 this.nInUse = nInUseShadow;
Torsten Curdtca165392008-07-10 10:17:44 +00001188
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001189 final int eob = nInUseShadow + 1;
1190
1191 for (int i = eob; i >= 0; i--) {
Torsten Curdtca165392008-07-10 10:17:44 +00001192 mtfFreq[i] = 0;
1193 }
1194
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001195 for (int i = nInUseShadow; --i >= 0;) {
1196 yy[i] = (byte) i;
Torsten Curdtca165392008-07-10 10:17:44 +00001197 }
1198
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001199 int wr = 0;
1200 int zPend = 0;
Torsten Curdtca165392008-07-10 10:17:44 +00001201
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001202 for (int i = 0; i <= lastShadow; i++) {
1203 final byte ll_i = unseqToSeq[block[fmap[i]] & 0xff];
1204 byte tmp = yy[0];
1205 int j = 0;
Torsten Curdtca165392008-07-10 10:17:44 +00001206
Torsten Curdtca165392008-07-10 10:17:44 +00001207 while (ll_i != tmp) {
1208 j++;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001209 byte tmp2 = tmp;
Torsten Curdtca165392008-07-10 10:17:44 +00001210 tmp = yy[j];
1211 yy[j] = tmp2;
Torsten Curdtc1997372009-01-08 11:13:50 +00001212 }
Torsten Curdtca165392008-07-10 10:17:44 +00001213 yy[0] = tmp;
1214
1215 if (j == 0) {
1216 zPend++;
1217 } else {
1218 if (zPend > 0) {
1219 zPend--;
1220 while (true) {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001221 if ((zPend & 1) == 0) {
1222 sfmap[wr] = RUNA;
Torsten Curdtca165392008-07-10 10:17:44 +00001223 wr++;
1224 mtfFreq[RUNA]++;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001225 } else {
1226 sfmap[wr] = RUNB;
Torsten Curdtca165392008-07-10 10:17:44 +00001227 wr++;
1228 mtfFreq[RUNB]++;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001229 }
1230
1231 if (zPend >= 2) {
1232 zPend = (zPend - 2) >> 1;
1233 } else {
Torsten Curdtca165392008-07-10 10:17:44 +00001234 break;
Torsten Curdtc1997372009-01-08 11:13:50 +00001235 }
Torsten Curdtc1997372009-01-08 11:13:50 +00001236 }
Torsten Curdtca165392008-07-10 10:17:44 +00001237 zPend = 0;
1238 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001239 sfmap[wr] = (char) (j + 1);
Torsten Curdtca165392008-07-10 10:17:44 +00001240 wr++;
1241 mtfFreq[j + 1]++;
1242 }
1243 }
1244
1245 if (zPend > 0) {
1246 zPend--;
1247 while (true) {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001248 if ((zPend & 1) == 0) {
1249 sfmap[wr] = RUNA;
Torsten Curdtca165392008-07-10 10:17:44 +00001250 wr++;
1251 mtfFreq[RUNA]++;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001252 } else {
1253 sfmap[wr] = RUNB;
Torsten Curdtca165392008-07-10 10:17:44 +00001254 wr++;
1255 mtfFreq[RUNB]++;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001256 }
1257
1258 if (zPend >= 2) {
1259 zPend = (zPend - 2) >> 1;
1260 } else {
Torsten Curdtca165392008-07-10 10:17:44 +00001261 break;
1262 }
Torsten Curdtca165392008-07-10 10:17:44 +00001263 }
1264 }
1265
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001266 sfmap[wr] = (char) eob;
1267 mtfFreq[eob]++;
1268 this.nMTF = wr + 1;
Torsten Curdtca165392008-07-10 10:17:44 +00001269 }
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001270
Stefan Bodewigb06f7b42012-05-01 07:06:25 +00001271 static final class Data extends Object {
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001272
1273 // with blockSize 900k
Stefan Bodewig1ce57d92012-05-03 16:07:38 +00001274 /* maps unsigned byte => "does it occur in block" */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001275 final boolean[] inUse = new boolean[256]; // 256 byte
1276 final byte[] unseqToSeq = new byte[256]; // 256 byte
1277 final int[] mtfFreq = new int[MAX_ALPHA_SIZE]; // 1032 byte
1278 final byte[] selector = new byte[MAX_SELECTORS]; // 18002 byte
1279 final byte[] selectorMtf = new byte[MAX_SELECTORS]; // 18002 byte
1280
1281 final byte[] generateMTFValues_yy = new byte[256]; // 256 byte
1282 final byte[][] sendMTFValues_len = new byte[N_GROUPS][MAX_ALPHA_SIZE]; // 1548
1283 // byte
1284 final int[][] sendMTFValues_rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1285 // byte
1286 final int[] sendMTFValues_fave = new int[N_GROUPS]; // 24 byte
1287 final short[] sendMTFValues_cost = new short[N_GROUPS]; // 12 byte
1288 final int[][] sendMTFValues_code = new int[N_GROUPS][MAX_ALPHA_SIZE]; // 6192
1289 // byte
1290 final byte[] sendMTFValues2_pos = new byte[N_GROUPS]; // 6 byte
1291 final boolean[] sentMTFValues4_inUse16 = new boolean[16]; // 16 byte
1292
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001293 final int[] heap = new int[MAX_ALPHA_SIZE + 2]; // 1040 byte
1294 final int[] weight = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1295 final int[] parent = new int[MAX_ALPHA_SIZE * 2]; // 2064 byte
1296
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001297 // ------------
1298 // 333408 byte
1299
Stefan Bodewig1ce57d92012-05-03 16:07:38 +00001300 /* holds the RLEd block of original data starting at index 1.
1301 * After sorting the last byte added to the buffer is at index
1302 * 0. */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001303 final byte[] block; // 900021 byte
Stefan Bodewig1ce57d92012-05-03 16:07:38 +00001304 /* maps index in Burrows-Wheeler transformed block => index of
1305 * byte in original block */
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001306 final int[] fmap; // 3600000 byte
1307 final char[] sfmap; // 3600000 byte
1308 // ------------
1309 // 8433529 byte
1310 // ============
1311
1312 /**
Stefan Bodewig020c03d2012-05-12 05:00:30 +00001313 * Index of original line in Burrows-Wheeler table.
1314 *
1315 * <p>This is the index in fmap that points to the last byte
1316 * of the original data.</p>
Stefan Bodewigb06f7b42012-05-01 07:06:25 +00001317 */
1318 int origPtr;
1319
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001320 Data(int blockSize100k) {
Sebastian Bazleydf7f45f2009-04-15 00:33:02 +00001321 final int n = blockSize100k * BZip2Constants.BASEBLOCKSIZE;
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001322 this.block = new byte[(n + 1 + NUM_OVERSHOOT_BYTES)];
1323 this.fmap = new int[n];
1324 this.sfmap = new char[2 * n];
Stefan Bodewigb1bfbcb2009-03-27 14:14:07 +00001325 }
1326
1327 }
1328
Torsten Curdtca165392008-07-10 10:17:44 +00001329}