blob: 5f4b96c630a252c9f3896e026f7a7ff60f07cb5f [file] [log] [blame]
Torsten Curdtca165392008-07-10 10:17:44 +00001/*
2 * Licensed to the Apache Software Foundation (ASF) under one
3 * or more contributor license agreements. See the NOTICE file
4 * distributed with this work for additional information
5 * regarding copyright ownership. The ASF licenses this file
6 * to you under the Apache License, Version 2.0 (the
7 * "License"); you may not use this file except in compliance
8 * with the License. You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing,
13 * software distributed under the License is distributed on an
14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
15 * KIND, either express or implied. See the License for the
16 * specific language governing permissions and limitations
17 * under the License.
18 */
19package org.apache.commons.compress.compressors.bzip2;
20
21import java.io.IOException;
22import java.io.OutputStream;
23
24import org.apache.commons.compress.compressors.CompressorOutputStream;
25
Torsten Curdtca165392008-07-10 10:17:44 +000026/**
27 * An output stream that compresses into the BZip2 format (without the file
28 * header chars) into another stream. TODO: Update to BZip2 1.0.1
Torsten Curdtca165392008-07-10 10:17:44 +000029 */
30public class BZip2CompressorOutputStream extends CompressorOutputStream implements BZip2Constants {
31 protected static final int SETMASK = (1 << 21);
32 protected static final int CLEARMASK = (~SETMASK);
33 protected static final int GREATER_ICOST = 15;
34 protected static final int LESSER_ICOST = 0;
35 protected static final int SMALL_THRESH = 20;
36 protected static final int DEPTH_THRESH = 10;
37
38 /*
39 If you are ever unlucky/improbable enough
40 to get a stack overflow whilst sorting,
41 increase the following constant and try
42 again. In practice I have never seen the
43 stack go above 27 elems, so the following
44 limit seems very generous.
45 */
46 protected static final int QSORT_STACK_SIZE = 1000;
47
48 private static void panic() {
49 System.out.println("panic");
50 //throw new CError();
51 }
52
53 private void makeMaps() {
54 int i;
55 nInUse = 0;
56 for (i = 0; i < 256; i++) {
57 if (inUse[i]) {
58 seqToUnseq[nInUse] = (char) i;
59 unseqToSeq[i] = (char) nInUse;
60 nInUse++;
61 }
62 }
63 }
64
65 protected static void hbMakeCodeLengths(char[] len, int[] freq,
66 int alphaSize, int maxLen) {
67 /*
68 Nodes and heap entries run from 1. Entry 0
69 for both the heap and nodes is a sentinel.
70 */
71 int nNodes, nHeap, n1, n2, i, j, k;
72 boolean tooLong;
73
74 int[] heap = new int[MAX_ALPHA_SIZE + 2];
75 int[] weight = new int[MAX_ALPHA_SIZE * 2];
76 int[] parent = new int[MAX_ALPHA_SIZE * 2];
77
78 for (i = 0; i < alphaSize; i++) {
79 weight[i + 1] = (freq[i] == 0 ? 1 : freq[i]) << 8;
80 }
81
82 while (true) {
83 nNodes = alphaSize;
84 nHeap = 0;
85
86 heap[0] = 0;
87 weight[0] = 0;
88 parent[0] = -2;
89
90 for (i = 1; i <= alphaSize; i++) {
91 parent[i] = -1;
92 nHeap++;
93 heap[nHeap] = i;
94 {
95 int zz, tmp;
96 zz = nHeap;
97 tmp = heap[zz];
98 while (weight[tmp] < weight[heap[zz >> 1]]) {
99 heap[zz] = heap[zz >> 1];
100 zz >>= 1;
101 }
102 heap[zz] = tmp;
103 }
104 }
105 if (!(nHeap < (MAX_ALPHA_SIZE + 2))) {
106 panic();
107 }
108
109 while (nHeap > 1) {
110 n1 = heap[1];
111 heap[1] = heap[nHeap];
112 nHeap--;
113 {
114 int zz = 0, yy = 0, tmp = 0;
115 zz = 1;
116 tmp = heap[zz];
117 while (true) {
118 yy = zz << 1;
119 if (yy > nHeap) {
120 break;
121 }
122 if (yy < nHeap
123 && weight[heap[yy + 1]] < weight[heap[yy]]) {
124 yy++;
125 }
126 if (weight[tmp] < weight[heap[yy]]) {
127 break;
128 }
129 heap[zz] = heap[yy];
130 zz = yy;
131 }
132 heap[zz] = tmp;
133 }
134 n2 = heap[1];
135 heap[1] = heap[nHeap];
136 nHeap--;
137 {
138 int zz = 0, yy = 0, tmp = 0;
139 zz = 1;
140 tmp = heap[zz];
141 while (true) {
142 yy = zz << 1;
143 if (yy > nHeap) {
144 break;
145 }
146 if (yy < nHeap
147 && weight[heap[yy + 1]] < weight[heap[yy]]) {
148 yy++;
149 }
150 if (weight[tmp] < weight[heap[yy]]) {
151 break;
152 }
153 heap[zz] = heap[yy];
154 zz = yy;
155 }
156 heap[zz] = tmp;
157 }
158 nNodes++;
159 parent[n1] = parent[n2] = nNodes;
160
161 weight[nNodes] = ((weight[n1] & 0xffffff00)
162 + (weight[n2] & 0xffffff00))
Torsten Curdtc1997372009-01-08 11:13:50 +0000163 | (1 + (((weight[n1] & 0x000000ff)
164 > (weight[n2] & 0x000000ff))
165 ? (weight[n1] & 0x000000ff)
166 : (weight[n2] & 0x000000ff)));
Torsten Curdtca165392008-07-10 10:17:44 +0000167
168 parent[nNodes] = -1;
169 nHeap++;
170 heap[nHeap] = nNodes;
171 {
172 int zz = 0, tmp = 0;
173 zz = nHeap;
174 tmp = heap[zz];
175 while (weight[tmp] < weight[heap[zz >> 1]]) {
176 heap[zz] = heap[zz >> 1];
177 zz >>= 1;
178 }
179 heap[zz] = tmp;
180 }
181 }
182 if (!(nNodes < (MAX_ALPHA_SIZE * 2))) {
183 panic();
184 }
185
186 tooLong = false;
187 for (i = 1; i <= alphaSize; i++) {
188 j = 0;
189 k = i;
190 while (parent[k] >= 0) {
191 k = parent[k];
192 j++;
193 }
194 len[i - 1] = (char) j;
195 if (j > maxLen) {
196 tooLong = true;
197 }
198 }
199
200 if (!tooLong) {
201 break;
202 }
203
204 for (i = 1; i < alphaSize; i++) {
205 j = weight[i] >> 8;
206 j = 1 + (j / 2);
207 weight[i] = j << 8;
208 }
209 }
210 }
211
212 /*
213 index of the last char in the block, so
214 the block size == last + 1.
215 */
216 int last;
217
218 /*
219 index in zptr[] of original string after sorting.
220 */
221 int origPtr;
222
223 /*
224 always: in the range 0 .. 9.
225 The current block size is 100000 * this number.
226 */
227 int blockSize100k;
228
229 boolean blockRandomised;
230
231 int bytesOut;
232 int bsBuff;
233 int bsLive;
234 CRC mCrc = new CRC();
235
236 private boolean[] inUse = new boolean[256];
237 private int nInUse;
238
239 private char[] seqToUnseq = new char[256];
240 private char[] unseqToSeq = new char[256];
241
242 private char[] selector = new char[MAX_SELECTORS];
243 private char[] selectorMtf = new char[MAX_SELECTORS];
244
245 private char[] block;
246 private int[] quadrant;
247 private int[] zptr;
248 private short[] szptr;
249 private int[] ftab;
250
251 private int nMTF;
252
253 private int[] mtfFreq = new int[MAX_ALPHA_SIZE];
254
255 /*
256 * Used when sorting. If too many long comparisons
257 * happen, we stop sorting, randomise the block
258 * slightly, and try again.
259 */
260 private int workFactor;
261 private int workDone;
262 private int workLimit;
263 private boolean firstAttempt;
264 private int nBlocksRandomised;
265
266 private int currentChar = -1;
267 private int runLength = 0;
268
269 public BZip2CompressorOutputStream(OutputStream inStream) throws IOException {
270 this(inStream, 9);
271 }
272
273 public BZip2CompressorOutputStream(OutputStream inStream, int inBlockSize)
274 throws IOException {
275 block = null;
276 quadrant = null;
277 zptr = null;
278 ftab = null;
279
280 bsSetStream(inStream);
281
282 workFactor = 50;
283 if (inBlockSize > 9) {
284 inBlockSize = 9;
285 }
286 if (inBlockSize < 1) {
287 inBlockSize = 1;
288 }
289 blockSize100k = inBlockSize;
290 allocateCompressStructures();
291 initialize();
292 initBlock();
293 }
294
295 /**
296 *
297 * modified by Oliver Merkel, 010128
298 *
299 */
300 public void write(int bv) throws IOException {
301 int b = (256 + bv) % 256;
302 if (currentChar != -1) {
303 if (currentChar == b) {
304 runLength++;
305 if (runLength > 254) {
306 writeRun();
307 currentChar = -1;
308 runLength = 0;
309 }
310 } else {
311 writeRun();
312 runLength = 1;
313 currentChar = b;
314 }
315 } else {
316 currentChar = b;
317 runLength++;
318 }
319 }
320
321 private void writeRun() throws IOException {
322 if (last < allowableBlockSize) {
323 inUse[currentChar] = true;
324 for (int i = 0; i < runLength; i++) {
325 mCrc.updateCRC((char) currentChar);
326 }
327 switch (runLength) {
328 case 1:
329 last++;
330 block[last + 1] = (char) currentChar;
331 break;
332 case 2:
333 last++;
334 block[last + 1] = (char) currentChar;
335 last++;
336 block[last + 1] = (char) currentChar;
337 break;
338 case 3:
339 last++;
340 block[last + 1] = (char) currentChar;
341 last++;
342 block[last + 1] = (char) currentChar;
343 last++;
344 block[last + 1] = (char) currentChar;
345 break;
346 default:
347 inUse[runLength - 4] = true;
348 last++;
349 block[last + 1] = (char) currentChar;
350 last++;
351 block[last + 1] = (char) currentChar;
352 last++;
353 block[last + 1] = (char) currentChar;
354 last++;
355 block[last + 1] = (char) currentChar;
356 last++;
357 block[last + 1] = (char) (runLength - 4);
358 break;
359 }
360 } else {
361 endBlock();
362 initBlock();
363 writeRun();
364 }
365 }
366
367 boolean closed = false;
368
369 protected void finalize() throws Throwable {
370 close();
371 super.finalize();
372 }
373
374 public void close() throws IOException {
375 if (closed) {
376 return;
377 }
378
379 if (runLength > 0) {
380 writeRun();
381 }
382 currentChar = -1;
383 endBlock();
384 endCompression();
385 closed = true;
386 super.close();
387 bsStream.close();
388 }
389
390 public void flush() throws IOException {
391 super.flush();
392 bsStream.flush();
393 }
394
395 private int blockCRC, combinedCRC;
396
397 private void initialize() throws IOException {
398 bytesOut = 0;
399 nBlocksRandomised = 0;
400
401 /* Write `magic' bytes h indicating file-format == huffmanised,
402 followed by a digit indicating blockSize100k.
403 */
404 bsPutUChar('h');
405 bsPutUChar('0' + blockSize100k);
406
407 combinedCRC = 0;
408 }
409
410 private int allowableBlockSize;
411
412 private void initBlock() {
413 // blockNo++;
414 mCrc.initialiseCRC();
415 last = -1;
416 // ch = 0;
417
418 for (int i = 0; i < 256; i++) {
419 inUse[i] = false;
420 }
421
422 /* 20 is just a paranoia constant */
423 allowableBlockSize = baseBlockSize * blockSize100k - 20;
424 }
425
426 private void endBlock() throws IOException {
427 blockCRC = mCrc.getFinalCRC();
428 combinedCRC = (combinedCRC << 1) | (combinedCRC >>> 31);
429 combinedCRC ^= blockCRC;
430
Torsten Curdtc1997372009-01-08 11:13:50 +0000431 // If the stream was empty we must skip the rest of this method.
432 // See bug#32200.
433 if (last == -1) {
434 return;
435 }
436
Torsten Curdtca165392008-07-10 10:17:44 +0000437 /* sort the block and establish posn of original string */
438 doReversibleTransformation();
439
440 /*
441 A 6-byte block header, the value chosen arbitrarily
442 as 0x314159265359 :-). A 32 bit value does not really
443 give a strong enough guarantee that the value will not
444 appear by chance in the compressed datastream. Worst-case
445 probability of this event, for a 900k block, is about
446 2.0e-3 for 32 bits, 1.0e-5 for 40 bits and 4.0e-8 for 48 bits.
447 For a compressed file of size 100Gb -- about 100000 blocks --
448 only a 48-bit marker will do. NB: normal compression/
449 decompression do *not* rely on these statistical properties.
450 They are only important when trying to recover blocks from
451 damaged files.
452 */
453 bsPutUChar(0x31);
454 bsPutUChar(0x41);
455 bsPutUChar(0x59);
456 bsPutUChar(0x26);
457 bsPutUChar(0x53);
458 bsPutUChar(0x59);
459
460 /* Now the block's CRC, so it is in a known place. */
461 bsPutint(blockCRC);
462
463 /* Now a single bit indicating randomisation. */
464 if (blockRandomised) {
465 bsW(1, 1);
466 nBlocksRandomised++;
467 } else {
468 bsW(1, 0);
469 }
470
471 /* Finally, block's contents proper. */
472 moveToFrontCodeAndSend();
473 }
474
475 private void endCompression() throws IOException {
476 /*
477 Now another magic 48-bit number, 0x177245385090, to
478 indicate the end of the last block. (sqrt(pi), if
479 you want to know. I did want to use e, but it contains
480 too much repetition -- 27 18 28 18 28 46 -- for me
481 to feel statistically comfortable. Call me paranoid.)
482 */
483 bsPutUChar(0x17);
484 bsPutUChar(0x72);
485 bsPutUChar(0x45);
486 bsPutUChar(0x38);
487 bsPutUChar(0x50);
488 bsPutUChar(0x90);
489
490 bsPutint(combinedCRC);
491
492 bsFinishedWithStream();
493 }
494
495 private void hbAssignCodes (int[] code, char[] length, int minLen,
496 int maxLen, int alphaSize) {
497 int n, vec, i;
498
499 vec = 0;
500 for (n = minLen; n <= maxLen; n++) {
501 for (i = 0; i < alphaSize; i++) {
502 if (length[i] == n) {
503 code[i] = vec;
504 vec++;
505 }
Torsten Curdtc1997372009-01-08 11:13:50 +0000506 }
Torsten Curdtca165392008-07-10 10:17:44 +0000507 vec <<= 1;
508 }
509 }
510
511 private void bsSetStream(OutputStream f) {
512 bsStream = f;
513 bsLive = 0;
514 bsBuff = 0;
515 bytesOut = 0;
516 }
517
518 private void bsFinishedWithStream() throws IOException {
519 while (bsLive > 0) {
520 int ch = (bsBuff >> 24);
521 try {
522 bsStream.write(ch); // write 8-bit
523 } catch (IOException e) {
524 throw e;
525 }
526 bsBuff <<= 8;
527 bsLive -= 8;
528 bytesOut++;
529 }
530 }
531
532 private void bsW(int n, int v) throws IOException {
533 while (bsLive >= 8) {
534 int ch = (bsBuff >> 24);
535 try {
536 bsStream.write(ch); // write 8-bit
537 } catch (IOException e) {
538 throw e;
539 }
540 bsBuff <<= 8;
541 bsLive -= 8;
542 bytesOut++;
543 }
544 bsBuff |= (v << (32 - bsLive - n));
545 bsLive += n;
546 }
547
548 private void bsPutUChar(int c) throws IOException {
549 bsW(8, c);
550 }
551
552 private void bsPutint(int u) throws IOException {
553 bsW(8, (u >> 24) & 0xff);
554 bsW(8, (u >> 16) & 0xff);
555 bsW(8, (u >> 8) & 0xff);
556 bsW(8, u & 0xff);
557 }
558
559 private void bsPutIntVS(int numBits, int c) throws IOException {
560 bsW(numBits, c);
561 }
562
563 private void sendMTFValues() throws IOException {
564 char len[][] = new char[N_GROUPS][MAX_ALPHA_SIZE];
565
566 int v, t, i, j, gs, ge, totc, bt, bc, iter;
567 int nSelectors = 0, alphaSize, minLen, maxLen, selCtr;
Torsten Curdt70c83202009-01-12 11:09:21 +0000568 int nGroups; //, nBytes;
Torsten Curdtca165392008-07-10 10:17:44 +0000569
570 alphaSize = nInUse + 2;
571 for (t = 0; t < N_GROUPS; t++) {
572 for (v = 0; v < alphaSize; v++) {
573 len[t][v] = (char) GREATER_ICOST;
574 }
575 }
576
577 /* Decide how many coding tables to use */
578 if (nMTF <= 0) {
579 panic();
580 }
581
582 if (nMTF < 200) {
583 nGroups = 2;
584 } else if (nMTF < 600) {
585 nGroups = 3;
586 } else if (nMTF < 1200) {
587 nGroups = 4;
588 } else if (nMTF < 2400) {
589 nGroups = 5;
590 } else {
591 nGroups = 6;
592 }
593
594 /* Generate an initial set of coding tables */ {
595 int nPart, remF, tFreq, aFreq;
596
597 nPart = nGroups;
598 remF = nMTF;
599 gs = 0;
600 while (nPart > 0) {
601 tFreq = remF / nPart;
602 ge = gs - 1;
603 aFreq = 0;
604 while (aFreq < tFreq && ge < alphaSize - 1) {
605 ge++;
606 aFreq += mtfFreq[ge];
607 }
608
609 if (ge > gs && nPart != nGroups && nPart != 1
610 && ((nGroups - nPart) % 2 == 1)) {
611 aFreq -= mtfFreq[ge];
612 ge--;
613 }
614
615 for (v = 0; v < alphaSize; v++) {
616 if (v >= gs && v <= ge) {
617 len[nPart - 1][v] = (char) LESSER_ICOST;
618 } else {
619 len[nPart - 1][v] = (char) GREATER_ICOST;
620 }
621 }
622
623 nPart--;
624 gs = ge + 1;
625 remF -= aFreq;
626 }
627 }
628
629 int[][] rfreq = new int[N_GROUPS][MAX_ALPHA_SIZE];
630 int[] fave = new int[N_GROUPS];
631 short[] cost = new short[N_GROUPS];
632 /*
633 Iterate up to N_ITERS times to improve the tables.
634 */
635 for (iter = 0; iter < N_ITERS; iter++) {
636 for (t = 0; t < nGroups; t++) {
637 fave[t] = 0;
638 }
639
640 for (t = 0; t < nGroups; t++) {
641 for (v = 0; v < alphaSize; v++) {
642 rfreq[t][v] = 0;
643 }
644 }
645
646 nSelectors = 0;
647 totc = 0;
648 gs = 0;
649 while (true) {
650
651 /* Set group start & end marks. */
652 if (gs >= nMTF) {
653 break;
654 }
655 ge = gs + G_SIZE - 1;
656 if (ge >= nMTF) {
657 ge = nMTF - 1;
658 }
659
660 /*
661 Calculate the cost of this group as coded
662 by each of the coding tables.
663 */
664 for (t = 0; t < nGroups; t++) {
665 cost[t] = 0;
666 }
667
668 if (nGroups == 6) {
669 short cost0, cost1, cost2, cost3, cost4, cost5;
670 cost0 = cost1 = cost2 = cost3 = cost4 = cost5 = 0;
671 for (i = gs; i <= ge; i++) {
672 short icv = szptr[i];
673 cost0 += len[0][icv];
674 cost1 += len[1][icv];
675 cost2 += len[2][icv];
676 cost3 += len[3][icv];
677 cost4 += len[4][icv];
678 cost5 += len[5][icv];
679 }
680 cost[0] = cost0;
681 cost[1] = cost1;
682 cost[2] = cost2;
683 cost[3] = cost3;
684 cost[4] = cost4;
685 cost[5] = cost5;
686 } else {
687 for (i = gs; i <= ge; i++) {
688 short icv = szptr[i];
689 for (t = 0; t < nGroups; t++) {
690 cost[t] += len[t][icv];
691 }
692 }
693 }
694
695 /*
696 Find the coding table which is best for this group,
697 and record its identity in the selector table.
698 */
699 bc = 999999999;
700 bt = -1;
701 for (t = 0; t < nGroups; t++) {
702 if (cost[t] < bc) {
703 bc = cost[t];
704 bt = t;
705 }
Torsten Curdtc1997372009-01-08 11:13:50 +0000706 }
Torsten Curdtca165392008-07-10 10:17:44 +0000707 totc += bc;
708 fave[bt]++;
709 selector[nSelectors] = (char) bt;
710 nSelectors++;
711
712 /*
713 Increment the symbol frequencies for the selected table.
714 */
715 for (i = gs; i <= ge; i++) {
716 rfreq[bt][szptr[i]]++;
717 }
718
719 gs = ge + 1;
720 }
721
722 /*
723 Recompute the tables based on the accumulated frequencies.
724 */
725 for (t = 0; t < nGroups; t++) {
726 hbMakeCodeLengths(len[t], rfreq[t], alphaSize, 20);
727 }
728 }
729
730 rfreq = null;
731 fave = null;
732 cost = null;
733
734 if (!(nGroups < 8)) {
735 panic();
736 }
737 if (!(nSelectors < 32768 && nSelectors <= (2 + (900000 / G_SIZE)))) {
738 panic();
739 }
740
741
742 /* Compute MTF values for the selectors. */
743 {
744 char[] pos = new char[N_GROUPS];
745 char ll_i, tmp2, tmp;
746 for (i = 0; i < nGroups; i++) {
747 pos[i] = (char) i;
748 }
749 for (i = 0; i < nSelectors; i++) {
750 ll_i = selector[i];
751 j = 0;
752 tmp = pos[j];
753 while (ll_i != tmp) {
754 j++;
755 tmp2 = tmp;
756 tmp = pos[j];
757 pos[j] = tmp2;
758 }
759 pos[0] = tmp;
760 selectorMtf[i] = (char) j;
761 }
762 }
763
764 int[][] code = new int[N_GROUPS][MAX_ALPHA_SIZE];
765
766 /* Assign actual codes for the tables. */
767 for (t = 0; t < nGroups; t++) {
768 minLen = 32;
769 maxLen = 0;
770 for (i = 0; i < alphaSize; i++) {
771 if (len[t][i] > maxLen) {
772 maxLen = len[t][i];
773 }
774 if (len[t][i] < minLen) {
775 minLen = len[t][i];
776 }
777 }
778 if (maxLen > 20) {
779 panic();
780 }
781 if (minLen < 1) {
782 panic();
783 }
784 hbAssignCodes(code[t], len[t], minLen, maxLen, alphaSize);
785 }
786
787 /* Transmit the mapping table. */
788 {
789 boolean[] inUse16 = new boolean[16];
790 for (i = 0; i < 16; i++) {
791 inUse16[i] = false;
792 for (j = 0; j < 16; j++) {
793 if (inUse[i * 16 + j]) {
794 inUse16[i] = true;
795 }
796 }
797 }
798
Torsten Curdt70c83202009-01-12 11:09:21 +0000799 //nBytes = bytesOut;
Torsten Curdtca165392008-07-10 10:17:44 +0000800 for (i = 0; i < 16; i++) {
801 if (inUse16[i]) {
802 bsW(1, 1);
803 } else {
804 bsW(1, 0);
805 }
806 }
807
808 for (i = 0; i < 16; i++) {
809 if (inUse16[i]) {
810 for (j = 0; j < 16; j++) {
811 if (inUse[i * 16 + j]) {
812 bsW(1, 1);
813 } else {
814 bsW(1, 0);
815 }
816 }
817 }
818 }
819
820 }
821
822 /* Now the selectors. */
Torsten Curdt70c83202009-01-12 11:09:21 +0000823 //nBytes = bytesOut;
Torsten Curdtca165392008-07-10 10:17:44 +0000824 bsW (3, nGroups);
825 bsW (15, nSelectors);
826 for (i = 0; i < nSelectors; i++) {
827 for (j = 0; j < selectorMtf[i]; j++) {
828 bsW(1, 1);
829 }
830 bsW(1, 0);
831 }
832
833 /* Now the coding tables. */
Torsten Curdt70c83202009-01-12 11:09:21 +0000834 //nBytes = bytesOut;
Torsten Curdtca165392008-07-10 10:17:44 +0000835
836 for (t = 0; t < nGroups; t++) {
837 int curr = len[t][0];
838 bsW(5, curr);
839 for (i = 0; i < alphaSize; i++) {
840 while (curr < len[t][i]) {
841 bsW(2, 2);
842 curr++; /* 10 */
843 }
844 while (curr > len[t][i]) {
845 bsW(2, 3);
846 curr--; /* 11 */
847 }
848 bsW (1, 0);
849 }
850 }
851
852 /* And finally, the block data proper */
Torsten Curdt70c83202009-01-12 11:09:21 +0000853 //nBytes = bytesOut;
Torsten Curdtca165392008-07-10 10:17:44 +0000854 selCtr = 0;
855 gs = 0;
856 while (true) {
857 if (gs >= nMTF) {
858 break;
859 }
860 ge = gs + G_SIZE - 1;
861 if (ge >= nMTF) {
862 ge = nMTF - 1;
863 }
864 for (i = gs; i <= ge; i++) {
865 bsW(len[selector[selCtr]][szptr[i]],
866 code[selector[selCtr]][szptr[i]]);
867 }
868
869 gs = ge + 1;
870 selCtr++;
871 }
872 if (!(selCtr == nSelectors)) {
873 panic();
874 }
875 }
876
877 private void moveToFrontCodeAndSend () throws IOException {
878 bsPutIntVS(24, origPtr);
879 generateMTFValues();
880 sendMTFValues();
881 }
882
883 private OutputStream bsStream;
884
885 private void simpleSort(int lo, int hi, int d) {
886 int i, j, h, bigN, hp;
887 int v;
888
889 bigN = hi - lo + 1;
890 if (bigN < 2) {
891 return;
892 }
893
894 hp = 0;
895 while (incs[hp] < bigN) {
896 hp++;
897 }
898 hp--;
899
900 for (; hp >= 0; hp--) {
901 h = incs[hp];
902
903 i = lo + h;
904 while (true) {
905 /* copy 1 */
906 if (i > hi) {
907 break;
908 }
909 v = zptr[i];
910 j = i;
911 while (fullGtU(zptr[j - h] + d, v + d)) {
912 zptr[j] = zptr[j - h];
913 j = j - h;
914 if (j <= (lo + h - 1)) {
915 break;
916 }
917 }
918 zptr[j] = v;
919 i++;
920
921 /* copy 2 */
922 if (i > hi) {
923 break;
924 }
925 v = zptr[i];
926 j = i;
927 while (fullGtU(zptr[j - h] + d, v + d)) {
928 zptr[j] = zptr[j - h];
929 j = j - h;
930 if (j <= (lo + h - 1)) {
931 break;
932 }
933 }
934 zptr[j] = v;
935 i++;
936
937 /* copy 3 */
938 if (i > hi) {
939 break;
940 }
941 v = zptr[i];
942 j = i;
943 while (fullGtU(zptr[j - h] + d, v + d)) {
944 zptr[j] = zptr[j - h];
945 j = j - h;
946 if (j <= (lo + h - 1)) {
947 break;
948 }
949 }
950 zptr[j] = v;
951 i++;
952
953 if (workDone > workLimit && firstAttempt) {
954 return;
955 }
956 }
957 }
958 }
959
960 private void vswap(int p1, int p2, int n) {
961 int temp = 0;
962 while (n > 0) {
963 temp = zptr[p1];
964 zptr[p1] = zptr[p2];
965 zptr[p2] = temp;
966 p1++;
967 p2++;
968 n--;
969 }
970 }
971
972 private char med3(char a, char b, char c) {
973 char t;
974 if (a > b) {
975 t = a;
976 a = b;
977 b = t;
978 }
979 if (b > c) {
980 t = b;
981 b = c;
982 c = t;
983 }
984 if (a > b) {
985 b = a;
986 }
987 return b;
988 }
989
990 private static class StackElem {
991 int ll;
992 int hh;
993 int dd;
994 }
995
Torsten Curdtc1997372009-01-08 11:13:50 +0000996 private void qSort3(int loSt, int hiSt, int dSt, StackElem[] stack) {
Torsten Curdtca165392008-07-10 10:17:44 +0000997 int unLo, unHi, ltLo, gtHi, med, n, m;
998 int sp, lo, hi, d;
Torsten Curdtca165392008-07-10 10:17:44 +0000999
1000 sp = 0;
1001
1002 stack[sp].ll = loSt;
1003 stack[sp].hh = hiSt;
1004 stack[sp].dd = dSt;
1005 sp++;
1006
1007 while (sp > 0) {
1008 if (sp >= QSORT_STACK_SIZE) {
1009 panic();
1010 }
1011
1012 sp--;
1013 lo = stack[sp].ll;
1014 hi = stack[sp].hh;
1015 d = stack[sp].dd;
1016
1017 if (hi - lo < SMALL_THRESH || d > DEPTH_THRESH) {
1018 simpleSort(lo, hi, d);
1019 if (workDone > workLimit && firstAttempt) {
1020 return;
1021 }
1022 continue;
1023 }
1024
1025 med = med3(block[zptr[lo] + d + 1],
1026 block[zptr[hi ] + d + 1],
1027 block[zptr[(lo + hi) >> 1] + d + 1]);
1028
1029 unLo = ltLo = lo;
1030 unHi = gtHi = hi;
1031
1032 while (true) {
1033 while (true) {
1034 if (unLo > unHi) {
1035 break;
1036 }
1037 n = ((int) block[zptr[unLo] + d + 1]) - med;
1038 if (n == 0) {
1039 int temp = 0;
1040 temp = zptr[unLo];
1041 zptr[unLo] = zptr[ltLo];
1042 zptr[ltLo] = temp;
1043 ltLo++;
1044 unLo++;
1045 continue;
Torsten Curdtc1997372009-01-08 11:13:50 +00001046 }
Torsten Curdtca165392008-07-10 10:17:44 +00001047 if (n > 0) {
1048 break;
1049 }
1050 unLo++;
1051 }
1052 while (true) {
1053 if (unLo > unHi) {
1054 break;
1055 }
1056 n = ((int) block[zptr[unHi] + d + 1]) - med;
1057 if (n == 0) {
1058 int temp = 0;
1059 temp = zptr[unHi];
1060 zptr[unHi] = zptr[gtHi];
1061 zptr[gtHi] = temp;
1062 gtHi--;
1063 unHi--;
1064 continue;
Torsten Curdtc1997372009-01-08 11:13:50 +00001065 }
Torsten Curdtca165392008-07-10 10:17:44 +00001066 if (n < 0) {
1067 break;
1068 }
1069 unHi--;
1070 }
1071 if (unLo > unHi) {
1072 break;
1073 }
1074 int temp = 0;
1075 temp = zptr[unLo];
1076 zptr[unLo] = zptr[unHi];
1077 zptr[unHi] = temp;
1078 unLo++;
1079 unHi--;
1080 }
1081
1082 if (gtHi < ltLo) {
1083 stack[sp].ll = lo;
1084 stack[sp].hh = hi;
1085 stack[sp].dd = d + 1;
1086 sp++;
1087 continue;
1088 }
1089
1090 n = ((ltLo - lo) < (unLo - ltLo)) ? (ltLo - lo) : (unLo - ltLo);
1091 vswap(lo, unLo - n, n);
1092 m = ((hi - gtHi) < (gtHi - unHi)) ? (hi - gtHi) : (gtHi - unHi);
1093 vswap(unLo, hi - m + 1, m);
1094
1095 n = lo + unLo - ltLo - 1;
1096 m = hi - (gtHi - unHi) + 1;
1097
1098 stack[sp].ll = lo;
1099 stack[sp].hh = n;
1100 stack[sp].dd = d;
1101 sp++;
1102
1103 stack[sp].ll = n + 1;
1104 stack[sp].hh = m - 1;
1105 stack[sp].dd = d + 1;
1106 sp++;
1107
1108 stack[sp].ll = m;
1109 stack[sp].hh = hi;
1110 stack[sp].dd = d;
1111 sp++;
1112 }
1113 }
1114
1115 private void mainSort() {
1116 int i, j, ss, sb;
1117 int[] runningOrder = new int[256];
1118 int[] copy = new int[256];
1119 boolean[] bigDone = new boolean[256];
1120 int c1, c2;
1121 int numQSorted;
1122
1123 /*
1124 In the various block-sized structures, live data runs
1125 from 0 to last+NUM_OVERSHOOT_BYTES inclusive. First,
1126 set up the overshoot area for block.
1127 */
1128
1129 // if (verbosity >= 4) fprintf ( stderr, " sort initialise ...\n" );
Torsten Curdtc1997372009-01-08 11:13:50 +00001130
Torsten Curdtca165392008-07-10 10:17:44 +00001131 for (i = 0; i < NUM_OVERSHOOT_BYTES; i++) {
1132 block[last + i + 2] = block[(i % (last + 1)) + 1];
1133 }
1134 for (i = 0; i <= last + NUM_OVERSHOOT_BYTES; i++) {
1135 quadrant[i] = 0;
1136 }
1137
1138 block[0] = (char) (block[last + 1]);
1139
1140 if (last < 4000) {
1141 /*
1142 Use simpleSort(), since the full sorting mechanism
1143 has quite a large constant overhead.
1144 */
1145 for (i = 0; i <= last; i++) {
1146 zptr[i] = i;
1147 }
1148 firstAttempt = false;
1149 workDone = workLimit = 0;
1150 simpleSort(0, last, 0);
1151 } else {
1152 numQSorted = 0;
1153 for (i = 0; i <= 255; i++) {
1154 bigDone[i] = false;
1155 }
1156
1157 for (i = 0; i <= 65536; i++) {
1158 ftab[i] = 0;
1159 }
1160
1161 c1 = block[0];
1162 for (i = 0; i <= last; i++) {
1163 c2 = block[i + 1];
1164 ftab[(c1 << 8) + c2]++;
1165 c1 = c2;
1166 }
1167
1168 for (i = 1; i <= 65536; i++) {
1169 ftab[i] += ftab[i - 1];
1170 }
1171
1172 c1 = block[1];
1173 for (i = 0; i < last; i++) {
1174 c2 = block[i + 2];
1175 j = (c1 << 8) + c2;
1176 c1 = c2;
1177 ftab[j]--;
1178 zptr[ftab[j]] = i;
1179 }
1180
1181 j = ((block[last + 1]) << 8) + (block[1]);
1182 ftab[j]--;
1183 zptr[ftab[j]] = last;
1184
1185 /*
1186 Now ftab contains the first loc of every small bucket.
1187 Calculate the running order, from smallest to largest
1188 big bucket.
1189 */
1190
1191 for (i = 0; i <= 255; i++) {
1192 runningOrder[i] = i;
1193 }
1194
1195 {
1196 int vv;
1197 int h = 1;
1198 do {
1199 h = 3 * h + 1;
1200 }
1201 while (h <= 256);
1202 do {
1203 h = h / 3;
1204 for (i = h; i <= 255; i++) {
1205 vv = runningOrder[i];
1206 j = i;
1207 while ((ftab[((runningOrder[j - h]) + 1) << 8]
Torsten Curdtc1997372009-01-08 11:13:50 +00001208 - ftab[(runningOrder[j - h]) << 8])
1209 > (ftab[((vv) + 1) << 8] - ftab[(vv) << 8])) {
Torsten Curdtca165392008-07-10 10:17:44 +00001210 runningOrder[j] = runningOrder[j - h];
1211 j = j - h;
1212 if (j <= (h - 1)) {
1213 break;
1214 }
1215 }
1216 runningOrder[j] = vv;
1217 }
1218 } while (h != 1);
1219 }
1220
Torsten Curdtc1997372009-01-08 11:13:50 +00001221 StackElem[] stack = new StackElem[QSORT_STACK_SIZE];
1222 for (int count = 0; count < QSORT_STACK_SIZE; count++) {
1223 stack[count] = new StackElem();
1224 }
1225
Torsten Curdtca165392008-07-10 10:17:44 +00001226 /*
1227 The main sorting loop.
1228 */
1229 for (i = 0; i <= 255; i++) {
1230
1231 /*
1232 Process big buckets, starting with the least full.
1233 */
1234 ss = runningOrder[i];
1235
1236 /*
1237 Complete the big bucket [ss] by quicksorting
1238 any unsorted small buckets [ss, j]. Hopefully
1239 previous pointer-scanning phases have already
1240 completed many of the small buckets [ss, j], so
1241 we don't have to sort them at all.
1242 */
1243 for (j = 0; j <= 255; j++) {
1244 sb = (ss << 8) + j;
1245 if (!((ftab[sb] & SETMASK) == SETMASK)) {
1246 int lo = ftab[sb] & CLEARMASK;
1247 int hi = (ftab[sb + 1] & CLEARMASK) - 1;
1248 if (hi > lo) {
Torsten Curdtc1997372009-01-08 11:13:50 +00001249 qSort3(lo, hi, 2, stack);
Torsten Curdtca165392008-07-10 10:17:44 +00001250 numQSorted += (hi - lo + 1);
1251 if (workDone > workLimit && firstAttempt) {
1252 return;
1253 }
1254 }
1255 ftab[sb] |= SETMASK;
1256 }
1257 }
1258
1259 /*
1260 The ss big bucket is now done. Record this fact,
1261 and update the quadrant descriptors. Remember to
1262 update quadrants in the overshoot area too, if
1263 necessary. The "if (i < 255)" test merely skips
1264 this updating for the last bucket processed, since
1265 updating for the last bucket is pointless.
1266 */
1267 bigDone[ss] = true;
1268
1269 if (i < 255) {
1270 int bbStart = ftab[ss << 8] & CLEARMASK;
1271 int bbSize = (ftab[(ss + 1) << 8] & CLEARMASK) - bbStart;
1272 int shifts = 0;
1273
1274 while ((bbSize >> shifts) > 65534) {
1275 shifts++;
1276 }
1277
1278 for (j = 0; j < bbSize; j++) {
1279 int a2update = zptr[bbStart + j];
1280 int qVal = (j >> shifts);
1281 quadrant[a2update] = qVal;
1282 if (a2update < NUM_OVERSHOOT_BYTES) {
1283 quadrant[a2update + last + 1] = qVal;
1284 }
1285 }
1286
1287 if (!(((bbSize - 1) >> shifts) <= 65535)) {
1288 panic();
1289 }
1290 }
1291
1292 /*
1293 Now scan this big bucket so as to synthesise the
1294 sorted order for small buckets [t, ss] for all t != ss.
1295 */
1296 for (j = 0; j <= 255; j++) {
1297 copy[j] = ftab[(j << 8) + ss] & CLEARMASK;
1298 }
1299
1300 for (j = ftab[ss << 8] & CLEARMASK;
1301 j < (ftab[(ss + 1) << 8] & CLEARMASK); j++) {
1302 c1 = block[zptr[j]];
1303 if (!bigDone[c1]) {
1304 zptr[copy[c1]] = zptr[j] == 0 ? last : zptr[j] - 1;
1305 copy[c1]++;
1306 }
1307 }
1308
1309 for (j = 0; j <= 255; j++) {
1310 ftab[(j << 8) + ss] |= SETMASK;
1311 }
1312 }
1313 }
1314 }
1315
1316 private void randomiseBlock() {
1317 int i;
1318 int rNToGo = 0;
1319 int rTPos = 0;
1320 for (i = 0; i < 256; i++) {
1321 inUse[i] = false;
1322 }
1323
1324 for (i = 0; i <= last; i++) {
1325 if (rNToGo == 0) {
1326 rNToGo = (char) rNums[rTPos];
1327 rTPos++;
1328 if (rTPos == 512) {
1329 rTPos = 0;
1330 }
1331 }
1332 rNToGo--;
1333 block[i + 1] ^= ((rNToGo == 1) ? 1 : 0);
1334 // handle 16 bit signed numbers
1335 block[i + 1] &= 0xFF;
1336
1337 inUse[block[i + 1]] = true;
1338 }
1339 }
1340
1341 private void doReversibleTransformation() {
1342 int i;
1343
1344 workLimit = workFactor * last;
1345 workDone = 0;
1346 blockRandomised = false;
1347 firstAttempt = true;
1348
1349 mainSort();
1350
1351 if (workDone > workLimit && firstAttempt) {
1352 randomiseBlock();
1353 workLimit = workDone = 0;
1354 blockRandomised = true;
1355 firstAttempt = false;
1356 mainSort();
1357 }
1358
1359 origPtr = -1;
1360 for (i = 0; i <= last; i++) {
1361 if (zptr[i] == 0) {
1362 origPtr = i;
1363 break;
1364 }
Torsten Curdtc1997372009-01-08 11:13:50 +00001365 }
Torsten Curdtca165392008-07-10 10:17:44 +00001366
1367 if (origPtr == -1) {
1368 panic();
1369 }
1370 }
1371
1372 private boolean fullGtU(int i1, int i2) {
1373 int k;
1374 char c1, c2;
1375 int s1, s2;
1376
1377 c1 = block[i1 + 1];
1378 c2 = block[i2 + 1];
1379 if (c1 != c2) {
1380 return (c1 > c2);
1381 }
1382 i1++;
1383 i2++;
1384
1385 c1 = block[i1 + 1];
1386 c2 = block[i2 + 1];
1387 if (c1 != c2) {
1388 return (c1 > c2);
1389 }
1390 i1++;
1391 i2++;
1392
1393 c1 = block[i1 + 1];
1394 c2 = block[i2 + 1];
1395 if (c1 != c2) {
1396 return (c1 > c2);
1397 }
1398 i1++;
1399 i2++;
1400
1401 c1 = block[i1 + 1];
1402 c2 = block[i2 + 1];
1403 if (c1 != c2) {
1404 return (c1 > c2);
1405 }
1406 i1++;
1407 i2++;
1408
1409 c1 = block[i1 + 1];
1410 c2 = block[i2 + 1];
1411 if (c1 != c2) {
1412 return (c1 > c2);
1413 }
1414 i1++;
1415 i2++;
1416
1417 c1 = block[i1 + 1];
1418 c2 = block[i2 + 1];
1419 if (c1 != c2) {
1420 return (c1 > c2);
1421 }
1422 i1++;
1423 i2++;
1424
1425 k = last + 1;
1426
1427 do {
1428 c1 = block[i1 + 1];
1429 c2 = block[i2 + 1];
1430 if (c1 != c2) {
1431 return (c1 > c2);
1432 }
1433 s1 = quadrant[i1];
1434 s2 = quadrant[i2];
1435 if (s1 != s2) {
1436 return (s1 > s2);
1437 }
1438 i1++;
1439 i2++;
1440
1441 c1 = block[i1 + 1];
1442 c2 = block[i2 + 1];
1443 if (c1 != c2) {
1444 return (c1 > c2);
1445 }
1446 s1 = quadrant[i1];
1447 s2 = quadrant[i2];
1448 if (s1 != s2) {
1449 return (s1 > s2);
1450 }
1451 i1++;
1452 i2++;
1453
1454 c1 = block[i1 + 1];
1455 c2 = block[i2 + 1];
1456 if (c1 != c2) {
1457 return (c1 > c2);
1458 }
1459 s1 = quadrant[i1];
1460 s2 = quadrant[i2];
1461 if (s1 != s2) {
1462 return (s1 > s2);
1463 }
1464 i1++;
1465 i2++;
1466
1467 c1 = block[i1 + 1];
1468 c2 = block[i2 + 1];
1469 if (c1 != c2) {
1470 return (c1 > c2);
1471 }
1472 s1 = quadrant[i1];
1473 s2 = quadrant[i2];
1474 if (s1 != s2) {
1475 return (s1 > s2);
1476 }
1477 i1++;
1478 i2++;
1479
1480 if (i1 > last) {
1481 i1 -= last;
1482 i1--;
Torsten Curdtc1997372009-01-08 11:13:50 +00001483 }
Torsten Curdtca165392008-07-10 10:17:44 +00001484 if (i2 > last) {
1485 i2 -= last;
1486 i2--;
Torsten Curdtc1997372009-01-08 11:13:50 +00001487 }
Torsten Curdtca165392008-07-10 10:17:44 +00001488
1489 k -= 4;
1490 workDone++;
1491 } while (k >= 0);
1492
1493 return false;
1494 }
1495
1496 /*
1497 Knuth's increments seem to work better
1498 than Incerpi-Sedgewick here. Possibly
1499 because the number of elems to sort is
1500 usually small, typically <= 20.
1501 */
Torsten Curdtc1997372009-01-08 11:13:50 +00001502 private int[] incs = {1, 4, 13, 40, 121, 364, 1093, 3280,
Torsten Curdtca165392008-07-10 10:17:44 +00001503 9841, 29524, 88573, 265720,
Torsten Curdtc1997372009-01-08 11:13:50 +00001504 797161, 2391484};
Torsten Curdtca165392008-07-10 10:17:44 +00001505
1506 private void allocateCompressStructures () {
1507 int n = baseBlockSize * blockSize100k;
1508 block = new char[(n + 1 + NUM_OVERSHOOT_BYTES)];
1509 quadrant = new int[(n + NUM_OVERSHOOT_BYTES)];
1510 zptr = new int[n];
1511 ftab = new int[65537];
1512
1513 if (block == null || quadrant == null || zptr == null
1514 || ftab == null) {
1515 //int totalDraw = (n + 1 + NUM_OVERSHOOT_BYTES) + (n + NUM_OVERSHOOT_BYTES) + n + 65537;
1516 //compressOutOfMemory ( totalDraw, n );
1517 }
1518
1519 /*
1520 The back end needs a place to store the MTF values
1521 whilst it calculates the coding tables. We could
1522 put them in the zptr array. However, these values
1523 will fit in a short, so we overlay szptr at the
1524 start of zptr, in the hope of reducing the number
1525 of cache misses induced by the multiple traversals
1526 of the MTF values when calculating coding tables.
1527 Seems to improve compression speed by about 1%.
1528 */
1529 // szptr = zptr;
1530
1531
1532 szptr = new short[2 * n];
1533 }
1534
1535 private void generateMTFValues() {
1536 char[] yy = new char[256];
1537 int i, j;
1538 char tmp;
1539 char tmp2;
1540 int zPend;
1541 int wr;
1542 int EOB;
1543
1544 makeMaps();
1545 EOB = nInUse + 1;
1546
1547 for (i = 0; i <= EOB; i++) {
1548 mtfFreq[i] = 0;
1549 }
1550
1551 wr = 0;
1552 zPend = 0;
1553 for (i = 0; i < nInUse; i++) {
1554 yy[i] = (char) i;
1555 }
1556
1557
1558 for (i = 0; i <= last; i++) {
1559 char ll_i;
1560
1561 ll_i = unseqToSeq[block[zptr[i]]];
1562
1563 j = 0;
1564 tmp = yy[j];
1565 while (ll_i != tmp) {
1566 j++;
1567 tmp2 = tmp;
1568 tmp = yy[j];
1569 yy[j] = tmp2;
Torsten Curdtc1997372009-01-08 11:13:50 +00001570 }
Torsten Curdtca165392008-07-10 10:17:44 +00001571 yy[0] = tmp;
1572
1573 if (j == 0) {
1574 zPend++;
1575 } else {
1576 if (zPend > 0) {
1577 zPend--;
1578 while (true) {
1579 switch (zPend % 2) {
1580 case 0:
1581 szptr[wr] = (short) RUNA;
1582 wr++;
1583 mtfFreq[RUNA]++;
1584 break;
1585 case 1:
1586 szptr[wr] = (short) RUNB;
1587 wr++;
1588 mtfFreq[RUNB]++;
1589 break;
Torsten Curdtc1997372009-01-08 11:13:50 +00001590 }
Torsten Curdtca165392008-07-10 10:17:44 +00001591 if (zPend < 2) {
1592 break;
1593 }
1594 zPend = (zPend - 2) / 2;
Torsten Curdtc1997372009-01-08 11:13:50 +00001595 }
Torsten Curdtca165392008-07-10 10:17:44 +00001596 zPend = 0;
1597 }
1598 szptr[wr] = (short) (j + 1);
1599 wr++;
1600 mtfFreq[j + 1]++;
1601 }
1602 }
1603
1604 if (zPend > 0) {
1605 zPend--;
1606 while (true) {
1607 switch (zPend % 2) {
1608 case 0:
1609 szptr[wr] = (short) RUNA;
1610 wr++;
1611 mtfFreq[RUNA]++;
1612 break;
1613 case 1:
1614 szptr[wr] = (short) RUNB;
1615 wr++;
1616 mtfFreq[RUNB]++;
1617 break;
1618 }
1619 if (zPend < 2) {
1620 break;
1621 }
1622 zPend = (zPend - 2) / 2;
1623 }
1624 }
1625
1626 szptr[wr] = (short) EOB;
1627 wr++;
1628 mtfFreq[EOB]++;
1629
1630 nMTF = wr;
1631 }
1632}