blob: 04d494542b46d97a8145ca6a6981be2509a5ab1a [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2002-2005 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package com.sun.java.util.jar.pack;
27
28import java.io.*;
29import java.util.*;
30import java.util.zip.*;
31
32/**
33 * Heuristic chooser of basic encodings.
34 * Runs "zip" to measure the apparent information content after coding.
35 * @author John Rose
36 */
37class CodingChooser implements Constants {
38 int verbose;
39 int effort;
40 boolean optUseHistogram = true;
41 boolean optUsePopulationCoding = true;
42 boolean optUseAdaptiveCoding = true;
43 boolean disablePopCoding;
44 boolean disableRunCoding;
45 boolean topLevel = true;
46
47 // Derived from effort; >1 (<1) means try more (less) experiments
48 // when looking to beat a best score.
49 double fuzz;
50
51 Coding[] allCodingChoices;
52 Choice[] choices;
53 ByteArrayOutputStream context;
54 CodingChooser popHelper;
55 CodingChooser runHelper;
56
57 Random stress; // If not null, stress mode oracle.
58
59 // Element in sorted set of coding choices:
60 static
61 class Choice {
62 final Coding coding;
63 final int index; // index in choices
64 final int[] distance; // cache of distance
65 Choice(Coding coding, int index, int[] distance) {
66 this.coding = coding;
67 this.index = index;
68 this.distance = distance;
69 }
70 // These variables are reset and reused:
71 int searchOrder; // order in which it is checked
72 int minDistance; // min distance from already-checked choices
73 int zipSize; // size of encoding in sample, zipped output
74 int byteSize; // size of encoding in sample (debug only)
75 int histSize; // size of encoding, according to histogram
76
77 void reset() {
78 searchOrder = Integer.MAX_VALUE;
79 minDistance = Integer.MAX_VALUE;
80 zipSize = byteSize = histSize = -1;
81 }
82
83 boolean isExtra() {
84 return index < 0;
85 }
86
87 public String toString() {
88 return stringForDebug();
89 }
90
91 private String stringForDebug() {
92 String s = "";
93 if (searchOrder < Integer.MAX_VALUE)
94 s += " so: "+searchOrder;
95 if (minDistance < Integer.MAX_VALUE)
96 s += " md: "+minDistance;
97 if (zipSize > 0)
98 s += " zs: "+zipSize;
99 if (byteSize > 0)
100 s += " bs: "+byteSize;
101 if (histSize > 0)
102 s += " hs: "+histSize;
103 return "Choice["+index+"] "+s+" "+coding;
104 }
105 }
106
107 CodingChooser(int effort, Coding[] allCodingChoices) {
108 PropMap p200 = Utils.currentPropMap();
109 if (p200 != null) {
110 this.verbose
111 = Math.max(p200.getInteger(Utils.DEBUG_VERBOSE),
112 p200.getInteger(Utils.COM_PREFIX+"verbose.coding"));
113 this.optUseHistogram
114 = !p200.getBoolean(Utils.COM_PREFIX+"no.histogram");
115 this.optUsePopulationCoding
116 = !p200.getBoolean(Utils.COM_PREFIX+"no.population.coding");
117 this.optUseAdaptiveCoding
118 = !p200.getBoolean(Utils.COM_PREFIX+"no.adaptive.coding");
119 int stress
120 = p200.getInteger(Utils.COM_PREFIX+"stress.coding");
121 if (stress != 0)
122 this.stress = new Random(stress);
123 }
124
125 this.effort = effort;
126 // The following line "makes sense" but is too much
127 // work for a simple heuristic.
128 //if (effort > 5) zipDef.setLevel(effort);
129
130 this.allCodingChoices = allCodingChoices;
131
132 // If effort = 9, look carefully at any solution
133 // whose initial metrics are within 1% of the best
134 // so far. If effort = 1, look carefully only at
135 // solutions whose initial metrics promise a 1% win.
136 this.fuzz = 1 + (0.0025 * (effort-MID_EFFORT));
137
138 int nc = 0;
139 for (int i = 0; i < allCodingChoices.length; i++) {
140 if (allCodingChoices[i] == null) continue;
141 nc++;
142 }
143 choices = new Choice[nc];
144 nc = 0;
145 for (int i = 0; i < allCodingChoices.length; i++) {
146 if (allCodingChoices[i] == null) continue;
147 int[] distance = new int[choices.length];
148 choices[nc++] = new Choice(allCodingChoices[i], i, distance);
149 }
150 for (int i = 0; i < choices.length; i++) {
151 Coding ci = choices[i].coding;
152 assert(ci.distanceFrom(ci) == 0);
153 for (int j = 0; j < i; j++) {
154 Coding cj = choices[j].coding;
155 int dij = ci.distanceFrom(cj);
156 assert(dij > 0);
157 assert(dij == cj.distanceFrom(ci));
158 choices[i].distance[j] = dij;
159 choices[j].distance[i] = dij;
160 }
161 }
162 }
163
164 Choice makeExtraChoice(Coding coding) {
165 int[] distance = new int[choices.length];
166 for (int i = 0; i < distance.length; i++) {
167 Coding ci = choices[i].coding;
168 int dij = coding.distanceFrom(ci);
169 assert(dij > 0);
170 assert(dij == ci.distanceFrom(coding));
171 distance[i] = dij;
172 }
173 Choice c = new Choice(coding, -1, distance);
174 c.reset();
175 return c;
176 }
177
178 ByteArrayOutputStream getContext() {
179 if (context == null)
180 context = new ByteArrayOutputStream(1 << 16);
181 return context;
182 }
183
184 // These variables are reset and reused:
185 private int[] values;
186 private int start, end; // slice of values
187 private int[] deltas;
188 private int min, max;
189 private Histogram vHist;
190 private Histogram dHist;
191 private int searchOrder;
192 private Choice regularChoice;
193 private Choice bestChoice;
194 private CodingMethod bestMethod;
195 private int bestByteSize;
196 private int bestZipSize;
197 private int targetSize; // fuzzed target byte size
198
199 private void reset(int[] values, int start, int end) {
200 this.values = values;
201 this.start = start;
202 this.end = end;
203 this.deltas = null;
204 this.min = Integer.MAX_VALUE;
205 this.max = Integer.MIN_VALUE;
206 this.vHist = null;
207 this.dHist = null;
208 this.searchOrder = 0;
209 this.regularChoice = null;
210 this.bestChoice = null;
211 this.bestMethod = null;
212 this.bestZipSize = Integer.MAX_VALUE;
213 this.bestByteSize = Integer.MAX_VALUE;
214 this.targetSize = Integer.MAX_VALUE;
215 }
216
217 public static final int MIN_EFFORT = 1;
218 public static final int MID_EFFORT = 5;
219 public static final int MAX_EFFORT = 9;
220
221 public static final int POP_EFFORT = MID_EFFORT-1;
222 public static final int RUN_EFFORT = MID_EFFORT-2;
223
224 public static final int BYTE_SIZE = 0;
225 public static final int ZIP_SIZE = 1;
226
227 CodingMethod choose(int[] values, int start, int end, Coding regular, int[] sizes) {
228 // Save the value array
229 reset(values, start, end);
230
231 if (effort <= MIN_EFFORT || start >= end) {
232 if (sizes != null) {
233 int[] computed = computeSizePrivate(regular);
234 sizes[BYTE_SIZE] = computed[BYTE_SIZE];
235 sizes[ZIP_SIZE] = computed[ZIP_SIZE];
236 }
237 return regular;
238 }
239
240 if (optUseHistogram) {
241 getValueHistogram();
242 getDeltaHistogram();
243 }
244
245 for (int i = start; i < end; i++) {
246 int val = values[i];
247 if (min > val) min = val;
248 if (max < val) max = val;
249 }
250
251 // Find all the preset choices that might be worth looking at:
252 int numChoices = markUsableChoices(regular);
253
254 if (stress != null) {
255 // Make a random choice.
256 int rand = stress.nextInt(numChoices*2 + 4);
257 CodingMethod coding = null;
258 for (int i = 0; i < choices.length; i++) {
259 Choice c = choices[i];
260 if (c.searchOrder >= 0 && rand-- == 0) {
261 coding = c.coding;
262 break;
263 }
264 }
265 if (coding == null) {
266 if ((rand & 7) != 0) {
267 coding = regular;
268 } else {
269 // Pick a totally random coding 6% of the time.
270 coding = stressCoding(min, max);
271 }
272 }
273 if (!disablePopCoding
274 && optUsePopulationCoding
275 && effort >= POP_EFFORT) {
276 coding = stressPopCoding(coding);
277 }
278 if (!disableRunCoding
279 && optUseAdaptiveCoding
280 && effort >= RUN_EFFORT) {
281 coding = stressAdaptiveCoding(coding);
282 }
283 return coding;
284 }
285
286 double searchScale = 1.0;
287 for (int x = effort; x < MAX_EFFORT; x++) {
288 searchScale /= 1.414; // every 2 effort points doubles work
289 }
290 int searchOrderLimit = (int)Math.ceil( numChoices * searchScale );
291
292 // Start by evaluating the "regular" choice.
293 bestChoice = regularChoice;
294 evaluate(regularChoice);
295 int maxd = updateDistances(regularChoice);
296
297 // save these first-cut numbers for later
298 int zipSize1 = bestZipSize;
299 int byteSize1 = bestByteSize;
300
301 if (regularChoice.coding == regular && topLevel) {
302 // Give credit for being the default; no band header is needed.
303 // Rather than increasing every other size value by the band
304 // header amount, we decrement this one metric, to give it an edge.
305 // Decreasing zipSize by a byte length is conservatively correct,
306 // especially considering that the escape byte is not likely to
307 // zip well with other bytes in the band.
308 int X = BandStructure.encodeEscapeValue(_meta_canon_max, regular);
309 if (regular.canRepresentSigned(X)) {
310 int Xlen = regular.getLength(X); // band coding header
311 //regularChoice.histSize -= Xlen; // keep exact byteSize
312 //regularChoice.byteSize -= Xlen; // keep exact byteSize
313 regularChoice.zipSize -= Xlen;
314 bestByteSize = regularChoice.byteSize;
315 bestZipSize = regularChoice.zipSize;
316 }
317 }
318
319 int dscale = 1;
320 // Continually select a new choice to evaluate.
321 while (searchOrder < searchOrderLimit) {
322 Choice nextChoice;
323 if (dscale > maxd) dscale = 1; // cycle dscale values!
324 int dhi = maxd / dscale;
325 int dlo = maxd / (dscale *= 2) + 1;
326 nextChoice = findChoiceNear(bestChoice, dhi, dlo);
327 if (nextChoice == null) continue;
328 assert(nextChoice.coding.canRepresent(min, max));
329 evaluate(nextChoice);
330 int nextMaxd = updateDistances(nextChoice);
331 if (nextChoice == bestChoice) {
332 maxd = nextMaxd;
333 if (verbose > 5) Utils.log.info("maxd = "+maxd);
334 }
335 }
336
337 // Record best "plain coding" choice.
338 Coding plainBest = bestChoice.coding;
339 assert(plainBest == bestMethod);
340
341 if (verbose > 2) {
342 Utils.log.info("chooser: plain result="+bestChoice+" after "+bestChoice.searchOrder+" rounds, "+(regularChoice.zipSize-bestZipSize)+" fewer bytes than regular "+regular);
343 }
344 bestChoice = null;
345
346 if (!disablePopCoding
347 && optUsePopulationCoding
348 && effort >= POP_EFFORT
349 && bestMethod instanceof Coding) {
350 tryPopulationCoding(plainBest);
351 }
352
353 if (!disableRunCoding
354 && optUseAdaptiveCoding
355 && effort >= RUN_EFFORT
356 && bestMethod instanceof Coding) {
357 tryAdaptiveCoding(plainBest);
358 }
359
360 // Pass back the requested information:
361 if (sizes != null) {
362 sizes[BYTE_SIZE] = bestByteSize;
363 sizes[ZIP_SIZE] = bestZipSize;
364 }
365 if (verbose > 1) {
366 Utils.log.info("chooser: result="+bestMethod+" "+
367 (zipSize1-bestZipSize)+
368 " fewer bytes than regular "+regular+
369 "; win="+pct(zipSize1-bestZipSize, zipSize1));
370 }
371 CodingMethod bestMethod = this.bestMethod;
372 reset(null, 0, 0); // for GC
373 return bestMethod;
374 }
375 CodingMethod choose(int[] values, int start, int end, Coding regular) {
376 return choose(values, start, end, regular, null);
377 }
378 CodingMethod choose(int[] values, Coding regular, int[] sizes) {
379 return choose(values, 0, values.length, regular, sizes);
380 }
381 CodingMethod choose(int[] values, Coding regular) {
382 return choose(values, 0, values.length, regular, null);
383 }
384
385 private int markUsableChoices(Coding regular) {
386 int numChoices = 0;
387 for (int i = 0; i < choices.length; i++) {
388 Choice c = choices[i];
389 c.reset();
390 if (!c.coding.canRepresent(min, max)) {
391 // Mark as already visited:
392 c.searchOrder = -1;
393 if (verbose > 1 && c.coding == regular) {
394 Utils.log.info("regular coding cannot represent ["+min+".."+max+"]: "+regular);
395 }
396 continue;
397 }
398 if (c.coding == regular)
399 regularChoice = c;
400 numChoices++;
401 }
402 if (regularChoice == null && regular.canRepresent(min, max)) {
403 regularChoice = makeExtraChoice(regular);
404 if (verbose > 1) {
405 Utils.log.info("*** regular choice is extra: "+regularChoice.coding);
406 }
407 }
408 if (regularChoice == null) {
409 for (int i = 0; i < choices.length; i++) {
410 Choice c = choices[i];
411 if (c.searchOrder != -1) {
412 regularChoice = c; // arbitrary pick
413 break;
414 }
415 }
416 if (verbose > 1) {
417 Utils.log.info("*** regular choice does not apply "+regular);
418 Utils.log.info(" using instead "+regularChoice.coding);
419 }
420 }
421 if (verbose > 2) {
422 Utils.log.info("chooser: #choices="+numChoices+" ["+min+".."+max+"]");
423 if (verbose > 4) {
424 for (int i = 0; i < choices.length; i++) {
425 Choice c = choices[i];
426 if (c.searchOrder >= 0)
427 Utils.log.info(" "+c);
428 }
429 }
430 }
431 return numChoices;
432 }
433
434 // Find an arbitrary choice at least dlo away from a previously
435 // evaluated choices, and at most dhi. Try also to regulate its
436 // min distance to all previously evaluated choices, in this range.
437 private Choice findChoiceNear(Choice near, int dhi, int dlo) {
438 if (verbose > 5)
439 Utils.log.info("findChoice "+dhi+".."+dlo+" near: "+near);
440 int[] distance = near.distance;
441 Choice found = null;
442 for (int i = 0; i < choices.length; i++) {
443 Choice c = choices[i];
444 if (c.searchOrder < searchOrder)
445 continue; // already searched
446 // Distance from "near" guy must be in bounds:
447 if (distance[i] >= dlo && distance[i] <= dhi) {
448 // Try also to keep min-distance from other guys in bounds:
449 if (c.minDistance >= dlo && c.minDistance <= dhi) {
450 if (verbose > 5)
451 Utils.log.info("findChoice => good "+c);
452 return c;
453 }
454 found = c;
455 }
456 }
457 if (verbose > 5)
458 Utils.log.info("findChoice => found "+found);
459 return found;
460 }
461
462 private void evaluate(Choice c) {
463 assert(c.searchOrder == Integer.MAX_VALUE);
464 c.searchOrder = searchOrder++;
465 boolean mustComputeSize;
466 if (c == bestChoice || c.isExtra()) {
467 mustComputeSize = true;
468 } else if (optUseHistogram) {
469 Histogram hist = getHistogram(c.coding.isDelta());
470 c.histSize = (int)Math.ceil(hist.getBitLength(c.coding) / 8);
471 c.byteSize = c.histSize;
472 mustComputeSize = (c.byteSize <= targetSize);
473 } else {
474 mustComputeSize = true;
475 }
476 if (mustComputeSize) {
477 int[] sizes = computeSizePrivate(c.coding);
478 c.byteSize = sizes[BYTE_SIZE];
479 c.zipSize = sizes[ZIP_SIZE];
480 if (noteSizes(c.coding, c.byteSize, c.zipSize))
481 bestChoice = c;
482 }
483 if (c.histSize >= 0) {
484 assert(c.byteSize == c.histSize); // models should agree
485 }
486 if (verbose > 4) {
487 Utils.log.info("evaluated "+c);
488 }
489 }
490
491 private boolean noteSizes(CodingMethod c, int byteSize, int zipSize) {
492 assert(zipSize > 0 && byteSize > 0);
493 boolean better = (zipSize < bestZipSize);
494 if (verbose > 3)
495 Utils.log.info("computed size "+c+" "+byteSize+"/zs="+zipSize+
496 ((better && bestMethod != null)?
497 (" better by "+
498 pct(bestZipSize - zipSize, zipSize)): ""));
499 if (better) {
500 bestMethod = c;
501 bestZipSize = zipSize;
502 bestByteSize = byteSize;
503 targetSize = (int)(byteSize * fuzz);
504 return true;
505 } else {
506 return false;
507 }
508 }
509
510
511 private int updateDistances(Choice c) {
512 // update all minDistance values in still unevaluated choices
513 int[] distance = c.distance;
514 int maxd = 0; // how far is c from everybody else?
515 for (int i = 0; i < choices.length; i++) {
516 Choice c2 = choices[i];
517 if (c2.searchOrder < searchOrder)
518 continue;
519 int d = distance[i];
520 if (verbose > 5)
521 Utils.log.info("evaluate dist "+d+" to "+c2);
522 int mind = c2.minDistance;
523 if (mind > d)
524 c2.minDistance = mind = d;
525 if (maxd < d)
526 maxd = d;
527 }
528 // Now maxd has the distance of the farthest outlier
529 // from all evaluated choices.
530 if (verbose > 5)
531 Utils.log.info("evaluate maxd => "+maxd);
532 return maxd;
533 }
534
535 // Compute the coded size of a sequence of values.
536 // The first int is the size in uncompressed bytes.
537 // The second is an estimate of the compressed size of these bytes.
538 public void computeSize(CodingMethod c, int[] values, int start, int end, int[] sizes) {
539 if (end <= start) {
540 sizes[BYTE_SIZE] = sizes[ZIP_SIZE] = 0;
541 return;
542 }
543 try {
544 resetData();
545 c.writeArrayTo(byteSizer, values, start, end);
546 sizes[BYTE_SIZE] = getByteSize();
547 sizes[ZIP_SIZE] = getZipSize();
548 } catch (IOException ee) {
549 throw new RuntimeException(ee); // cannot happen
550 }
551 }
552 public void computeSize(CodingMethod c, int[] values, int[] sizes) {
553 computeSize(c, values, 0, values.length, sizes);
554 }
555 public int[] computeSize(CodingMethod c, int[] values, int start, int end) {
556 int[] sizes = { 0, 0 };
557 computeSize(c, values, start, end, sizes);
558 return sizes;
559 }
560 public int[] computeSize(CodingMethod c, int[] values) {
561 return computeSize(c, values, 0, values.length);
562 }
563 // This version uses the implicit local arguments
564 private int[] computeSizePrivate(CodingMethod c) {
565 int[] sizes = { 0, 0 };
566 computeSize(c, values, start, end, sizes);
567 return sizes;
568 }
569 public int computeByteSize(CodingMethod cm, int[] values, int start, int end) {
570 int len = end-start;
571 if (len < 0) {
572 return 0;
573 }
574 if (cm instanceof Coding) {
575 Coding c = (Coding) cm;
576 int size = c.getLength(values, start, end);
577 int size2;
578 assert(size == (size2=countBytesToSizer(cm, values, start, end)))
579 : (cm+" : "+size+" != "+size2);
580 return size;
581 }
582 return countBytesToSizer(cm, values, start, end);
583 }
584 private int countBytesToSizer(CodingMethod cm, int[] values, int start, int end) {
585 try {
586 byteOnlySizer.reset();
587 cm.writeArrayTo(byteOnlySizer, values, start, end);
588 return byteOnlySizer.getSize();
589 } catch (IOException ee) {
590 throw new RuntimeException(ee); // cannot happen
591 }
592 }
593
594 int[] getDeltas(int min, int max) {
595 if ((min|max) != 0)
596 return Coding.makeDeltas(values, start, end, min, max);
597 if (deltas == null) {
598 deltas = Coding.makeDeltas(values, start, end, 0, 0);
599 }
600 return deltas;
601 }
602 Histogram getValueHistogram() {
603 if (vHist == null) {
604 vHist = new Histogram(values, start, end);
605 if (verbose > 3) {
606 vHist.print("vHist", System.out);
607 } else if (verbose > 1) {
608 vHist.print("vHist", null, System.out);
609 }
610 }
611 return vHist;
612 }
613 Histogram getDeltaHistogram() {
614 if (dHist == null) {
615 dHist = new Histogram(getDeltas(0, 0));
616 if (verbose > 3) {
617 dHist.print("dHist", System.out);
618 } else if (verbose > 1) {
619 dHist.print("dHist", null, System.out);
620 }
621 }
622 return dHist;
623 }
624 Histogram getHistogram(boolean isDelta) {
625 return isDelta ? getDeltaHistogram(): getValueHistogram();
626 }
627
628 private void tryPopulationCoding(Coding plainCoding) {
629 // assert(plainCoding.canRepresent(min, max));
630 Histogram hist = getValueHistogram();
631 // Start with "reasonable" default codings.
632 final int approxL = 64;
633 Coding favoredCoding = plainCoding.getValueCoding();
634 Coding tokenCoding = BandStructure.UNSIGNED5.setL(approxL);
635 Coding unfavoredCoding = plainCoding.getValueCoding();
636 // There's going to be a band header. Estimate conservatively large.
637 final int BAND_HEADER = 4;
638 // Keep a running model of the predicted sizes of the F/T/U sequences.
639 int currentFSize;
640 int currentTSize;
641 int currentUSize;
642 // Start by assuming a degenerate favored-value length of 0,
643 // which looks like a bunch of zero tokens followed by the
644 // original sequence.
645 // The {F} list ends with a repeated F value; find worst case:
646 currentFSize =
647 BAND_HEADER + Math.max(favoredCoding.getLength(min),
648 favoredCoding.getLength(max));
649 // The {T} list starts out a bunch of zeros, each of length 1.
650 final int ZERO_LEN = tokenCoding.getLength(0);
651 currentTSize = ZERO_LEN * (end-start);
652 // The {U} list starts out a copy of the plainCoding:
653 currentUSize = (int) Math.ceil(hist.getBitLength(unfavoredCoding) / 8);
654
655 int bestPopSize = (currentFSize + currentTSize + currentUSize);
656 int bestPopFVC = 0;
657
658 // Record all the values, in decreasing order of favor.
659 int[] allFavoredValues = new int[1+hist.getTotalLength()];
660 //int[] allPopSizes = new int[1+hist.getTotalLength()];
661
662 // What sizes are "interesting"?
663 int targetLowFVC = -1;
664 int targetHighFVC = -1;
665
666 // For each length, adjust the currentXSize model, and look for a win.
667 int[][] matrix = hist.getMatrix();
668 int mrow = -1;
669 int mcol = 1;
670 int mrowFreq = 0;
671 for (int fvcount = 1; fvcount <= hist.getTotalLength(); fvcount++) {
672 // The {F} list gets an additional member.
673 // Take it from the end of the current matrix row.
674 // (It's the end, so that we get larger favored values first.)
675 if (mcol == 1) {
676 mrow += 1;
677 mrowFreq = matrix[mrow][0];
678 mcol = matrix[mrow].length;
679 }
680 int thisValue = matrix[mrow][--mcol];
681 allFavoredValues[fvcount] = thisValue;
682 int thisVLen = favoredCoding.getLength(thisValue);
683 currentFSize += thisVLen;
684 // The token list replaces occurrences of zero with a new token:
685 int thisVCount = mrowFreq;
686 int thisToken = fvcount;
687 currentTSize += (tokenCoding.getLength(thisToken)
688 - ZERO_LEN) * thisVCount;
689 // The unfavored list loses occurrences of the newly favored value.
690 // (This is the whole point of the exercise!)
691 currentUSize -= thisVLen * thisVCount;
692 int currentSize = (currentFSize + currentTSize + currentUSize);
693 //allPopSizes[fvcount] = currentSize;
694 if (bestPopSize > currentSize) {
695 if (currentSize <= targetSize) {
696 targetHighFVC = fvcount;
697 if (targetLowFVC < 0)
698 targetLowFVC = fvcount;
699 if (verbose > 4)
700 Utils.log.info("better pop-size at fvc="+fvcount+
701 " by "+pct(bestPopSize-currentSize,
702 bestPopSize));
703 }
704 bestPopSize = currentSize;
705 bestPopFVC = fvcount;
706 }
707 }
708 if (targetLowFVC < 0) {
709 if (verbose > 1) {
710 // Complete loss.
711 if (verbose > 1)
712 Utils.log.info("no good pop-size; best was "+
713 bestPopSize+" at "+bestPopFVC+
714 " worse by "+
715 pct(bestPopSize-bestByteSize,
716 bestByteSize));
717 }
718 return;
719 }
720 if (verbose > 1)
721 Utils.log.info("initial best pop-size at fvc="+bestPopFVC+
722 " in ["+targetLowFVC+".."+targetHighFVC+"]"+
723 " by "+pct(bestByteSize-bestPopSize,
724 bestByteSize));
725 int oldZipSize = bestZipSize;
726 // Now close onto a specific coding, testing more rigorously
727 // with the zipSize metric.
728 // Questions to decide:
729 // 1. How many favored values?
730 // 2. What token coding (TC)?
731 // 3. Sort favored values by value within length brackets?
732 // 4. What favored coding?
733 // 5. What unfavored coding?
734 // Steps 1/2/3 are interdependent, and may be iterated.
735 // Steps 4 and 5 may be decided independently afterward.
736 int[] LValuesCoded = PopulationCoding.LValuesCoded;
737 ArrayList bestFits = new ArrayList();
738 ArrayList fullFits = new ArrayList();
739 ArrayList longFits = new ArrayList();
740 final int PACK_TO_MAX_S = 1;
741 if (bestPopFVC <= 255) {
742 bestFits.add(BandStructure.BYTE1);
743 } else {
744 int bestB = Coding.B_MAX;
745 boolean doFullAlso = (effort > POP_EFFORT);
746 if (doFullAlso)
747 fullFits.add(BandStructure.BYTE1.setS(PACK_TO_MAX_S));
748 for (int i = LValuesCoded.length-1; i >= 1; i--) {
749 int L = LValuesCoded[i];
750 Coding c0 = PopulationCoding.fitTokenCoding(targetLowFVC, L);
751 Coding c1 = PopulationCoding.fitTokenCoding(bestPopFVC, L);
752 Coding c3 = PopulationCoding.fitTokenCoding(targetHighFVC, L);
753 if (c1 != null) {
754 if (!bestFits.contains(c1))
755 bestFits.add(c1);
756 if (bestB > c1.B())
757 bestB = c1.B();
758 }
759 if (doFullAlso) {
760 if (c3 == null) c3 = c1;
761 for (int B = c0.B(); B <= c3.B(); B++) {
762 if (B == c1.B()) continue;
763 if (B == 1) continue;
764 Coding c2 = c3.setB(B).setS(PACK_TO_MAX_S);
765 if (!fullFits.contains(c2))
766 fullFits.add(c2);
767 }
768 }
769 }
770 // interleave all B greater than bestB with best and full fits
771 for (Iterator i = bestFits.iterator(); i.hasNext(); ) {
772 Coding c = (Coding) i.next();
773 if (c.B() > bestB) {
774 i.remove();
775 longFits.add(0, c);
776 }
777 }
778 }
779 ArrayList allFits = new ArrayList();
780 for (Iterator i = bestFits.iterator(),
781 j = fullFits.iterator(),
782 k = longFits.iterator();
783 i.hasNext() || j.hasNext() || k.hasNext(); ) {
784 if (i.hasNext()) allFits.add(i.next());
785 if (j.hasNext()) allFits.add(j.next());
786 if (k.hasNext()) allFits.add(k.next());
787 }
788 bestFits.clear();
789 fullFits.clear();
790 longFits.clear();
791 int maxFits = allFits.size();
792 if (effort == POP_EFFORT)
793 maxFits = 2;
794 else if (maxFits > 4) {
795 maxFits -= 4;
796 maxFits = (maxFits * (effort-POP_EFFORT)
797 ) / (MAX_EFFORT-POP_EFFORT);
798 maxFits += 4;
799 }
800 if (allFits.size() > maxFits) {
801 if (verbose > 4)
802 Utils.log.info("allFits before clip: "+allFits);
803 allFits.subList(maxFits, allFits.size()).clear();
804 }
805 if (verbose > 3)
806 Utils.log.info("allFits: "+allFits);
807 for (Iterator i = allFits.iterator(); i.hasNext(); ) {
808 Coding tc = (Coding) i.next();
809 boolean packToMax = false;
810 if (tc.S() == PACK_TO_MAX_S) {
811 // Kludge: setS(PACK_TO_MAX_S) means packToMax here.
812 packToMax = true;
813 tc = tc.setS(0);
814 }
815 int fVlen;
816 if (!packToMax) {
817 fVlen = bestPopFVC;
818 assert(tc.umax() >= fVlen);
819 assert(tc.B() == 1 || tc.setB(tc.B()-1).umax() < fVlen);
820 } else {
821 fVlen = Math.min(tc.umax(), targetHighFVC);
822 if (fVlen < targetLowFVC)
823 continue;
824 if (fVlen == bestPopFVC)
825 continue; // redundant test
826 }
827 PopulationCoding pop = new PopulationCoding();
828 pop.setHistogram(hist);
829 pop.setL(tc.L());
830 pop.setFavoredValues(allFavoredValues, fVlen);
831 assert(pop.tokenCoding == tc); // predict correctly
832 pop.resortFavoredValues();
833 int[] tcsizes =
834 computePopSizePrivate(pop,
835 favoredCoding, unfavoredCoding);
836 noteSizes(pop, tcsizes[BYTE_SIZE], BAND_HEADER+tcsizes[ZIP_SIZE]);
837 }
838 if (verbose > 3) {
839 Utils.log.info("measured best pop, size="+bestByteSize+
840 "/zs="+bestZipSize+
841 " better by "+
842 pct(oldZipSize-bestZipSize, oldZipSize));
843 if (bestZipSize < oldZipSize) {
844 Utils.log.info(">>> POP WINS BY "+
845 (oldZipSize - bestZipSize));
846 }
847 }
848 }
849
850 private
851 int[] computePopSizePrivate(PopulationCoding pop,
852 Coding favoredCoding,
853 Coding unfavoredCoding) {
854 if (popHelper == null) {
855 popHelper = new CodingChooser(effort, allCodingChoices);
856 if (stress != null)
857 popHelper.addStressSeed(stress.nextInt());
858 popHelper.topLevel = false;
859 popHelper.verbose -= 1;
860 popHelper.disablePopCoding = true;
861 popHelper.disableRunCoding = this.disableRunCoding;
862 if (effort < MID_EFFORT)
863 // No nested run codings.
864 popHelper.disableRunCoding = true;
865 }
866 int fVlen = pop.fVlen;
867 if (verbose > 2) {
868 Utils.log.info("computePopSizePrivate fvlen="+fVlen+
869 " tc="+pop.tokenCoding);
870 Utils.log.info("{ //BEGIN");
871 }
872
873 // Find good coding choices for the token and unfavored sequences.
874 int[] favoredValues = pop.fValues;
875 int[][] vals = pop.encodeValues(values, start, end);
876 int[] tokens = vals[0];
877 int[] unfavoredValues = vals[1];
878 if (verbose > 2)
879 Utils.log.info("-- refine on fv["+fVlen+"] fc="+favoredCoding);
880 pop.setFavoredCoding(popHelper.choose(favoredValues, 1, 1+fVlen, favoredCoding));
881 if (pop.tokenCoding instanceof Coding &&
882 (stress == null || stress.nextBoolean())) {
883 if (verbose > 2)
884 Utils.log.info("-- refine on tv["+tokens.length+"] tc="+pop.tokenCoding);
885 CodingMethod tc = popHelper.choose(tokens, (Coding) pop.tokenCoding);
886 if (tc != pop.tokenCoding) {
887 if (verbose > 2)
888 Utils.log.info(">>> refined tc="+tc);
889 pop.setTokenCoding(tc);
890 }
891 }
892 if (unfavoredValues.length == 0)
893 pop.setUnfavoredCoding(null);
894 else {
895 if (verbose > 2)
896 Utils.log.info("-- refine on uv["+unfavoredValues.length+"] uc="+pop.unfavoredCoding);
897 pop.setUnfavoredCoding(popHelper.choose(unfavoredValues, unfavoredCoding));
898 }
899 if (verbose > 3) {
900 Utils.log.info("finish computePopSizePrivate fvlen="+fVlen+
901 " fc="+pop.favoredCoding+
902 " tc="+pop.tokenCoding+
903 " uc="+pop.unfavoredCoding);
904 //pop.hist.print("pop-hist", null, System.out);
905 StringBuffer sb = new StringBuffer();
906 sb.append("fv = {");
907 for (int i = 1; i <= fVlen; i++) {
908 if ((i % 10) == 0)
909 sb.append('\n');
910 sb.append(" ").append(favoredValues[i]);
911 }
912 sb.append('\n');
913 sb.append("}");
914 Utils.log.info(sb.toString());
915 }
916 if (verbose > 2) {
917 Utils.log.info("} //END");
918 }
919 if (stress != null) {
920 return null; // do not bother with size computation
921 }
922 int[] sizes;
923 try {
924 resetData();
925 // Write the array of favored values.
926 pop.writeSequencesTo(byteSizer, tokens, unfavoredValues);
927 sizes = new int[] { getByteSize(), getZipSize() };
928 } catch (IOException ee) {
929 throw new RuntimeException(ee); // cannot happen
930 }
931 int[] checkSizes = null;
932 assert((checkSizes = computeSizePrivate(pop)) != null);
933 assert(checkSizes[BYTE_SIZE] == sizes[BYTE_SIZE])
934 : (checkSizes[BYTE_SIZE]+" != "+sizes[BYTE_SIZE]);
935 return sizes;
936 }
937
938 private void tryAdaptiveCoding(Coding plainCoding) {
939 int oldZipSize = bestZipSize;
940 // Scan the value sequence, determining whether an interesting
941 // run occupies too much space. ("Too much" means, say 5% more
942 // than the average integer size of the band as a whole.)
943 // Try to find a better coding for those segments.
944 int start = this.start;
945 int end = this.end;
946 int[] values = this.values;
947 int len = end-start;
948 if (plainCoding.isDelta()) {
949 values = getDeltas(0,0); //%%% not quite right!
950 start = 0;
951 end = values.length;
952 }
953 int[] sizes = new int[len+1];
954 int fillp = 0;
955 int totalSize = 0;
956 for (int i = start; i < end; i++) {
957 int val = values[i];
958 sizes[fillp++] = totalSize;
959 int size = plainCoding.getLength(val);
960 assert(size < Integer.MAX_VALUE);
961 //System.out.println("len "+val+" = "+size);
962 totalSize += size;
963 }
964 sizes[fillp++] = totalSize;
965 assert(fillp == sizes.length);
966 double avgSize = (double)totalSize / len;
967 double sizeFuzz;
968 double sizeFuzz2;
969 double sizeFuzz3;
970 if (effort >= MID_EFFORT) {
971 if (effort > MID_EFFORT+1)
972 sizeFuzz = 1.001;
973 else
974 sizeFuzz = 1.003;
975 } else {
976 if (effort > RUN_EFFORT)
977 sizeFuzz = 1.01;
978 else
979 sizeFuzz = 1.03;
980 }
981 // for now:
982 sizeFuzz *= sizeFuzz; // double the thresh
983 sizeFuzz2 = (sizeFuzz*sizeFuzz);
984 sizeFuzz3 = (sizeFuzz*sizeFuzz*sizeFuzz);
985 // Find some mesh scales we like.
986 double[] dmeshes = new double[1 + (effort-RUN_EFFORT)];
987 double logLen = Math.log(len);
988 for (int i = 0; i < dmeshes.length; i++) {
989 dmeshes[i] = Math.exp(logLen*(i+1)/(dmeshes.length+1));
990 }
991 int[] meshes = new int[dmeshes.length];
992 int mfillp = 0;
993 for (int i = 0; i < dmeshes.length; i++) {
994 int m = (int)Math.round(dmeshes[i]);
995 m = AdaptiveCoding.getNextK(m-1);
996 if (m <= 0 || m >= len) continue;
997 if (mfillp > 0 && m == meshes[mfillp-1]) continue;
998 meshes[mfillp++] = m;
999 }
1000 meshes = BandStructure.realloc(meshes, mfillp);
1001 // There's going to be a band header. Estimate conservatively large.
1002 final int BAND_HEADER = 4; // op, KB, A, B
1003 // Threshold values for a "too big" mesh.
1004 int[] threshes = new int[meshes.length];
1005 double[] fuzzes = new double[meshes.length];
1006 for (int i = 0; i < meshes.length; i++) {
1007 int mesh = meshes[i];
1008 double fuzz;
1009 if (mesh < 10)
1010 fuzz = sizeFuzz3;
1011 else if (mesh < 100)
1012 fuzz = sizeFuzz2;
1013 else
1014 fuzz = sizeFuzz;
1015 fuzzes[i] = fuzz;
1016 threshes[i] = BAND_HEADER + (int)Math.ceil(mesh * avgSize * fuzz);
1017 }
1018 if (verbose > 1) {
1019 System.out.print("tryAdaptiveCoding ["+len+"]"+
1020 " avgS="+avgSize+" fuzz="+sizeFuzz+
1021 " meshes: {");
1022 for (int i = 0; i < meshes.length; i++)
1023 System.out.print(" "+meshes[i]+"("+threshes[i]+")");
1024 Utils.log.info(" }");
1025 }
1026 if (runHelper == null) {
1027 runHelper = new CodingChooser(effort, allCodingChoices);
1028 if (stress != null)
1029 runHelper.addStressSeed(stress.nextInt());
1030 runHelper.topLevel = false;
1031 runHelper.verbose -= 1;
1032 runHelper.disableRunCoding = true;
1033 runHelper.disablePopCoding = this.disablePopCoding;
1034 if (effort < MID_EFFORT)
1035 // No nested pop codings.
1036 runHelper.disablePopCoding = true;
1037 }
1038 for (int i = 0; i < len; i++) {
1039 i = AdaptiveCoding.getNextK(i-1);
1040 if (i > len) i = len;
1041 for (int j = meshes.length-1; j >= 0; j--) {
1042 int mesh = meshes[j];
1043 int thresh = threshes[j];
1044 if (i+mesh > len) continue;
1045 int size = sizes[i+mesh] - sizes[i];
1046 if (size >= thresh) {
1047 // Found a size bulge.
1048 int bend = i+mesh;
1049 int bsize = size;
1050 double bigSize = avgSize * fuzzes[j];
1051 while (bend < len && (bend-i) <= len/2) {
1052 int bend0 = bend;
1053 int bsize0 = bsize;
1054 bend += mesh;
1055 bend = i+AdaptiveCoding.getNextK(bend-i-1);
1056 if (bend < 0 || bend > len)
1057 bend = len;
1058 bsize = sizes[bend]-sizes[i];
1059 if (bsize < BAND_HEADER + (bend-i) * bigSize) {
1060 bsize = bsize0;
1061 bend = bend0;
1062 break;
1063 }
1064 }
1065 int nexti = bend;
1066 if (verbose > 2) {
1067 Utils.log.info("bulge at "+i+"["+(bend-i)+"] of "+
1068 pct(bsize - avgSize*(bend-i),
1069 avgSize*(bend-i)));
1070 Utils.log.info("{ //BEGIN");
1071 }
1072 CodingMethod begcm, midcm, endcm;
1073 midcm = runHelper.choose(this.values,
1074 this.start+i,
1075 this.start+bend,
1076 plainCoding);
1077 if (midcm == plainCoding) {
1078 // No use working further.
1079 begcm = plainCoding;
1080 endcm = plainCoding;
1081 } else {
1082 begcm = runHelper.choose(this.values,
1083 this.start,
1084 this.start+i,
1085 plainCoding);
1086 endcm = runHelper.choose(this.values,
1087 this.start+bend,
1088 this.start+len,
1089 plainCoding);
1090 }
1091 if (verbose > 2)
1092 Utils.log.info("} //END");
1093 if (begcm == midcm && i > 0 &&
1094 AdaptiveCoding.isCodableLength(bend)) {
1095 i = 0;
1096 }
1097 if (midcm == endcm && bend < len) {
1098 bend = len;
1099 }
1100 if (begcm != plainCoding ||
1101 midcm != plainCoding ||
1102 endcm != plainCoding) {
1103 CodingMethod chain;
1104 int hlen = 0;
1105 if (bend == len) {
1106 chain = midcm;
1107 } else {
1108 chain = new AdaptiveCoding(bend-i, midcm, endcm);
1109 hlen += BAND_HEADER;
1110 }
1111 if (i > 0) {
1112 chain = new AdaptiveCoding(i, begcm, chain);
1113 hlen += BAND_HEADER;
1114 }
1115 int[] chainSize = computeSizePrivate(chain);
1116 noteSizes(chain,
1117 chainSize[BYTE_SIZE],
1118 chainSize[ZIP_SIZE]+hlen);
1119 }
1120 i = nexti;
1121 break;
1122 }
1123 }
1124 }
1125 if (verbose > 3) {
1126 if (bestZipSize < oldZipSize) {
1127 Utils.log.info(">>> RUN WINS BY "+
1128 (oldZipSize - bestZipSize));
1129 }
1130 }
1131 }
1132
1133 static private
1134 String pct(double num, double den) {
1135 return (Math.round((num / den)*10000)/100.0)+"%";
1136 }
1137
1138 static
1139 class Sizer extends OutputStream {
1140 final OutputStream out; // if non-null, copy output here also
1141 Sizer(OutputStream out) {
1142 this.out = out;
1143 }
1144 Sizer() {
1145 this(null);
1146 }
1147 private int count;
1148 public void write(int b) throws IOException {
1149 count++;
1150 if (out != null) out.write(b);
1151 }
1152 public void write(byte b[], int off, int len) throws IOException {
1153 count += len;
1154 if (out != null) out.write(b, off, len);
1155 }
1156 public void reset() {
1157 count = 0;
1158 }
1159 public int getSize() { return count; }
1160
1161 public String toString() {
1162 String str = super.toString();
1163 // If -ea, print out more informative strings!
1164 assert((str = stringForDebug()) != null);
1165 return str;
1166 }
1167 String stringForDebug() {
1168 return "<Sizer "+getSize()+">";
1169 }
1170 }
1171
1172 private Sizer zipSizer = new Sizer();
1173 private Deflater zipDef = new Deflater();
1174 private DeflaterOutputStream zipOut = new DeflaterOutputStream(zipSizer, zipDef);
1175 private Sizer byteSizer = new Sizer(zipOut);
1176 private Sizer byteOnlySizer = new Sizer();
1177
1178 private void resetData() {
1179 flushData();
1180 zipDef.reset();
1181 if (context != null) {
1182 // Prepend given salt to the test output.
1183 try {
1184 context.writeTo(byteSizer);
1185 } catch (IOException ee) {
1186 throw new RuntimeException(ee); // cannot happen
1187 }
1188 }
1189 zipSizer.reset();
1190 byteSizer.reset();
1191 }
1192 private void flushData() {
1193 try {
1194 zipOut.finish();
1195 } catch (IOException ee) {
1196 throw new RuntimeException(ee); // cannot happen
1197 }
1198 }
1199 private int getByteSize() {
1200 return byteSizer.getSize();
1201 }
1202 private int getZipSize() {
1203 flushData();
1204 return zipSizer.getSize();
1205 }
1206
1207
1208 /// Stress-test helpers.
1209
1210 void addStressSeed(int x) {
1211 if (stress == null) return;
1212 stress.setSeed(x + ((long)stress.nextInt() << 32));
1213 }
1214
1215 // Pick a random pop-coding.
1216 private CodingMethod stressPopCoding(CodingMethod coding) {
1217 assert(stress != null); // this method is only for testing
1218 // Don't turn it into a pop coding if it's already something special.
1219 if (!(coding instanceof Coding)) return coding;
1220 Coding valueCoding = ((Coding)coding).getValueCoding();
1221 Histogram hist = getValueHistogram();
1222 int fVlen = stressLen(hist.getTotalLength());
1223 if (fVlen == 0) return coding;
1224 List popvals = new ArrayList();
1225 if (stress.nextBoolean()) {
1226 // Build the population from the value list.
1227 HashSet popset = new HashSet();
1228 for (int i = start; i < end; i++) {
1229 Integer val = new Integer(values[i]);
1230 if (popset.add(val)) popvals.add(val);
1231 }
1232 } else {
1233 int[][] matrix = hist.getMatrix();
1234 for (int mrow = 0; mrow < matrix.length; mrow++) {
1235 int[] row = matrix[mrow];
1236 for (int mcol = 1; mcol < row.length; mcol++) {
1237 popvals.add(new Integer(row[mcol]));
1238 }
1239 }
1240 }
1241 int reorder = stress.nextInt();
1242 if ((reorder & 7) <= 2) {
1243 // Lose the order.
1244 Collections.shuffle(popvals, stress);
1245 } else {
1246 // Keep the order, mostly.
1247 if (((reorder >>>= 3) & 7) <= 2) Collections.sort(popvals);
1248 if (((reorder >>>= 3) & 7) <= 2) Collections.reverse(popvals);
1249 if (((reorder >>>= 3) & 7) <= 2) Collections.rotate(popvals, stressLen(popvals.size()));
1250 }
1251 if (popvals.size() > fVlen) {
1252 // Cut the list down.
1253 if (((reorder >>>= 3) & 7) <= 2) {
1254 // Cut at end.
1255 popvals.subList(fVlen, popvals.size()).clear();
1256 } else {
1257 // Cut at start.
1258 popvals.subList(0, popvals.size()-fVlen).clear();
1259 }
1260 }
1261 fVlen = popvals.size();
1262 int[] fvals = new int[1+fVlen];
1263 for (int i = 0; i < fVlen; i++) {
1264 fvals[1+i] = ((Integer)popvals.get(i)).intValue();
1265 }
1266 PopulationCoding pop = new PopulationCoding();
1267 pop.setFavoredValues(fvals, fVlen);
1268 int[] lvals = PopulationCoding.LValuesCoded;
1269 for (int i = 0; i < lvals.length / 2; i++) {
1270 int popl = lvals[stress.nextInt(lvals.length)];
1271 if (popl < 0) continue;
1272 if (PopulationCoding.fitTokenCoding(fVlen, popl) != null) {
1273 pop.setL(popl);
1274 break;
1275 }
1276 }
1277 if (pop.tokenCoding == null) {
1278 int min = fvals[1], max = min;
1279 for (int i = 2; i <= fVlen; i++) {
1280 int val = fvals[i];
1281 if (min > val) min = val;
1282 if (max < val) max = val;
1283 }
1284 pop.tokenCoding = stressCoding(min, max);
1285 }
1286
1287 computePopSizePrivate(pop, valueCoding, valueCoding);
1288 return pop;
1289 }
1290
1291 // Pick a random adaptive coding.
1292 private CodingMethod stressAdaptiveCoding(CodingMethod coding) {
1293 assert(stress != null); // this method is only for testing
1294 // Don't turn it into a run coding if it's already something special.
1295 if (!(coding instanceof Coding)) return coding;
1296 Coding plainCoding = (Coding)coding;
1297 int len = end-start;
1298 if (len < 2) return coding;
1299 // Decide how many spans we'll create.
1300 int spanlen = stressLen(len-1)+1;
1301 if (spanlen == len) return coding;
1302 try {
1303 assert(!disableRunCoding);
1304 disableRunCoding = true; // temporary, while I decide spans
1305 int[] allValues = (int[]) values.clone();
1306 CodingMethod result = null;
1307 int scan = this.end;
1308 int start = this.start;
1309 for (int split; scan > start; scan = split) {
1310 int thisspan;
1311 int rand = (scan - start < 100)? -1: stress.nextInt();
1312 if ((rand & 7) != 0) {
1313 thisspan = (spanlen==1? spanlen: stressLen(spanlen-1)+1);
1314 } else {
1315 // Every so often generate a value based on KX/KB format.
1316 int KX = (rand >>>= 3) & AdaptiveCoding.KX_MAX;
1317 int KB = (rand >>>= 3) & AdaptiveCoding.KB_MAX;
1318 for (;;) {
1319 thisspan = AdaptiveCoding.decodeK(KX, KB);
1320 if (thisspan <= scan - start) break;
1321 // Try smaller and smaller codings:
1322 if (KB != AdaptiveCoding.KB_DEFAULT)
1323 KB = AdaptiveCoding.KB_DEFAULT;
1324 else
1325 KX -= 1;
1326 }
1327 //System.out.println("KX="+KX+" KB="+KB+" K="+thisspan);
1328 assert(AdaptiveCoding.isCodableLength(thisspan));
1329 }
1330 if (thisspan > scan - start) thisspan = scan - start;
1331 while (!AdaptiveCoding.isCodableLength(thisspan)) --thisspan;
1332 split = scan - thisspan;
1333 assert(split < scan);
1334 assert(split >= start);
1335 // Choose a coding for the span [split..scan).
1336 CodingMethod sc = choose(allValues, split, scan, plainCoding);
1337 if (result == null) {
1338 result = sc; // the caboose
1339 } else {
1340 result = new AdaptiveCoding(scan-split, sc, result);
1341 }
1342 }
1343 return result;
1344 } finally {
1345 disableRunCoding = false; // return to normal value
1346 }
1347 }
1348
1349 // Return a random value in [0..len], gently biased toward extremes.
1350 private Coding stressCoding(int min, int max) {
1351 assert(stress != null); // this method is only for testing
1352 for (int i = 0; i < 100; i++) {
1353 Coding c = Coding.of(stress.nextInt(Coding.B_MAX)+1,
1354 stress.nextInt(Coding.H_MAX)+1,
1355 stress.nextInt(Coding.S_MAX+1));
1356 if (c.B() == 1) c = c.setH(256);
1357 if (c.H() == 256 && c.B() >= 5) c = c.setB(4);
1358 if (stress.nextBoolean()) {
1359 Coding dc = c.setD(1);
1360 if (dc.canRepresent(min, max)) return dc;
1361 }
1362 if (c.canRepresent(min, max)) return c;
1363 }
1364 return BandStructure.UNSIGNED5;
1365 }
1366
1367 // Return a random value in [0..len], gently biased toward extremes.
1368 private int stressLen(int len) {
1369 assert(stress != null); // this method is only for testing
1370 assert(len >= 0);
1371 int rand = stress.nextInt(100);
1372 if (rand < 20)
1373 return Math.min(len/5, rand);
1374 else if (rand < 40)
1375 return len;
1376 else
1377 return stress.nextInt(len);
1378 }
1379
1380 // For debug only.
1381/*
1382 public static
1383 int[] readValuesFrom(InputStream instr) {
1384 return readValuesFrom(new InputStreamReader(instr));
1385 }
1386 public static
1387 int[] readValuesFrom(Reader inrdr) {
1388 inrdr = new BufferedReader(inrdr);
1389 final StreamTokenizer in = new StreamTokenizer(inrdr);
1390 final int TT_NOTHING = -99;
1391 in.commentChar('#');
1392 return readValuesFrom(new Iterator() {
1393 int token = TT_NOTHING;
1394 private int getToken() {
1395 if (token == TT_NOTHING) {
1396 try {
1397 token = in.nextToken();
1398 assert(token != TT_NOTHING);
1399 } catch (IOException ee) {
1400 throw new RuntimeException(ee);
1401 }
1402 }
1403 return token;
1404 }
1405 public boolean hasNext() {
1406 return getToken() != StreamTokenizer.TT_EOF;
1407 }
1408 public Object next() {
1409 int ntok = getToken();
1410 token = TT_NOTHING;
1411 switch (ntok) {
1412 case StreamTokenizer.TT_EOF:
1413 throw new NoSuchElementException();
1414 case StreamTokenizer.TT_NUMBER:
1415 return new Integer((int) in.nval);
1416 default:
1417 assert(false);
1418 return null;
1419 }
1420 }
1421 public void remove() {
1422 throw new UnsupportedOperationException();
1423 }
1424 });
1425 }
1426 public static
1427 int[] readValuesFrom(Iterator iter) {
1428 return readValuesFrom(iter, 0);
1429 }
1430 public static
1431 int[] readValuesFrom(Iterator iter, int initSize) {
1432 int[] na = new int[Math.max(10, initSize)];
1433 int np = 0;
1434 while (iter.hasNext()) {
1435 Integer val = (Integer) iter.next();
1436 if (np == na.length) {
1437 na = BandStructure.realloc(na);
1438 }
1439 na[np++] = val.intValue();
1440 }
1441 if (np != na.length) {
1442 na = BandStructure.realloc(na, np);
1443 }
1444 return na;
1445 }
1446
1447 public static
1448 void main(String[] av) throws IOException {
1449 int effort = MID_EFFORT;
1450 int ap = 0;
1451 if (ap < av.length && av[ap].equals("-e")) {
1452 ap++;
1453 effort = Integer.parseInt(av[ap++]);
1454 }
1455 int verbose = 1;
1456 if (ap < av.length && av[ap].equals("-v")) {
1457 ap++;
1458 verbose = Integer.parseInt(av[ap++]);
1459 }
1460 Coding[] bcs = BandStructure.getBasicCodings();
1461 CodingChooser cc = new CodingChooser(effort, bcs);
1462 if (ap < av.length && av[ap].equals("-p")) {
1463 ap++;
1464 cc.optUsePopulationCoding = false;
1465 }
1466 if (ap < av.length && av[ap].equals("-a")) {
1467 ap++;
1468 cc.optUseAdaptiveCoding = false;
1469 }
1470 cc.verbose = verbose;
1471 int[] values = readValuesFrom(System.in);
1472 int[] sizes = {0,0};
1473 CodingMethod cm = cc.choose(values, BandStructure.UNSIGNED5, sizes);
1474 System.out.println("size: "+sizes[BYTE_SIZE]+"/zs="+sizes[ZIP_SIZE]);
1475 System.out.println(cm);
1476 }
1477//*/
1478
1479}