J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2002-2005 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | package com.sun.java.util.jar.pack; |
| 27 | |
| 28 | import java.io.*; |
| 29 | import java.util.*; |
| 30 | import java.util.zip.*; |
| 31 | |
| 32 | /** |
| 33 | * Heuristic chooser of basic encodings. |
| 34 | * Runs "zip" to measure the apparent information content after coding. |
| 35 | * @author John Rose |
| 36 | */ |
| 37 | class CodingChooser implements Constants { |
| 38 | int verbose; |
| 39 | int effort; |
| 40 | boolean optUseHistogram = true; |
| 41 | boolean optUsePopulationCoding = true; |
| 42 | boolean optUseAdaptiveCoding = true; |
| 43 | boolean disablePopCoding; |
| 44 | boolean disableRunCoding; |
| 45 | boolean topLevel = true; |
| 46 | |
| 47 | // Derived from effort; >1 (<1) means try more (less) experiments |
| 48 | // when looking to beat a best score. |
| 49 | double fuzz; |
| 50 | |
| 51 | Coding[] allCodingChoices; |
| 52 | Choice[] choices; |
| 53 | ByteArrayOutputStream context; |
| 54 | CodingChooser popHelper; |
| 55 | CodingChooser runHelper; |
| 56 | |
| 57 | Random stress; // If not null, stress mode oracle. |
| 58 | |
| 59 | // Element in sorted set of coding choices: |
| 60 | static |
| 61 | class Choice { |
| 62 | final Coding coding; |
| 63 | final int index; // index in choices |
| 64 | final int[] distance; // cache of distance |
| 65 | Choice(Coding coding, int index, int[] distance) { |
| 66 | this.coding = coding; |
| 67 | this.index = index; |
| 68 | this.distance = distance; |
| 69 | } |
| 70 | // These variables are reset and reused: |
| 71 | int searchOrder; // order in which it is checked |
| 72 | int minDistance; // min distance from already-checked choices |
| 73 | int zipSize; // size of encoding in sample, zipped output |
| 74 | int byteSize; // size of encoding in sample (debug only) |
| 75 | int histSize; // size of encoding, according to histogram |
| 76 | |
| 77 | void reset() { |
| 78 | searchOrder = Integer.MAX_VALUE; |
| 79 | minDistance = Integer.MAX_VALUE; |
| 80 | zipSize = byteSize = histSize = -1; |
| 81 | } |
| 82 | |
| 83 | boolean isExtra() { |
| 84 | return index < 0; |
| 85 | } |
| 86 | |
| 87 | public String toString() { |
| 88 | return stringForDebug(); |
| 89 | } |
| 90 | |
| 91 | private String stringForDebug() { |
| 92 | String s = ""; |
| 93 | if (searchOrder < Integer.MAX_VALUE) |
| 94 | s += " so: "+searchOrder; |
| 95 | if (minDistance < Integer.MAX_VALUE) |
| 96 | s += " md: "+minDistance; |
| 97 | if (zipSize > 0) |
| 98 | s += " zs: "+zipSize; |
| 99 | if (byteSize > 0) |
| 100 | s += " bs: "+byteSize; |
| 101 | if (histSize > 0) |
| 102 | s += " hs: "+histSize; |
| 103 | return "Choice["+index+"] "+s+" "+coding; |
| 104 | } |
| 105 | } |
| 106 | |
| 107 | CodingChooser(int effort, Coding[] allCodingChoices) { |
| 108 | PropMap p200 = Utils.currentPropMap(); |
| 109 | if (p200 != null) { |
| 110 | this.verbose |
| 111 | = Math.max(p200.getInteger(Utils.DEBUG_VERBOSE), |
| 112 | p200.getInteger(Utils.COM_PREFIX+"verbose.coding")); |
| 113 | this.optUseHistogram |
| 114 | = !p200.getBoolean(Utils.COM_PREFIX+"no.histogram"); |
| 115 | this.optUsePopulationCoding |
| 116 | = !p200.getBoolean(Utils.COM_PREFIX+"no.population.coding"); |
| 117 | this.optUseAdaptiveCoding |
| 118 | = !p200.getBoolean(Utils.COM_PREFIX+"no.adaptive.coding"); |
| 119 | int stress |
| 120 | = p200.getInteger(Utils.COM_PREFIX+"stress.coding"); |
| 121 | if (stress != 0) |
| 122 | this.stress = new Random(stress); |
| 123 | } |
| 124 | |
| 125 | this.effort = effort; |
| 126 | // The following line "makes sense" but is too much |
| 127 | // work for a simple heuristic. |
| 128 | //if (effort > 5) zipDef.setLevel(effort); |
| 129 | |
| 130 | this.allCodingChoices = allCodingChoices; |
| 131 | |
| 132 | // If effort = 9, look carefully at any solution |
| 133 | // whose initial metrics are within 1% of the best |
| 134 | // so far. If effort = 1, look carefully only at |
| 135 | // solutions whose initial metrics promise a 1% win. |
| 136 | this.fuzz = 1 + (0.0025 * (effort-MID_EFFORT)); |
| 137 | |
| 138 | int nc = 0; |
| 139 | for (int i = 0; i < allCodingChoices.length; i++) { |
| 140 | if (allCodingChoices[i] == null) continue; |
| 141 | nc++; |
| 142 | } |
| 143 | choices = new Choice[nc]; |
| 144 | nc = 0; |
| 145 | for (int i = 0; i < allCodingChoices.length; i++) { |
| 146 | if (allCodingChoices[i] == null) continue; |
| 147 | int[] distance = new int[choices.length]; |
| 148 | choices[nc++] = new Choice(allCodingChoices[i], i, distance); |
| 149 | } |
| 150 | for (int i = 0; i < choices.length; i++) { |
| 151 | Coding ci = choices[i].coding; |
| 152 | assert(ci.distanceFrom(ci) == 0); |
| 153 | for (int j = 0; j < i; j++) { |
| 154 | Coding cj = choices[j].coding; |
| 155 | int dij = ci.distanceFrom(cj); |
| 156 | assert(dij > 0); |
| 157 | assert(dij == cj.distanceFrom(ci)); |
| 158 | choices[i].distance[j] = dij; |
| 159 | choices[j].distance[i] = dij; |
| 160 | } |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | Choice makeExtraChoice(Coding coding) { |
| 165 | int[] distance = new int[choices.length]; |
| 166 | for (int i = 0; i < distance.length; i++) { |
| 167 | Coding ci = choices[i].coding; |
| 168 | int dij = coding.distanceFrom(ci); |
| 169 | assert(dij > 0); |
| 170 | assert(dij == ci.distanceFrom(coding)); |
| 171 | distance[i] = dij; |
| 172 | } |
| 173 | Choice c = new Choice(coding, -1, distance); |
| 174 | c.reset(); |
| 175 | return c; |
| 176 | } |
| 177 | |
| 178 | ByteArrayOutputStream getContext() { |
| 179 | if (context == null) |
| 180 | context = new ByteArrayOutputStream(1 << 16); |
| 181 | return context; |
| 182 | } |
| 183 | |
| 184 | // These variables are reset and reused: |
| 185 | private int[] values; |
| 186 | private int start, end; // slice of values |
| 187 | private int[] deltas; |
| 188 | private int min, max; |
| 189 | private Histogram vHist; |
| 190 | private Histogram dHist; |
| 191 | private int searchOrder; |
| 192 | private Choice regularChoice; |
| 193 | private Choice bestChoice; |
| 194 | private CodingMethod bestMethod; |
| 195 | private int bestByteSize; |
| 196 | private int bestZipSize; |
| 197 | private int targetSize; // fuzzed target byte size |
| 198 | |
| 199 | private void reset(int[] values, int start, int end) { |
| 200 | this.values = values; |
| 201 | this.start = start; |
| 202 | this.end = end; |
| 203 | this.deltas = null; |
| 204 | this.min = Integer.MAX_VALUE; |
| 205 | this.max = Integer.MIN_VALUE; |
| 206 | this.vHist = null; |
| 207 | this.dHist = null; |
| 208 | this.searchOrder = 0; |
| 209 | this.regularChoice = null; |
| 210 | this.bestChoice = null; |
| 211 | this.bestMethod = null; |
| 212 | this.bestZipSize = Integer.MAX_VALUE; |
| 213 | this.bestByteSize = Integer.MAX_VALUE; |
| 214 | this.targetSize = Integer.MAX_VALUE; |
| 215 | } |
| 216 | |
| 217 | public static final int MIN_EFFORT = 1; |
| 218 | public static final int MID_EFFORT = 5; |
| 219 | public static final int MAX_EFFORT = 9; |
| 220 | |
| 221 | public static final int POP_EFFORT = MID_EFFORT-1; |
| 222 | public static final int RUN_EFFORT = MID_EFFORT-2; |
| 223 | |
| 224 | public static final int BYTE_SIZE = 0; |
| 225 | public static final int ZIP_SIZE = 1; |
| 226 | |
| 227 | CodingMethod choose(int[] values, int start, int end, Coding regular, int[] sizes) { |
| 228 | // Save the value array |
| 229 | reset(values, start, end); |
| 230 | |
| 231 | if (effort <= MIN_EFFORT || start >= end) { |
| 232 | if (sizes != null) { |
| 233 | int[] computed = computeSizePrivate(regular); |
| 234 | sizes[BYTE_SIZE] = computed[BYTE_SIZE]; |
| 235 | sizes[ZIP_SIZE] = computed[ZIP_SIZE]; |
| 236 | } |
| 237 | return regular; |
| 238 | } |
| 239 | |
| 240 | if (optUseHistogram) { |
| 241 | getValueHistogram(); |
| 242 | getDeltaHistogram(); |
| 243 | } |
| 244 | |
| 245 | for (int i = start; i < end; i++) { |
| 246 | int val = values[i]; |
| 247 | if (min > val) min = val; |
| 248 | if (max < val) max = val; |
| 249 | } |
| 250 | |
| 251 | // Find all the preset choices that might be worth looking at: |
| 252 | int numChoices = markUsableChoices(regular); |
| 253 | |
| 254 | if (stress != null) { |
| 255 | // Make a random choice. |
| 256 | int rand = stress.nextInt(numChoices*2 + 4); |
| 257 | CodingMethod coding = null; |
| 258 | for (int i = 0; i < choices.length; i++) { |
| 259 | Choice c = choices[i]; |
| 260 | if (c.searchOrder >= 0 && rand-- == 0) { |
| 261 | coding = c.coding; |
| 262 | break; |
| 263 | } |
| 264 | } |
| 265 | if (coding == null) { |
| 266 | if ((rand & 7) != 0) { |
| 267 | coding = regular; |
| 268 | } else { |
| 269 | // Pick a totally random coding 6% of the time. |
| 270 | coding = stressCoding(min, max); |
| 271 | } |
| 272 | } |
| 273 | if (!disablePopCoding |
| 274 | && optUsePopulationCoding |
| 275 | && effort >= POP_EFFORT) { |
| 276 | coding = stressPopCoding(coding); |
| 277 | } |
| 278 | if (!disableRunCoding |
| 279 | && optUseAdaptiveCoding |
| 280 | && effort >= RUN_EFFORT) { |
| 281 | coding = stressAdaptiveCoding(coding); |
| 282 | } |
| 283 | return coding; |
| 284 | } |
| 285 | |
| 286 | double searchScale = 1.0; |
| 287 | for (int x = effort; x < MAX_EFFORT; x++) { |
| 288 | searchScale /= 1.414; // every 2 effort points doubles work |
| 289 | } |
| 290 | int searchOrderLimit = (int)Math.ceil( numChoices * searchScale ); |
| 291 | |
| 292 | // Start by evaluating the "regular" choice. |
| 293 | bestChoice = regularChoice; |
| 294 | evaluate(regularChoice); |
| 295 | int maxd = updateDistances(regularChoice); |
| 296 | |
| 297 | // save these first-cut numbers for later |
| 298 | int zipSize1 = bestZipSize; |
| 299 | int byteSize1 = bestByteSize; |
| 300 | |
| 301 | if (regularChoice.coding == regular && topLevel) { |
| 302 | // Give credit for being the default; no band header is needed. |
| 303 | // Rather than increasing every other size value by the band |
| 304 | // header amount, we decrement this one metric, to give it an edge. |
| 305 | // Decreasing zipSize by a byte length is conservatively correct, |
| 306 | // especially considering that the escape byte is not likely to |
| 307 | // zip well with other bytes in the band. |
| 308 | int X = BandStructure.encodeEscapeValue(_meta_canon_max, regular); |
| 309 | if (regular.canRepresentSigned(X)) { |
| 310 | int Xlen = regular.getLength(X); // band coding header |
| 311 | //regularChoice.histSize -= Xlen; // keep exact byteSize |
| 312 | //regularChoice.byteSize -= Xlen; // keep exact byteSize |
| 313 | regularChoice.zipSize -= Xlen; |
| 314 | bestByteSize = regularChoice.byteSize; |
| 315 | bestZipSize = regularChoice.zipSize; |
| 316 | } |
| 317 | } |
| 318 | |
| 319 | int dscale = 1; |
| 320 | // Continually select a new choice to evaluate. |
| 321 | while (searchOrder < searchOrderLimit) { |
| 322 | Choice nextChoice; |
| 323 | if (dscale > maxd) dscale = 1; // cycle dscale values! |
| 324 | int dhi = maxd / dscale; |
| 325 | int dlo = maxd / (dscale *= 2) + 1; |
| 326 | nextChoice = findChoiceNear(bestChoice, dhi, dlo); |
| 327 | if (nextChoice == null) continue; |
| 328 | assert(nextChoice.coding.canRepresent(min, max)); |
| 329 | evaluate(nextChoice); |
| 330 | int nextMaxd = updateDistances(nextChoice); |
| 331 | if (nextChoice == bestChoice) { |
| 332 | maxd = nextMaxd; |
| 333 | if (verbose > 5) Utils.log.info("maxd = "+maxd); |
| 334 | } |
| 335 | } |
| 336 | |
| 337 | // Record best "plain coding" choice. |
| 338 | Coding plainBest = bestChoice.coding; |
| 339 | assert(plainBest == bestMethod); |
| 340 | |
| 341 | if (verbose > 2) { |
| 342 | Utils.log.info("chooser: plain result="+bestChoice+" after "+bestChoice.searchOrder+" rounds, "+(regularChoice.zipSize-bestZipSize)+" fewer bytes than regular "+regular); |
| 343 | } |
| 344 | bestChoice = null; |
| 345 | |
| 346 | if (!disablePopCoding |
| 347 | && optUsePopulationCoding |
| 348 | && effort >= POP_EFFORT |
| 349 | && bestMethod instanceof Coding) { |
| 350 | tryPopulationCoding(plainBest); |
| 351 | } |
| 352 | |
| 353 | if (!disableRunCoding |
| 354 | && optUseAdaptiveCoding |
| 355 | && effort >= RUN_EFFORT |
| 356 | && bestMethod instanceof Coding) { |
| 357 | tryAdaptiveCoding(plainBest); |
| 358 | } |
| 359 | |
| 360 | // Pass back the requested information: |
| 361 | if (sizes != null) { |
| 362 | sizes[BYTE_SIZE] = bestByteSize; |
| 363 | sizes[ZIP_SIZE] = bestZipSize; |
| 364 | } |
| 365 | if (verbose > 1) { |
| 366 | Utils.log.info("chooser: result="+bestMethod+" "+ |
| 367 | (zipSize1-bestZipSize)+ |
| 368 | " fewer bytes than regular "+regular+ |
| 369 | "; win="+pct(zipSize1-bestZipSize, zipSize1)); |
| 370 | } |
| 371 | CodingMethod bestMethod = this.bestMethod; |
| 372 | reset(null, 0, 0); // for GC |
| 373 | return bestMethod; |
| 374 | } |
| 375 | CodingMethod choose(int[] values, int start, int end, Coding regular) { |
| 376 | return choose(values, start, end, regular, null); |
| 377 | } |
| 378 | CodingMethod choose(int[] values, Coding regular, int[] sizes) { |
| 379 | return choose(values, 0, values.length, regular, sizes); |
| 380 | } |
| 381 | CodingMethod choose(int[] values, Coding regular) { |
| 382 | return choose(values, 0, values.length, regular, null); |
| 383 | } |
| 384 | |
| 385 | private int markUsableChoices(Coding regular) { |
| 386 | int numChoices = 0; |
| 387 | for (int i = 0; i < choices.length; i++) { |
| 388 | Choice c = choices[i]; |
| 389 | c.reset(); |
| 390 | if (!c.coding.canRepresent(min, max)) { |
| 391 | // Mark as already visited: |
| 392 | c.searchOrder = -1; |
| 393 | if (verbose > 1 && c.coding == regular) { |
| 394 | Utils.log.info("regular coding cannot represent ["+min+".."+max+"]: "+regular); |
| 395 | } |
| 396 | continue; |
| 397 | } |
| 398 | if (c.coding == regular) |
| 399 | regularChoice = c; |
| 400 | numChoices++; |
| 401 | } |
| 402 | if (regularChoice == null && regular.canRepresent(min, max)) { |
| 403 | regularChoice = makeExtraChoice(regular); |
| 404 | if (verbose > 1) { |
| 405 | Utils.log.info("*** regular choice is extra: "+regularChoice.coding); |
| 406 | } |
| 407 | } |
| 408 | if (regularChoice == null) { |
| 409 | for (int i = 0; i < choices.length; i++) { |
| 410 | Choice c = choices[i]; |
| 411 | if (c.searchOrder != -1) { |
| 412 | regularChoice = c; // arbitrary pick |
| 413 | break; |
| 414 | } |
| 415 | } |
| 416 | if (verbose > 1) { |
| 417 | Utils.log.info("*** regular choice does not apply "+regular); |
| 418 | Utils.log.info(" using instead "+regularChoice.coding); |
| 419 | } |
| 420 | } |
| 421 | if (verbose > 2) { |
| 422 | Utils.log.info("chooser: #choices="+numChoices+" ["+min+".."+max+"]"); |
| 423 | if (verbose > 4) { |
| 424 | for (int i = 0; i < choices.length; i++) { |
| 425 | Choice c = choices[i]; |
| 426 | if (c.searchOrder >= 0) |
| 427 | Utils.log.info(" "+c); |
| 428 | } |
| 429 | } |
| 430 | } |
| 431 | return numChoices; |
| 432 | } |
| 433 | |
| 434 | // Find an arbitrary choice at least dlo away from a previously |
| 435 | // evaluated choices, and at most dhi. Try also to regulate its |
| 436 | // min distance to all previously evaluated choices, in this range. |
| 437 | private Choice findChoiceNear(Choice near, int dhi, int dlo) { |
| 438 | if (verbose > 5) |
| 439 | Utils.log.info("findChoice "+dhi+".."+dlo+" near: "+near); |
| 440 | int[] distance = near.distance; |
| 441 | Choice found = null; |
| 442 | for (int i = 0; i < choices.length; i++) { |
| 443 | Choice c = choices[i]; |
| 444 | if (c.searchOrder < searchOrder) |
| 445 | continue; // already searched |
| 446 | // Distance from "near" guy must be in bounds: |
| 447 | if (distance[i] >= dlo && distance[i] <= dhi) { |
| 448 | // Try also to keep min-distance from other guys in bounds: |
| 449 | if (c.minDistance >= dlo && c.minDistance <= dhi) { |
| 450 | if (verbose > 5) |
| 451 | Utils.log.info("findChoice => good "+c); |
| 452 | return c; |
| 453 | } |
| 454 | found = c; |
| 455 | } |
| 456 | } |
| 457 | if (verbose > 5) |
| 458 | Utils.log.info("findChoice => found "+found); |
| 459 | return found; |
| 460 | } |
| 461 | |
| 462 | private void evaluate(Choice c) { |
| 463 | assert(c.searchOrder == Integer.MAX_VALUE); |
| 464 | c.searchOrder = searchOrder++; |
| 465 | boolean mustComputeSize; |
| 466 | if (c == bestChoice || c.isExtra()) { |
| 467 | mustComputeSize = true; |
| 468 | } else if (optUseHistogram) { |
| 469 | Histogram hist = getHistogram(c.coding.isDelta()); |
| 470 | c.histSize = (int)Math.ceil(hist.getBitLength(c.coding) / 8); |
| 471 | c.byteSize = c.histSize; |
| 472 | mustComputeSize = (c.byteSize <= targetSize); |
| 473 | } else { |
| 474 | mustComputeSize = true; |
| 475 | } |
| 476 | if (mustComputeSize) { |
| 477 | int[] sizes = computeSizePrivate(c.coding); |
| 478 | c.byteSize = sizes[BYTE_SIZE]; |
| 479 | c.zipSize = sizes[ZIP_SIZE]; |
| 480 | if (noteSizes(c.coding, c.byteSize, c.zipSize)) |
| 481 | bestChoice = c; |
| 482 | } |
| 483 | if (c.histSize >= 0) { |
| 484 | assert(c.byteSize == c.histSize); // models should agree |
| 485 | } |
| 486 | if (verbose > 4) { |
| 487 | Utils.log.info("evaluated "+c); |
| 488 | } |
| 489 | } |
| 490 | |
| 491 | private boolean noteSizes(CodingMethod c, int byteSize, int zipSize) { |
| 492 | assert(zipSize > 0 && byteSize > 0); |
| 493 | boolean better = (zipSize < bestZipSize); |
| 494 | if (verbose > 3) |
| 495 | Utils.log.info("computed size "+c+" "+byteSize+"/zs="+zipSize+ |
| 496 | ((better && bestMethod != null)? |
| 497 | (" better by "+ |
| 498 | pct(bestZipSize - zipSize, zipSize)): "")); |
| 499 | if (better) { |
| 500 | bestMethod = c; |
| 501 | bestZipSize = zipSize; |
| 502 | bestByteSize = byteSize; |
| 503 | targetSize = (int)(byteSize * fuzz); |
| 504 | return true; |
| 505 | } else { |
| 506 | return false; |
| 507 | } |
| 508 | } |
| 509 | |
| 510 | |
| 511 | private int updateDistances(Choice c) { |
| 512 | // update all minDistance values in still unevaluated choices |
| 513 | int[] distance = c.distance; |
| 514 | int maxd = 0; // how far is c from everybody else? |
| 515 | for (int i = 0; i < choices.length; i++) { |
| 516 | Choice c2 = choices[i]; |
| 517 | if (c2.searchOrder < searchOrder) |
| 518 | continue; |
| 519 | int d = distance[i]; |
| 520 | if (verbose > 5) |
| 521 | Utils.log.info("evaluate dist "+d+" to "+c2); |
| 522 | int mind = c2.minDistance; |
| 523 | if (mind > d) |
| 524 | c2.minDistance = mind = d; |
| 525 | if (maxd < d) |
| 526 | maxd = d; |
| 527 | } |
| 528 | // Now maxd has the distance of the farthest outlier |
| 529 | // from all evaluated choices. |
| 530 | if (verbose > 5) |
| 531 | Utils.log.info("evaluate maxd => "+maxd); |
| 532 | return maxd; |
| 533 | } |
| 534 | |
| 535 | // Compute the coded size of a sequence of values. |
| 536 | // The first int is the size in uncompressed bytes. |
| 537 | // The second is an estimate of the compressed size of these bytes. |
| 538 | public void computeSize(CodingMethod c, int[] values, int start, int end, int[] sizes) { |
| 539 | if (end <= start) { |
| 540 | sizes[BYTE_SIZE] = sizes[ZIP_SIZE] = 0; |
| 541 | return; |
| 542 | } |
| 543 | try { |
| 544 | resetData(); |
| 545 | c.writeArrayTo(byteSizer, values, start, end); |
| 546 | sizes[BYTE_SIZE] = getByteSize(); |
| 547 | sizes[ZIP_SIZE] = getZipSize(); |
| 548 | } catch (IOException ee) { |
| 549 | throw new RuntimeException(ee); // cannot happen |
| 550 | } |
| 551 | } |
| 552 | public void computeSize(CodingMethod c, int[] values, int[] sizes) { |
| 553 | computeSize(c, values, 0, values.length, sizes); |
| 554 | } |
| 555 | public int[] computeSize(CodingMethod c, int[] values, int start, int end) { |
| 556 | int[] sizes = { 0, 0 }; |
| 557 | computeSize(c, values, start, end, sizes); |
| 558 | return sizes; |
| 559 | } |
| 560 | public int[] computeSize(CodingMethod c, int[] values) { |
| 561 | return computeSize(c, values, 0, values.length); |
| 562 | } |
| 563 | // This version uses the implicit local arguments |
| 564 | private int[] computeSizePrivate(CodingMethod c) { |
| 565 | int[] sizes = { 0, 0 }; |
| 566 | computeSize(c, values, start, end, sizes); |
| 567 | return sizes; |
| 568 | } |
| 569 | public int computeByteSize(CodingMethod cm, int[] values, int start, int end) { |
| 570 | int len = end-start; |
| 571 | if (len < 0) { |
| 572 | return 0; |
| 573 | } |
| 574 | if (cm instanceof Coding) { |
| 575 | Coding c = (Coding) cm; |
| 576 | int size = c.getLength(values, start, end); |
| 577 | int size2; |
| 578 | assert(size == (size2=countBytesToSizer(cm, values, start, end))) |
| 579 | : (cm+" : "+size+" != "+size2); |
| 580 | return size; |
| 581 | } |
| 582 | return countBytesToSizer(cm, values, start, end); |
| 583 | } |
| 584 | private int countBytesToSizer(CodingMethod cm, int[] values, int start, int end) { |
| 585 | try { |
| 586 | byteOnlySizer.reset(); |
| 587 | cm.writeArrayTo(byteOnlySizer, values, start, end); |
| 588 | return byteOnlySizer.getSize(); |
| 589 | } catch (IOException ee) { |
| 590 | throw new RuntimeException(ee); // cannot happen |
| 591 | } |
| 592 | } |
| 593 | |
| 594 | int[] getDeltas(int min, int max) { |
| 595 | if ((min|max) != 0) |
| 596 | return Coding.makeDeltas(values, start, end, min, max); |
| 597 | if (deltas == null) { |
| 598 | deltas = Coding.makeDeltas(values, start, end, 0, 0); |
| 599 | } |
| 600 | return deltas; |
| 601 | } |
| 602 | Histogram getValueHistogram() { |
| 603 | if (vHist == null) { |
| 604 | vHist = new Histogram(values, start, end); |
| 605 | if (verbose > 3) { |
| 606 | vHist.print("vHist", System.out); |
| 607 | } else if (verbose > 1) { |
| 608 | vHist.print("vHist", null, System.out); |
| 609 | } |
| 610 | } |
| 611 | return vHist; |
| 612 | } |
| 613 | Histogram getDeltaHistogram() { |
| 614 | if (dHist == null) { |
| 615 | dHist = new Histogram(getDeltas(0, 0)); |
| 616 | if (verbose > 3) { |
| 617 | dHist.print("dHist", System.out); |
| 618 | } else if (verbose > 1) { |
| 619 | dHist.print("dHist", null, System.out); |
| 620 | } |
| 621 | } |
| 622 | return dHist; |
| 623 | } |
| 624 | Histogram getHistogram(boolean isDelta) { |
| 625 | return isDelta ? getDeltaHistogram(): getValueHistogram(); |
| 626 | } |
| 627 | |
| 628 | private void tryPopulationCoding(Coding plainCoding) { |
| 629 | // assert(plainCoding.canRepresent(min, max)); |
| 630 | Histogram hist = getValueHistogram(); |
| 631 | // Start with "reasonable" default codings. |
| 632 | final int approxL = 64; |
| 633 | Coding favoredCoding = plainCoding.getValueCoding(); |
| 634 | Coding tokenCoding = BandStructure.UNSIGNED5.setL(approxL); |
| 635 | Coding unfavoredCoding = plainCoding.getValueCoding(); |
| 636 | // There's going to be a band header. Estimate conservatively large. |
| 637 | final int BAND_HEADER = 4; |
| 638 | // Keep a running model of the predicted sizes of the F/T/U sequences. |
| 639 | int currentFSize; |
| 640 | int currentTSize; |
| 641 | int currentUSize; |
| 642 | // Start by assuming a degenerate favored-value length of 0, |
| 643 | // which looks like a bunch of zero tokens followed by the |
| 644 | // original sequence. |
| 645 | // The {F} list ends with a repeated F value; find worst case: |
| 646 | currentFSize = |
| 647 | BAND_HEADER + Math.max(favoredCoding.getLength(min), |
| 648 | favoredCoding.getLength(max)); |
| 649 | // The {T} list starts out a bunch of zeros, each of length 1. |
| 650 | final int ZERO_LEN = tokenCoding.getLength(0); |
| 651 | currentTSize = ZERO_LEN * (end-start); |
| 652 | // The {U} list starts out a copy of the plainCoding: |
| 653 | currentUSize = (int) Math.ceil(hist.getBitLength(unfavoredCoding) / 8); |
| 654 | |
| 655 | int bestPopSize = (currentFSize + currentTSize + currentUSize); |
| 656 | int bestPopFVC = 0; |
| 657 | |
| 658 | // Record all the values, in decreasing order of favor. |
| 659 | int[] allFavoredValues = new int[1+hist.getTotalLength()]; |
| 660 | //int[] allPopSizes = new int[1+hist.getTotalLength()]; |
| 661 | |
| 662 | // What sizes are "interesting"? |
| 663 | int targetLowFVC = -1; |
| 664 | int targetHighFVC = -1; |
| 665 | |
| 666 | // For each length, adjust the currentXSize model, and look for a win. |
| 667 | int[][] matrix = hist.getMatrix(); |
| 668 | int mrow = -1; |
| 669 | int mcol = 1; |
| 670 | int mrowFreq = 0; |
| 671 | for (int fvcount = 1; fvcount <= hist.getTotalLength(); fvcount++) { |
| 672 | // The {F} list gets an additional member. |
| 673 | // Take it from the end of the current matrix row. |
| 674 | // (It's the end, so that we get larger favored values first.) |
| 675 | if (mcol == 1) { |
| 676 | mrow += 1; |
| 677 | mrowFreq = matrix[mrow][0]; |
| 678 | mcol = matrix[mrow].length; |
| 679 | } |
| 680 | int thisValue = matrix[mrow][--mcol]; |
| 681 | allFavoredValues[fvcount] = thisValue; |
| 682 | int thisVLen = favoredCoding.getLength(thisValue); |
| 683 | currentFSize += thisVLen; |
| 684 | // The token list replaces occurrences of zero with a new token: |
| 685 | int thisVCount = mrowFreq; |
| 686 | int thisToken = fvcount; |
| 687 | currentTSize += (tokenCoding.getLength(thisToken) |
| 688 | - ZERO_LEN) * thisVCount; |
| 689 | // The unfavored list loses occurrences of the newly favored value. |
| 690 | // (This is the whole point of the exercise!) |
| 691 | currentUSize -= thisVLen * thisVCount; |
| 692 | int currentSize = (currentFSize + currentTSize + currentUSize); |
| 693 | //allPopSizes[fvcount] = currentSize; |
| 694 | if (bestPopSize > currentSize) { |
| 695 | if (currentSize <= targetSize) { |
| 696 | targetHighFVC = fvcount; |
| 697 | if (targetLowFVC < 0) |
| 698 | targetLowFVC = fvcount; |
| 699 | if (verbose > 4) |
| 700 | Utils.log.info("better pop-size at fvc="+fvcount+ |
| 701 | " by "+pct(bestPopSize-currentSize, |
| 702 | bestPopSize)); |
| 703 | } |
| 704 | bestPopSize = currentSize; |
| 705 | bestPopFVC = fvcount; |
| 706 | } |
| 707 | } |
| 708 | if (targetLowFVC < 0) { |
| 709 | if (verbose > 1) { |
| 710 | // Complete loss. |
| 711 | if (verbose > 1) |
| 712 | Utils.log.info("no good pop-size; best was "+ |
| 713 | bestPopSize+" at "+bestPopFVC+ |
| 714 | " worse by "+ |
| 715 | pct(bestPopSize-bestByteSize, |
| 716 | bestByteSize)); |
| 717 | } |
| 718 | return; |
| 719 | } |
| 720 | if (verbose > 1) |
| 721 | Utils.log.info("initial best pop-size at fvc="+bestPopFVC+ |
| 722 | " in ["+targetLowFVC+".."+targetHighFVC+"]"+ |
| 723 | " by "+pct(bestByteSize-bestPopSize, |
| 724 | bestByteSize)); |
| 725 | int oldZipSize = bestZipSize; |
| 726 | // Now close onto a specific coding, testing more rigorously |
| 727 | // with the zipSize metric. |
| 728 | // Questions to decide: |
| 729 | // 1. How many favored values? |
| 730 | // 2. What token coding (TC)? |
| 731 | // 3. Sort favored values by value within length brackets? |
| 732 | // 4. What favored coding? |
| 733 | // 5. What unfavored coding? |
| 734 | // Steps 1/2/3 are interdependent, and may be iterated. |
| 735 | // Steps 4 and 5 may be decided independently afterward. |
| 736 | int[] LValuesCoded = PopulationCoding.LValuesCoded; |
| 737 | ArrayList bestFits = new ArrayList(); |
| 738 | ArrayList fullFits = new ArrayList(); |
| 739 | ArrayList longFits = new ArrayList(); |
| 740 | final int PACK_TO_MAX_S = 1; |
| 741 | if (bestPopFVC <= 255) { |
| 742 | bestFits.add(BandStructure.BYTE1); |
| 743 | } else { |
| 744 | int bestB = Coding.B_MAX; |
| 745 | boolean doFullAlso = (effort > POP_EFFORT); |
| 746 | if (doFullAlso) |
| 747 | fullFits.add(BandStructure.BYTE1.setS(PACK_TO_MAX_S)); |
| 748 | for (int i = LValuesCoded.length-1; i >= 1; i--) { |
| 749 | int L = LValuesCoded[i]; |
| 750 | Coding c0 = PopulationCoding.fitTokenCoding(targetLowFVC, L); |
| 751 | Coding c1 = PopulationCoding.fitTokenCoding(bestPopFVC, L); |
| 752 | Coding c3 = PopulationCoding.fitTokenCoding(targetHighFVC, L); |
| 753 | if (c1 != null) { |
| 754 | if (!bestFits.contains(c1)) |
| 755 | bestFits.add(c1); |
| 756 | if (bestB > c1.B()) |
| 757 | bestB = c1.B(); |
| 758 | } |
| 759 | if (doFullAlso) { |
| 760 | if (c3 == null) c3 = c1; |
| 761 | for (int B = c0.B(); B <= c3.B(); B++) { |
| 762 | if (B == c1.B()) continue; |
| 763 | if (B == 1) continue; |
| 764 | Coding c2 = c3.setB(B).setS(PACK_TO_MAX_S); |
| 765 | if (!fullFits.contains(c2)) |
| 766 | fullFits.add(c2); |
| 767 | } |
| 768 | } |
| 769 | } |
| 770 | // interleave all B greater than bestB with best and full fits |
| 771 | for (Iterator i = bestFits.iterator(); i.hasNext(); ) { |
| 772 | Coding c = (Coding) i.next(); |
| 773 | if (c.B() > bestB) { |
| 774 | i.remove(); |
| 775 | longFits.add(0, c); |
| 776 | } |
| 777 | } |
| 778 | } |
| 779 | ArrayList allFits = new ArrayList(); |
| 780 | for (Iterator i = bestFits.iterator(), |
| 781 | j = fullFits.iterator(), |
| 782 | k = longFits.iterator(); |
| 783 | i.hasNext() || j.hasNext() || k.hasNext(); ) { |
| 784 | if (i.hasNext()) allFits.add(i.next()); |
| 785 | if (j.hasNext()) allFits.add(j.next()); |
| 786 | if (k.hasNext()) allFits.add(k.next()); |
| 787 | } |
| 788 | bestFits.clear(); |
| 789 | fullFits.clear(); |
| 790 | longFits.clear(); |
| 791 | int maxFits = allFits.size(); |
| 792 | if (effort == POP_EFFORT) |
| 793 | maxFits = 2; |
| 794 | else if (maxFits > 4) { |
| 795 | maxFits -= 4; |
| 796 | maxFits = (maxFits * (effort-POP_EFFORT) |
| 797 | ) / (MAX_EFFORT-POP_EFFORT); |
| 798 | maxFits += 4; |
| 799 | } |
| 800 | if (allFits.size() > maxFits) { |
| 801 | if (verbose > 4) |
| 802 | Utils.log.info("allFits before clip: "+allFits); |
| 803 | allFits.subList(maxFits, allFits.size()).clear(); |
| 804 | } |
| 805 | if (verbose > 3) |
| 806 | Utils.log.info("allFits: "+allFits); |
| 807 | for (Iterator i = allFits.iterator(); i.hasNext(); ) { |
| 808 | Coding tc = (Coding) i.next(); |
| 809 | boolean packToMax = false; |
| 810 | if (tc.S() == PACK_TO_MAX_S) { |
| 811 | // Kludge: setS(PACK_TO_MAX_S) means packToMax here. |
| 812 | packToMax = true; |
| 813 | tc = tc.setS(0); |
| 814 | } |
| 815 | int fVlen; |
| 816 | if (!packToMax) { |
| 817 | fVlen = bestPopFVC; |
| 818 | assert(tc.umax() >= fVlen); |
| 819 | assert(tc.B() == 1 || tc.setB(tc.B()-1).umax() < fVlen); |
| 820 | } else { |
| 821 | fVlen = Math.min(tc.umax(), targetHighFVC); |
| 822 | if (fVlen < targetLowFVC) |
| 823 | continue; |
| 824 | if (fVlen == bestPopFVC) |
| 825 | continue; // redundant test |
| 826 | } |
| 827 | PopulationCoding pop = new PopulationCoding(); |
| 828 | pop.setHistogram(hist); |
| 829 | pop.setL(tc.L()); |
| 830 | pop.setFavoredValues(allFavoredValues, fVlen); |
| 831 | assert(pop.tokenCoding == tc); // predict correctly |
| 832 | pop.resortFavoredValues(); |
| 833 | int[] tcsizes = |
| 834 | computePopSizePrivate(pop, |
| 835 | favoredCoding, unfavoredCoding); |
| 836 | noteSizes(pop, tcsizes[BYTE_SIZE], BAND_HEADER+tcsizes[ZIP_SIZE]); |
| 837 | } |
| 838 | if (verbose > 3) { |
| 839 | Utils.log.info("measured best pop, size="+bestByteSize+ |
| 840 | "/zs="+bestZipSize+ |
| 841 | " better by "+ |
| 842 | pct(oldZipSize-bestZipSize, oldZipSize)); |
| 843 | if (bestZipSize < oldZipSize) { |
| 844 | Utils.log.info(">>> POP WINS BY "+ |
| 845 | (oldZipSize - bestZipSize)); |
| 846 | } |
| 847 | } |
| 848 | } |
| 849 | |
| 850 | private |
| 851 | int[] computePopSizePrivate(PopulationCoding pop, |
| 852 | Coding favoredCoding, |
| 853 | Coding unfavoredCoding) { |
| 854 | if (popHelper == null) { |
| 855 | popHelper = new CodingChooser(effort, allCodingChoices); |
| 856 | if (stress != null) |
| 857 | popHelper.addStressSeed(stress.nextInt()); |
| 858 | popHelper.topLevel = false; |
| 859 | popHelper.verbose -= 1; |
| 860 | popHelper.disablePopCoding = true; |
| 861 | popHelper.disableRunCoding = this.disableRunCoding; |
| 862 | if (effort < MID_EFFORT) |
| 863 | // No nested run codings. |
| 864 | popHelper.disableRunCoding = true; |
| 865 | } |
| 866 | int fVlen = pop.fVlen; |
| 867 | if (verbose > 2) { |
| 868 | Utils.log.info("computePopSizePrivate fvlen="+fVlen+ |
| 869 | " tc="+pop.tokenCoding); |
| 870 | Utils.log.info("{ //BEGIN"); |
| 871 | } |
| 872 | |
| 873 | // Find good coding choices for the token and unfavored sequences. |
| 874 | int[] favoredValues = pop.fValues; |
| 875 | int[][] vals = pop.encodeValues(values, start, end); |
| 876 | int[] tokens = vals[0]; |
| 877 | int[] unfavoredValues = vals[1]; |
| 878 | if (verbose > 2) |
| 879 | Utils.log.info("-- refine on fv["+fVlen+"] fc="+favoredCoding); |
| 880 | pop.setFavoredCoding(popHelper.choose(favoredValues, 1, 1+fVlen, favoredCoding)); |
| 881 | if (pop.tokenCoding instanceof Coding && |
| 882 | (stress == null || stress.nextBoolean())) { |
| 883 | if (verbose > 2) |
| 884 | Utils.log.info("-- refine on tv["+tokens.length+"] tc="+pop.tokenCoding); |
| 885 | CodingMethod tc = popHelper.choose(tokens, (Coding) pop.tokenCoding); |
| 886 | if (tc != pop.tokenCoding) { |
| 887 | if (verbose > 2) |
| 888 | Utils.log.info(">>> refined tc="+tc); |
| 889 | pop.setTokenCoding(tc); |
| 890 | } |
| 891 | } |
| 892 | if (unfavoredValues.length == 0) |
| 893 | pop.setUnfavoredCoding(null); |
| 894 | else { |
| 895 | if (verbose > 2) |
| 896 | Utils.log.info("-- refine on uv["+unfavoredValues.length+"] uc="+pop.unfavoredCoding); |
| 897 | pop.setUnfavoredCoding(popHelper.choose(unfavoredValues, unfavoredCoding)); |
| 898 | } |
| 899 | if (verbose > 3) { |
| 900 | Utils.log.info("finish computePopSizePrivate fvlen="+fVlen+ |
| 901 | " fc="+pop.favoredCoding+ |
| 902 | " tc="+pop.tokenCoding+ |
| 903 | " uc="+pop.unfavoredCoding); |
| 904 | //pop.hist.print("pop-hist", null, System.out); |
| 905 | StringBuffer sb = new StringBuffer(); |
| 906 | sb.append("fv = {"); |
| 907 | for (int i = 1; i <= fVlen; i++) { |
| 908 | if ((i % 10) == 0) |
| 909 | sb.append('\n'); |
| 910 | sb.append(" ").append(favoredValues[i]); |
| 911 | } |
| 912 | sb.append('\n'); |
| 913 | sb.append("}"); |
| 914 | Utils.log.info(sb.toString()); |
| 915 | } |
| 916 | if (verbose > 2) { |
| 917 | Utils.log.info("} //END"); |
| 918 | } |
| 919 | if (stress != null) { |
| 920 | return null; // do not bother with size computation |
| 921 | } |
| 922 | int[] sizes; |
| 923 | try { |
| 924 | resetData(); |
| 925 | // Write the array of favored values. |
| 926 | pop.writeSequencesTo(byteSizer, tokens, unfavoredValues); |
| 927 | sizes = new int[] { getByteSize(), getZipSize() }; |
| 928 | } catch (IOException ee) { |
| 929 | throw new RuntimeException(ee); // cannot happen |
| 930 | } |
| 931 | int[] checkSizes = null; |
| 932 | assert((checkSizes = computeSizePrivate(pop)) != null); |
| 933 | assert(checkSizes[BYTE_SIZE] == sizes[BYTE_SIZE]) |
| 934 | : (checkSizes[BYTE_SIZE]+" != "+sizes[BYTE_SIZE]); |
| 935 | return sizes; |
| 936 | } |
| 937 | |
| 938 | private void tryAdaptiveCoding(Coding plainCoding) { |
| 939 | int oldZipSize = bestZipSize; |
| 940 | // Scan the value sequence, determining whether an interesting |
| 941 | // run occupies too much space. ("Too much" means, say 5% more |
| 942 | // than the average integer size of the band as a whole.) |
| 943 | // Try to find a better coding for those segments. |
| 944 | int start = this.start; |
| 945 | int end = this.end; |
| 946 | int[] values = this.values; |
| 947 | int len = end-start; |
| 948 | if (plainCoding.isDelta()) { |
| 949 | values = getDeltas(0,0); //%%% not quite right! |
| 950 | start = 0; |
| 951 | end = values.length; |
| 952 | } |
| 953 | int[] sizes = new int[len+1]; |
| 954 | int fillp = 0; |
| 955 | int totalSize = 0; |
| 956 | for (int i = start; i < end; i++) { |
| 957 | int val = values[i]; |
| 958 | sizes[fillp++] = totalSize; |
| 959 | int size = plainCoding.getLength(val); |
| 960 | assert(size < Integer.MAX_VALUE); |
| 961 | //System.out.println("len "+val+" = "+size); |
| 962 | totalSize += size; |
| 963 | } |
| 964 | sizes[fillp++] = totalSize; |
| 965 | assert(fillp == sizes.length); |
| 966 | double avgSize = (double)totalSize / len; |
| 967 | double sizeFuzz; |
| 968 | double sizeFuzz2; |
| 969 | double sizeFuzz3; |
| 970 | if (effort >= MID_EFFORT) { |
| 971 | if (effort > MID_EFFORT+1) |
| 972 | sizeFuzz = 1.001; |
| 973 | else |
| 974 | sizeFuzz = 1.003; |
| 975 | } else { |
| 976 | if (effort > RUN_EFFORT) |
| 977 | sizeFuzz = 1.01; |
| 978 | else |
| 979 | sizeFuzz = 1.03; |
| 980 | } |
| 981 | // for now: |
| 982 | sizeFuzz *= sizeFuzz; // double the thresh |
| 983 | sizeFuzz2 = (sizeFuzz*sizeFuzz); |
| 984 | sizeFuzz3 = (sizeFuzz*sizeFuzz*sizeFuzz); |
| 985 | // Find some mesh scales we like. |
| 986 | double[] dmeshes = new double[1 + (effort-RUN_EFFORT)]; |
| 987 | double logLen = Math.log(len); |
| 988 | for (int i = 0; i < dmeshes.length; i++) { |
| 989 | dmeshes[i] = Math.exp(logLen*(i+1)/(dmeshes.length+1)); |
| 990 | } |
| 991 | int[] meshes = new int[dmeshes.length]; |
| 992 | int mfillp = 0; |
| 993 | for (int i = 0; i < dmeshes.length; i++) { |
| 994 | int m = (int)Math.round(dmeshes[i]); |
| 995 | m = AdaptiveCoding.getNextK(m-1); |
| 996 | if (m <= 0 || m >= len) continue; |
| 997 | if (mfillp > 0 && m == meshes[mfillp-1]) continue; |
| 998 | meshes[mfillp++] = m; |
| 999 | } |
| 1000 | meshes = BandStructure.realloc(meshes, mfillp); |
| 1001 | // There's going to be a band header. Estimate conservatively large. |
| 1002 | final int BAND_HEADER = 4; // op, KB, A, B |
| 1003 | // Threshold values for a "too big" mesh. |
| 1004 | int[] threshes = new int[meshes.length]; |
| 1005 | double[] fuzzes = new double[meshes.length]; |
| 1006 | for (int i = 0; i < meshes.length; i++) { |
| 1007 | int mesh = meshes[i]; |
| 1008 | double fuzz; |
| 1009 | if (mesh < 10) |
| 1010 | fuzz = sizeFuzz3; |
| 1011 | else if (mesh < 100) |
| 1012 | fuzz = sizeFuzz2; |
| 1013 | else |
| 1014 | fuzz = sizeFuzz; |
| 1015 | fuzzes[i] = fuzz; |
| 1016 | threshes[i] = BAND_HEADER + (int)Math.ceil(mesh * avgSize * fuzz); |
| 1017 | } |
| 1018 | if (verbose > 1) { |
| 1019 | System.out.print("tryAdaptiveCoding ["+len+"]"+ |
| 1020 | " avgS="+avgSize+" fuzz="+sizeFuzz+ |
| 1021 | " meshes: {"); |
| 1022 | for (int i = 0; i < meshes.length; i++) |
| 1023 | System.out.print(" "+meshes[i]+"("+threshes[i]+")"); |
| 1024 | Utils.log.info(" }"); |
| 1025 | } |
| 1026 | if (runHelper == null) { |
| 1027 | runHelper = new CodingChooser(effort, allCodingChoices); |
| 1028 | if (stress != null) |
| 1029 | runHelper.addStressSeed(stress.nextInt()); |
| 1030 | runHelper.topLevel = false; |
| 1031 | runHelper.verbose -= 1; |
| 1032 | runHelper.disableRunCoding = true; |
| 1033 | runHelper.disablePopCoding = this.disablePopCoding; |
| 1034 | if (effort < MID_EFFORT) |
| 1035 | // No nested pop codings. |
| 1036 | runHelper.disablePopCoding = true; |
| 1037 | } |
| 1038 | for (int i = 0; i < len; i++) { |
| 1039 | i = AdaptiveCoding.getNextK(i-1); |
| 1040 | if (i > len) i = len; |
| 1041 | for (int j = meshes.length-1; j >= 0; j--) { |
| 1042 | int mesh = meshes[j]; |
| 1043 | int thresh = threshes[j]; |
| 1044 | if (i+mesh > len) continue; |
| 1045 | int size = sizes[i+mesh] - sizes[i]; |
| 1046 | if (size >= thresh) { |
| 1047 | // Found a size bulge. |
| 1048 | int bend = i+mesh; |
| 1049 | int bsize = size; |
| 1050 | double bigSize = avgSize * fuzzes[j]; |
| 1051 | while (bend < len && (bend-i) <= len/2) { |
| 1052 | int bend0 = bend; |
| 1053 | int bsize0 = bsize; |
| 1054 | bend += mesh; |
| 1055 | bend = i+AdaptiveCoding.getNextK(bend-i-1); |
| 1056 | if (bend < 0 || bend > len) |
| 1057 | bend = len; |
| 1058 | bsize = sizes[bend]-sizes[i]; |
| 1059 | if (bsize < BAND_HEADER + (bend-i) * bigSize) { |
| 1060 | bsize = bsize0; |
| 1061 | bend = bend0; |
| 1062 | break; |
| 1063 | } |
| 1064 | } |
| 1065 | int nexti = bend; |
| 1066 | if (verbose > 2) { |
| 1067 | Utils.log.info("bulge at "+i+"["+(bend-i)+"] of "+ |
| 1068 | pct(bsize - avgSize*(bend-i), |
| 1069 | avgSize*(bend-i))); |
| 1070 | Utils.log.info("{ //BEGIN"); |
| 1071 | } |
| 1072 | CodingMethod begcm, midcm, endcm; |
| 1073 | midcm = runHelper.choose(this.values, |
| 1074 | this.start+i, |
| 1075 | this.start+bend, |
| 1076 | plainCoding); |
| 1077 | if (midcm == plainCoding) { |
| 1078 | // No use working further. |
| 1079 | begcm = plainCoding; |
| 1080 | endcm = plainCoding; |
| 1081 | } else { |
| 1082 | begcm = runHelper.choose(this.values, |
| 1083 | this.start, |
| 1084 | this.start+i, |
| 1085 | plainCoding); |
| 1086 | endcm = runHelper.choose(this.values, |
| 1087 | this.start+bend, |
| 1088 | this.start+len, |
| 1089 | plainCoding); |
| 1090 | } |
| 1091 | if (verbose > 2) |
| 1092 | Utils.log.info("} //END"); |
| 1093 | if (begcm == midcm && i > 0 && |
| 1094 | AdaptiveCoding.isCodableLength(bend)) { |
| 1095 | i = 0; |
| 1096 | } |
| 1097 | if (midcm == endcm && bend < len) { |
| 1098 | bend = len; |
| 1099 | } |
| 1100 | if (begcm != plainCoding || |
| 1101 | midcm != plainCoding || |
| 1102 | endcm != plainCoding) { |
| 1103 | CodingMethod chain; |
| 1104 | int hlen = 0; |
| 1105 | if (bend == len) { |
| 1106 | chain = midcm; |
| 1107 | } else { |
| 1108 | chain = new AdaptiveCoding(bend-i, midcm, endcm); |
| 1109 | hlen += BAND_HEADER; |
| 1110 | } |
| 1111 | if (i > 0) { |
| 1112 | chain = new AdaptiveCoding(i, begcm, chain); |
| 1113 | hlen += BAND_HEADER; |
| 1114 | } |
| 1115 | int[] chainSize = computeSizePrivate(chain); |
| 1116 | noteSizes(chain, |
| 1117 | chainSize[BYTE_SIZE], |
| 1118 | chainSize[ZIP_SIZE]+hlen); |
| 1119 | } |
| 1120 | i = nexti; |
| 1121 | break; |
| 1122 | } |
| 1123 | } |
| 1124 | } |
| 1125 | if (verbose > 3) { |
| 1126 | if (bestZipSize < oldZipSize) { |
| 1127 | Utils.log.info(">>> RUN WINS BY "+ |
| 1128 | (oldZipSize - bestZipSize)); |
| 1129 | } |
| 1130 | } |
| 1131 | } |
| 1132 | |
| 1133 | static private |
| 1134 | String pct(double num, double den) { |
| 1135 | return (Math.round((num / den)*10000)/100.0)+"%"; |
| 1136 | } |
| 1137 | |
| 1138 | static |
| 1139 | class Sizer extends OutputStream { |
| 1140 | final OutputStream out; // if non-null, copy output here also |
| 1141 | Sizer(OutputStream out) { |
| 1142 | this.out = out; |
| 1143 | } |
| 1144 | Sizer() { |
| 1145 | this(null); |
| 1146 | } |
| 1147 | private int count; |
| 1148 | public void write(int b) throws IOException { |
| 1149 | count++; |
| 1150 | if (out != null) out.write(b); |
| 1151 | } |
| 1152 | public void write(byte b[], int off, int len) throws IOException { |
| 1153 | count += len; |
| 1154 | if (out != null) out.write(b, off, len); |
| 1155 | } |
| 1156 | public void reset() { |
| 1157 | count = 0; |
| 1158 | } |
| 1159 | public int getSize() { return count; } |
| 1160 | |
| 1161 | public String toString() { |
| 1162 | String str = super.toString(); |
| 1163 | // If -ea, print out more informative strings! |
| 1164 | assert((str = stringForDebug()) != null); |
| 1165 | return str; |
| 1166 | } |
| 1167 | String stringForDebug() { |
| 1168 | return "<Sizer "+getSize()+">"; |
| 1169 | } |
| 1170 | } |
| 1171 | |
| 1172 | private Sizer zipSizer = new Sizer(); |
| 1173 | private Deflater zipDef = new Deflater(); |
| 1174 | private DeflaterOutputStream zipOut = new DeflaterOutputStream(zipSizer, zipDef); |
| 1175 | private Sizer byteSizer = new Sizer(zipOut); |
| 1176 | private Sizer byteOnlySizer = new Sizer(); |
| 1177 | |
| 1178 | private void resetData() { |
| 1179 | flushData(); |
| 1180 | zipDef.reset(); |
| 1181 | if (context != null) { |
| 1182 | // Prepend given salt to the test output. |
| 1183 | try { |
| 1184 | context.writeTo(byteSizer); |
| 1185 | } catch (IOException ee) { |
| 1186 | throw new RuntimeException(ee); // cannot happen |
| 1187 | } |
| 1188 | } |
| 1189 | zipSizer.reset(); |
| 1190 | byteSizer.reset(); |
| 1191 | } |
| 1192 | private void flushData() { |
| 1193 | try { |
| 1194 | zipOut.finish(); |
| 1195 | } catch (IOException ee) { |
| 1196 | throw new RuntimeException(ee); // cannot happen |
| 1197 | } |
| 1198 | } |
| 1199 | private int getByteSize() { |
| 1200 | return byteSizer.getSize(); |
| 1201 | } |
| 1202 | private int getZipSize() { |
| 1203 | flushData(); |
| 1204 | return zipSizer.getSize(); |
| 1205 | } |
| 1206 | |
| 1207 | |
| 1208 | /// Stress-test helpers. |
| 1209 | |
| 1210 | void addStressSeed(int x) { |
| 1211 | if (stress == null) return; |
| 1212 | stress.setSeed(x + ((long)stress.nextInt() << 32)); |
| 1213 | } |
| 1214 | |
| 1215 | // Pick a random pop-coding. |
| 1216 | private CodingMethod stressPopCoding(CodingMethod coding) { |
| 1217 | assert(stress != null); // this method is only for testing |
| 1218 | // Don't turn it into a pop coding if it's already something special. |
| 1219 | if (!(coding instanceof Coding)) return coding; |
| 1220 | Coding valueCoding = ((Coding)coding).getValueCoding(); |
| 1221 | Histogram hist = getValueHistogram(); |
| 1222 | int fVlen = stressLen(hist.getTotalLength()); |
| 1223 | if (fVlen == 0) return coding; |
| 1224 | List popvals = new ArrayList(); |
| 1225 | if (stress.nextBoolean()) { |
| 1226 | // Build the population from the value list. |
| 1227 | HashSet popset = new HashSet(); |
| 1228 | for (int i = start; i < end; i++) { |
| 1229 | Integer val = new Integer(values[i]); |
| 1230 | if (popset.add(val)) popvals.add(val); |
| 1231 | } |
| 1232 | } else { |
| 1233 | int[][] matrix = hist.getMatrix(); |
| 1234 | for (int mrow = 0; mrow < matrix.length; mrow++) { |
| 1235 | int[] row = matrix[mrow]; |
| 1236 | for (int mcol = 1; mcol < row.length; mcol++) { |
| 1237 | popvals.add(new Integer(row[mcol])); |
| 1238 | } |
| 1239 | } |
| 1240 | } |
| 1241 | int reorder = stress.nextInt(); |
| 1242 | if ((reorder & 7) <= 2) { |
| 1243 | // Lose the order. |
| 1244 | Collections.shuffle(popvals, stress); |
| 1245 | } else { |
| 1246 | // Keep the order, mostly. |
| 1247 | if (((reorder >>>= 3) & 7) <= 2) Collections.sort(popvals); |
| 1248 | if (((reorder >>>= 3) & 7) <= 2) Collections.reverse(popvals); |
| 1249 | if (((reorder >>>= 3) & 7) <= 2) Collections.rotate(popvals, stressLen(popvals.size())); |
| 1250 | } |
| 1251 | if (popvals.size() > fVlen) { |
| 1252 | // Cut the list down. |
| 1253 | if (((reorder >>>= 3) & 7) <= 2) { |
| 1254 | // Cut at end. |
| 1255 | popvals.subList(fVlen, popvals.size()).clear(); |
| 1256 | } else { |
| 1257 | // Cut at start. |
| 1258 | popvals.subList(0, popvals.size()-fVlen).clear(); |
| 1259 | } |
| 1260 | } |
| 1261 | fVlen = popvals.size(); |
| 1262 | int[] fvals = new int[1+fVlen]; |
| 1263 | for (int i = 0; i < fVlen; i++) { |
| 1264 | fvals[1+i] = ((Integer)popvals.get(i)).intValue(); |
| 1265 | } |
| 1266 | PopulationCoding pop = new PopulationCoding(); |
| 1267 | pop.setFavoredValues(fvals, fVlen); |
| 1268 | int[] lvals = PopulationCoding.LValuesCoded; |
| 1269 | for (int i = 0; i < lvals.length / 2; i++) { |
| 1270 | int popl = lvals[stress.nextInt(lvals.length)]; |
| 1271 | if (popl < 0) continue; |
| 1272 | if (PopulationCoding.fitTokenCoding(fVlen, popl) != null) { |
| 1273 | pop.setL(popl); |
| 1274 | break; |
| 1275 | } |
| 1276 | } |
| 1277 | if (pop.tokenCoding == null) { |
| 1278 | int min = fvals[1], max = min; |
| 1279 | for (int i = 2; i <= fVlen; i++) { |
| 1280 | int val = fvals[i]; |
| 1281 | if (min > val) min = val; |
| 1282 | if (max < val) max = val; |
| 1283 | } |
| 1284 | pop.tokenCoding = stressCoding(min, max); |
| 1285 | } |
| 1286 | |
| 1287 | computePopSizePrivate(pop, valueCoding, valueCoding); |
| 1288 | return pop; |
| 1289 | } |
| 1290 | |
| 1291 | // Pick a random adaptive coding. |
| 1292 | private CodingMethod stressAdaptiveCoding(CodingMethod coding) { |
| 1293 | assert(stress != null); // this method is only for testing |
| 1294 | // Don't turn it into a run coding if it's already something special. |
| 1295 | if (!(coding instanceof Coding)) return coding; |
| 1296 | Coding plainCoding = (Coding)coding; |
| 1297 | int len = end-start; |
| 1298 | if (len < 2) return coding; |
| 1299 | // Decide how many spans we'll create. |
| 1300 | int spanlen = stressLen(len-1)+1; |
| 1301 | if (spanlen == len) return coding; |
| 1302 | try { |
| 1303 | assert(!disableRunCoding); |
| 1304 | disableRunCoding = true; // temporary, while I decide spans |
| 1305 | int[] allValues = (int[]) values.clone(); |
| 1306 | CodingMethod result = null; |
| 1307 | int scan = this.end; |
| 1308 | int start = this.start; |
| 1309 | for (int split; scan > start; scan = split) { |
| 1310 | int thisspan; |
| 1311 | int rand = (scan - start < 100)? -1: stress.nextInt(); |
| 1312 | if ((rand & 7) != 0) { |
| 1313 | thisspan = (spanlen==1? spanlen: stressLen(spanlen-1)+1); |
| 1314 | } else { |
| 1315 | // Every so often generate a value based on KX/KB format. |
| 1316 | int KX = (rand >>>= 3) & AdaptiveCoding.KX_MAX; |
| 1317 | int KB = (rand >>>= 3) & AdaptiveCoding.KB_MAX; |
| 1318 | for (;;) { |
| 1319 | thisspan = AdaptiveCoding.decodeK(KX, KB); |
| 1320 | if (thisspan <= scan - start) break; |
| 1321 | // Try smaller and smaller codings: |
| 1322 | if (KB != AdaptiveCoding.KB_DEFAULT) |
| 1323 | KB = AdaptiveCoding.KB_DEFAULT; |
| 1324 | else |
| 1325 | KX -= 1; |
| 1326 | } |
| 1327 | //System.out.println("KX="+KX+" KB="+KB+" K="+thisspan); |
| 1328 | assert(AdaptiveCoding.isCodableLength(thisspan)); |
| 1329 | } |
| 1330 | if (thisspan > scan - start) thisspan = scan - start; |
| 1331 | while (!AdaptiveCoding.isCodableLength(thisspan)) --thisspan; |
| 1332 | split = scan - thisspan; |
| 1333 | assert(split < scan); |
| 1334 | assert(split >= start); |
| 1335 | // Choose a coding for the span [split..scan). |
| 1336 | CodingMethod sc = choose(allValues, split, scan, plainCoding); |
| 1337 | if (result == null) { |
| 1338 | result = sc; // the caboose |
| 1339 | } else { |
| 1340 | result = new AdaptiveCoding(scan-split, sc, result); |
| 1341 | } |
| 1342 | } |
| 1343 | return result; |
| 1344 | } finally { |
| 1345 | disableRunCoding = false; // return to normal value |
| 1346 | } |
| 1347 | } |
| 1348 | |
| 1349 | // Return a random value in [0..len], gently biased toward extremes. |
| 1350 | private Coding stressCoding(int min, int max) { |
| 1351 | assert(stress != null); // this method is only for testing |
| 1352 | for (int i = 0; i < 100; i++) { |
| 1353 | Coding c = Coding.of(stress.nextInt(Coding.B_MAX)+1, |
| 1354 | stress.nextInt(Coding.H_MAX)+1, |
| 1355 | stress.nextInt(Coding.S_MAX+1)); |
| 1356 | if (c.B() == 1) c = c.setH(256); |
| 1357 | if (c.H() == 256 && c.B() >= 5) c = c.setB(4); |
| 1358 | if (stress.nextBoolean()) { |
| 1359 | Coding dc = c.setD(1); |
| 1360 | if (dc.canRepresent(min, max)) return dc; |
| 1361 | } |
| 1362 | if (c.canRepresent(min, max)) return c; |
| 1363 | } |
| 1364 | return BandStructure.UNSIGNED5; |
| 1365 | } |
| 1366 | |
| 1367 | // Return a random value in [0..len], gently biased toward extremes. |
| 1368 | private int stressLen(int len) { |
| 1369 | assert(stress != null); // this method is only for testing |
| 1370 | assert(len >= 0); |
| 1371 | int rand = stress.nextInt(100); |
| 1372 | if (rand < 20) |
| 1373 | return Math.min(len/5, rand); |
| 1374 | else if (rand < 40) |
| 1375 | return len; |
| 1376 | else |
| 1377 | return stress.nextInt(len); |
| 1378 | } |
| 1379 | |
| 1380 | // For debug only. |
| 1381 | /* |
| 1382 | public static |
| 1383 | int[] readValuesFrom(InputStream instr) { |
| 1384 | return readValuesFrom(new InputStreamReader(instr)); |
| 1385 | } |
| 1386 | public static |
| 1387 | int[] readValuesFrom(Reader inrdr) { |
| 1388 | inrdr = new BufferedReader(inrdr); |
| 1389 | final StreamTokenizer in = new StreamTokenizer(inrdr); |
| 1390 | final int TT_NOTHING = -99; |
| 1391 | in.commentChar('#'); |
| 1392 | return readValuesFrom(new Iterator() { |
| 1393 | int token = TT_NOTHING; |
| 1394 | private int getToken() { |
| 1395 | if (token == TT_NOTHING) { |
| 1396 | try { |
| 1397 | token = in.nextToken(); |
| 1398 | assert(token != TT_NOTHING); |
| 1399 | } catch (IOException ee) { |
| 1400 | throw new RuntimeException(ee); |
| 1401 | } |
| 1402 | } |
| 1403 | return token; |
| 1404 | } |
| 1405 | public boolean hasNext() { |
| 1406 | return getToken() != StreamTokenizer.TT_EOF; |
| 1407 | } |
| 1408 | public Object next() { |
| 1409 | int ntok = getToken(); |
| 1410 | token = TT_NOTHING; |
| 1411 | switch (ntok) { |
| 1412 | case StreamTokenizer.TT_EOF: |
| 1413 | throw new NoSuchElementException(); |
| 1414 | case StreamTokenizer.TT_NUMBER: |
| 1415 | return new Integer((int) in.nval); |
| 1416 | default: |
| 1417 | assert(false); |
| 1418 | return null; |
| 1419 | } |
| 1420 | } |
| 1421 | public void remove() { |
| 1422 | throw new UnsupportedOperationException(); |
| 1423 | } |
| 1424 | }); |
| 1425 | } |
| 1426 | public static |
| 1427 | int[] readValuesFrom(Iterator iter) { |
| 1428 | return readValuesFrom(iter, 0); |
| 1429 | } |
| 1430 | public static |
| 1431 | int[] readValuesFrom(Iterator iter, int initSize) { |
| 1432 | int[] na = new int[Math.max(10, initSize)]; |
| 1433 | int np = 0; |
| 1434 | while (iter.hasNext()) { |
| 1435 | Integer val = (Integer) iter.next(); |
| 1436 | if (np == na.length) { |
| 1437 | na = BandStructure.realloc(na); |
| 1438 | } |
| 1439 | na[np++] = val.intValue(); |
| 1440 | } |
| 1441 | if (np != na.length) { |
| 1442 | na = BandStructure.realloc(na, np); |
| 1443 | } |
| 1444 | return na; |
| 1445 | } |
| 1446 | |
| 1447 | public static |
| 1448 | void main(String[] av) throws IOException { |
| 1449 | int effort = MID_EFFORT; |
| 1450 | int ap = 0; |
| 1451 | if (ap < av.length && av[ap].equals("-e")) { |
| 1452 | ap++; |
| 1453 | effort = Integer.parseInt(av[ap++]); |
| 1454 | } |
| 1455 | int verbose = 1; |
| 1456 | if (ap < av.length && av[ap].equals("-v")) { |
| 1457 | ap++; |
| 1458 | verbose = Integer.parseInt(av[ap++]); |
| 1459 | } |
| 1460 | Coding[] bcs = BandStructure.getBasicCodings(); |
| 1461 | CodingChooser cc = new CodingChooser(effort, bcs); |
| 1462 | if (ap < av.length && av[ap].equals("-p")) { |
| 1463 | ap++; |
| 1464 | cc.optUsePopulationCoding = false; |
| 1465 | } |
| 1466 | if (ap < av.length && av[ap].equals("-a")) { |
| 1467 | ap++; |
| 1468 | cc.optUseAdaptiveCoding = false; |
| 1469 | } |
| 1470 | cc.verbose = verbose; |
| 1471 | int[] values = readValuesFrom(System.in); |
| 1472 | int[] sizes = {0,0}; |
| 1473 | CodingMethod cm = cc.choose(values, BandStructure.UNSIGNED5, sizes); |
| 1474 | System.out.println("size: "+sizes[BYTE_SIZE]+"/zs="+sizes[ZIP_SIZE]); |
| 1475 | System.out.println(cm); |
| 1476 | } |
| 1477 | //*/ |
| 1478 | |
| 1479 | } |