J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | package com.sun.java.util.jar.pack; |
| 27 | |
| 28 | import java.util.*; |
| 29 | import java.io.*; |
| 30 | |
| 31 | /** |
| 32 | * Population-based coding. |
| 33 | * See the section "Encodings of Uncorrelated Values" in the Pack200 spec. |
| 34 | * @author John Rose |
| 35 | */ |
| 36 | // This tactic alone reduces the final zipped rt.jar by about a percent. |
| 37 | class PopulationCoding implements Constants, CodingMethod { |
| 38 | Histogram vHist; // histogram of all values |
| 39 | int[] fValues; // list of favored values |
| 40 | int fVlen; // inclusive max index |
| 41 | long[] symtab; // int map of favored value -> token [1..#fValues] |
| 42 | |
| 43 | CodingMethod favoredCoding; |
| 44 | CodingMethod tokenCoding; |
| 45 | CodingMethod unfavoredCoding; |
| 46 | |
| 47 | int L = -1; //preferred L value for tokenCoding |
| 48 | |
| 49 | public void setFavoredValues(int[] fValues, int fVlen) { |
| 50 | // Note: {f} is allFavoredValues[1..fvlen], not [0..fvlen-1]. |
| 51 | // This is because zero is an exceptional favored value index. |
| 52 | assert(fValues[0] == 0); // must be empty |
| 53 | assert(this.fValues == null); // do not do this twice |
| 54 | this.fValues = fValues; |
| 55 | this.fVlen = fVlen; |
| 56 | if (L >= 0) { |
| 57 | setL(L); // reassert |
| 58 | } |
| 59 | } |
| 60 | public void setFavoredValues(int[] fValues) { |
| 61 | int fVlen = fValues.length-1; |
| 62 | setFavoredValues(fValues, fVlen); |
| 63 | } |
| 64 | public void setHistogram(Histogram vHist) { |
| 65 | this.vHist = vHist; |
| 66 | } |
| 67 | public void setL(int L) { |
| 68 | this.L = L; |
| 69 | if (L >= 0 && fValues != null && tokenCoding == null) { |
| 70 | tokenCoding = fitTokenCoding(fVlen, L); |
| 71 | assert(tokenCoding != null); |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | public static Coding fitTokenCoding(int fVlen, int L) { |
| 76 | // Find the smallest B s.t. (B,H,0) covers fVlen. |
| 77 | if (fVlen < 256) |
| 78 | // H/L do not matter when B==1 |
| 79 | return BandStructure.BYTE1; |
| 80 | Coding longest = BandStructure.UNSIGNED5.setL(L); |
| 81 | if (!longest.canRepresentUnsigned(fVlen)) |
| 82 | return null; // failure; L is too sharp and fVlen too large |
| 83 | Coding tc = longest; |
| 84 | for (Coding shorter = longest; ; ) { |
| 85 | shorter = shorter.setB(shorter.B()-1); |
| 86 | if (shorter.umax() < fVlen) |
| 87 | break; |
| 88 | tc = shorter; // shorten it by reducing B |
| 89 | } |
| 90 | return tc; |
| 91 | } |
| 92 | |
| 93 | public void setFavoredCoding(CodingMethod favoredCoding) { |
| 94 | this.favoredCoding = favoredCoding; |
| 95 | } |
| 96 | public void setTokenCoding(CodingMethod tokenCoding) { |
| 97 | this.tokenCoding = tokenCoding; |
| 98 | this.L = -1; |
| 99 | if (tokenCoding instanceof Coding && fValues != null) { |
| 100 | Coding tc = (Coding) tokenCoding; |
| 101 | if (tc == fitTokenCoding(fVlen, tc.L())) |
| 102 | this.L = tc.L();; |
| 103 | // Otherwise, it's a non-default coding. |
| 104 | } |
| 105 | } |
| 106 | public void setUnfavoredCoding(CodingMethod unfavoredCoding) { |
| 107 | this.unfavoredCoding = unfavoredCoding; |
| 108 | } |
| 109 | |
| 110 | public int favoredValueMaxLength() { |
| 111 | if (L == 0) |
| 112 | return Integer.MAX_VALUE; |
| 113 | else |
| 114 | return BandStructure.UNSIGNED5.setL(L).umax(); |
| 115 | } |
| 116 | |
| 117 | public void resortFavoredValues() { |
| 118 | Coding tc = (Coding) tokenCoding; |
| 119 | // Make a local copy before reordering. |
| 120 | fValues = BandStructure.realloc(fValues, 1+fVlen); |
| 121 | // Resort favoredValues within each byte-size cadre. |
| 122 | int fillp = 1; // skip initial zero |
| 123 | for (int n = 1; n <= tc.B(); n++) { |
| 124 | int nmax = tc.byteMax(n); |
| 125 | if (nmax > fVlen) |
| 126 | nmax = fVlen; |
| 127 | if (nmax < tc.byteMin(n)) |
| 128 | break; |
| 129 | int low = fillp; |
| 130 | int high = nmax+1; |
| 131 | if (high == low) continue; |
| 132 | assert(high > low) |
| 133 | : high+"!>"+low; |
| 134 | assert(tc.getLength(low) == n) |
| 135 | : n+" != len("+(low)+") == "+ |
| 136 | tc.getLength(low); |
| 137 | assert(tc.getLength(high-1) == n) |
| 138 | : n+" != len("+(high-1)+") == "+ |
| 139 | tc.getLength(high-1); |
| 140 | int midTarget = low + (high-low)/2; |
| 141 | int mid = low; |
| 142 | // Divide the values into cadres, and sort within each. |
| 143 | int prevCount = -1; |
| 144 | int prevLimit = low; |
| 145 | for (int i = low; i < high; i++) { |
| 146 | int val = fValues[i]; |
| 147 | int count = vHist.getFrequency(val); |
| 148 | if (prevCount != count) { |
| 149 | if (n == 1) { |
| 150 | // For the single-byte encoding, keep strict order |
| 151 | // among frequency groups. |
| 152 | Arrays.sort(fValues, prevLimit, i); |
| 153 | } else if (Math.abs(mid - midTarget) > |
| 154 | Math.abs(i - midTarget)) { |
| 155 | // Find a single inflection point |
| 156 | // close to the middle of the byte-size cadre. |
| 157 | mid = i; |
| 158 | } |
| 159 | prevCount = count; |
| 160 | prevLimit = i; |
| 161 | } |
| 162 | } |
| 163 | if (n == 1) { |
| 164 | Arrays.sort(fValues, prevLimit, high); |
| 165 | } else { |
| 166 | // Sort up to the midpoint, if any. |
| 167 | Arrays.sort(fValues, low, mid); |
| 168 | Arrays.sort(fValues, mid, high); |
| 169 | } |
| 170 | assert(tc.getLength(low) == tc.getLength(mid)); |
| 171 | assert(tc.getLength(low) == tc.getLength(high-1)); |
| 172 | fillp = nmax+1; |
| 173 | } |
| 174 | assert(fillp == fValues.length); |
| 175 | |
| 176 | // Reset symtab. |
| 177 | symtab = null; |
| 178 | } |
| 179 | |
| 180 | public int getToken(int value) { |
| 181 | if (symtab == null) |
| 182 | symtab = makeSymtab(); |
| 183 | int pos = Arrays.binarySearch(symtab, (long)value << 32); |
| 184 | if (pos < 0) pos = -pos-1; |
| 185 | if (pos < symtab.length && value == (int)(symtab[pos] >>> 32)) |
| 186 | return (int)symtab[pos]; |
| 187 | else |
| 188 | return 0; |
| 189 | } |
| 190 | |
| 191 | public int[][] encodeValues(int[] values, int start, int end) { |
| 192 | // Compute token sequence. |
| 193 | int[] tokens = new int[end-start]; |
| 194 | int nuv = 0; |
| 195 | for (int i = 0; i < tokens.length; i++) { |
| 196 | int val = values[start+i]; |
| 197 | int tok = getToken(val); |
| 198 | if (tok != 0) |
| 199 | tokens[i] = tok; |
| 200 | else |
| 201 | nuv += 1; |
| 202 | } |
| 203 | // Compute unfavored value sequence. |
| 204 | int[] unfavoredValues = new int[nuv]; |
| 205 | nuv = 0; // reset |
| 206 | for (int i = 0; i < tokens.length; i++) { |
| 207 | if (tokens[i] != 0) continue; // already covered |
| 208 | int val = values[start+i]; |
| 209 | unfavoredValues[nuv++] = val; |
| 210 | } |
| 211 | assert(nuv == unfavoredValues.length); |
| 212 | return new int[][]{ tokens, unfavoredValues }; |
| 213 | } |
| 214 | |
| 215 | private long[] makeSymtab() { |
| 216 | long[] symtab = new long[fVlen]; |
| 217 | for (int token = 1; token <= fVlen; token++) { |
| 218 | symtab[token-1] = ((long)fValues[token] << 32) | token; |
| 219 | } |
| 220 | // Index by value: |
| 221 | Arrays.sort(symtab); |
| 222 | return symtab; |
| 223 | } |
| 224 | |
| 225 | private Coding getTailCoding(CodingMethod c) { |
| 226 | while (c instanceof AdaptiveCoding) |
| 227 | c = ((AdaptiveCoding)c).tailCoding; |
| 228 | return (Coding) c; |
| 229 | } |
| 230 | |
| 231 | // CodingMethod methods. |
| 232 | public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException { |
| 233 | int[][] vals = encodeValues(a, start, end); |
| 234 | writeSequencesTo(out, vals[0], vals[1]); |
| 235 | } |
| 236 | void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException { |
| 237 | favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen); |
| 238 | getTailCoding(favoredCoding).writeTo(out, computeSentinelValue()); |
| 239 | tokenCoding.writeArrayTo(out, tokens, 0, tokens.length); |
| 240 | if (uValues.length > 0) |
| 241 | unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length); |
| 242 | } |
| 243 | |
| 244 | int computeSentinelValue() { |
| 245 | Coding fc = getTailCoding(favoredCoding); |
| 246 | if (fc.isDelta()) { |
| 247 | // repeat the last favored value, using delta=0 |
| 248 | return 0; |
| 249 | } else { |
| 250 | // else repeat the shorter of the min or last value |
| 251 | int min = fValues[1]; |
| 252 | int last = min; |
| 253 | // (remember that fVlen is an inclusive limit in fValues) |
| 254 | for (int i = 2; i <= fVlen; i++) { |
| 255 | last = fValues[i]; |
| 256 | min = moreCentral(min, last); |
| 257 | } |
| 258 | int endVal; |
| 259 | if (fc.getLength(min) <= fc.getLength(last)) |
| 260 | return min; |
| 261 | else |
| 262 | return last; |
| 263 | } |
| 264 | } |
| 265 | |
| 266 | public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException { |
| 267 | // Parameters are fCode, L, uCode. |
| 268 | setFavoredValues(readFavoredValuesFrom(in, end-start)); |
| 269 | // Read the tokens. Read them into the final array, for the moment. |
| 270 | tokenCoding.readArrayFrom(in, a, start, end); |
| 271 | // Decode the favored tokens. |
| 272 | int headp = 0, tailp = -1; |
| 273 | int uVlen = 0; |
| 274 | for (int i = start; i < end; i++) { |
| 275 | int tok = a[i]; |
| 276 | if (tok == 0) { |
| 277 | // Make a linked list, and decode in a second pass. |
| 278 | if (tailp < 0) { |
| 279 | headp = i; |
| 280 | } else { |
| 281 | a[tailp] = i; |
| 282 | } |
| 283 | tailp = i; |
| 284 | uVlen += 1; |
| 285 | } else { |
| 286 | a[i] = fValues[tok]; |
| 287 | } |
| 288 | } |
| 289 | // Walk the linked list of "zero" locations, decoding unfavored vals. |
| 290 | int[] uValues = new int[uVlen]; |
| 291 | if (uVlen > 0) |
| 292 | unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen); |
| 293 | for (int i = 0; i < uVlen; i++) { |
| 294 | int nextp = a[headp]; |
| 295 | a[headp] = uValues[i]; |
| 296 | headp = nextp; |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException { |
| 301 | int[] fValues = new int[1000]; // realloc as needed |
| 302 | // The set uniqueValuesForDebug records all favored values. |
| 303 | // As each new value is added, we assert that the value |
| 304 | // was not already in the set. |
| 305 | HashSet uniqueValuesForDebug = null; |
| 306 | assert((uniqueValuesForDebug = new HashSet()) != null); |
| 307 | int fillp = 1; |
| 308 | maxForDebug += fillp; |
| 309 | int min = Integer.MIN_VALUE; // farthest from the center |
| 310 | //int min2 = Integer.MIN_VALUE; // emulate buggy 150.7 spec. |
| 311 | int last = 0; |
| 312 | CodingMethod fcm = favoredCoding; |
| 313 | while (fcm instanceof AdaptiveCoding) { |
| 314 | AdaptiveCoding ac = (AdaptiveCoding) fcm; |
| 315 | int len = ac.headLength; |
| 316 | while (fillp + len > fValues.length) |
| 317 | fValues = BandStructure.realloc(fValues); |
| 318 | int newFillp = fillp + len; |
| 319 | ac.headCoding.readArrayFrom(in, fValues, fillp, newFillp); |
| 320 | while (fillp < newFillp) { |
| 321 | int val = fValues[fillp++]; |
| 322 | assert(uniqueValuesForDebug.add(new Integer(val))); |
| 323 | assert(fillp <= maxForDebug); |
| 324 | last = val; |
| 325 | min = moreCentral(min, val); |
| 326 | //min2 = moreCentral2(min2, val, min); |
| 327 | } |
| 328 | fcm = ac.tailCoding; |
| 329 | } |
| 330 | Coding fc = (Coding) fcm; |
| 331 | if (fc.isDelta()) { |
| 332 | for (long state = 0;;) { |
| 333 | // Read a new value: |
| 334 | state += fc.readFrom(in); |
| 335 | int val; |
| 336 | if (fc.isSubrange()) |
| 337 | val = fc.reduceToUnsignedRange(state); |
| 338 | else |
| 339 | val = (int)state; |
| 340 | state = val; |
| 341 | if (fillp > 1 && (val == last || val == min)) //|| val == min2 |
| 342 | break; |
| 343 | if (fillp == fValues.length) |
| 344 | fValues = BandStructure.realloc(fValues); |
| 345 | fValues[fillp++] = val; |
| 346 | assert(uniqueValuesForDebug.add(new Integer(val))); |
| 347 | assert(fillp <= maxForDebug); |
| 348 | last = val; |
| 349 | min = moreCentral(min, val); |
| 350 | //min2 = moreCentral(min2, val); |
| 351 | } |
| 352 | } else { |
| 353 | for (;;) { |
| 354 | int val = fc.readFrom(in); |
| 355 | if (fillp > 1 && (val == last || val == min)) //|| val == min2 |
| 356 | break; |
| 357 | if (fillp == fValues.length) |
| 358 | fValues = BandStructure.realloc(fValues); |
| 359 | fValues[fillp++] = val; |
| 360 | assert(uniqueValuesForDebug.add(new Integer(val))); |
| 361 | assert(fillp <= maxForDebug); |
| 362 | last = val; |
| 363 | min = moreCentral(min, val); |
| 364 | //min2 = moreCentral2(min2, val, min); |
| 365 | } |
| 366 | } |
| 367 | return BandStructure.realloc(fValues, fillp); |
| 368 | } |
| 369 | |
| 370 | private static int moreCentral(int x, int y) { |
| 371 | int kx = (x >> 31) ^ (x << 1); |
| 372 | int ky = (y >> 31) ^ (y << 1); |
| 373 | // bias kx/ky to get an unsigned comparison: |
| 374 | kx -= Integer.MIN_VALUE; |
| 375 | ky -= Integer.MIN_VALUE; |
| 376 | int xy = (kx < ky? x: y); |
| 377 | // assert that this ALU-ish version is the same: |
| 378 | assert(xy == moreCentralSlow(x, y)); |
| 379 | return xy; |
| 380 | } |
| 381 | // private static int moreCentral2(int x, int y, int min) { |
| 382 | // // Strict implementation of buggy 150.7 specification. |
| 383 | // // The bug is that the spec. says absolute-value ties are broken |
| 384 | // // in favor of positive numbers, but the suggested implementation |
| 385 | // // (also mentioned in the spec.) breaks ties in favor of negatives. |
| 386 | // if (x + y == 0) return (x > y? x : y); |
| 387 | // return min; |
| 388 | // } |
| 389 | private static int moreCentralSlow(int x, int y) { |
| 390 | int ax = x; |
| 391 | if (ax < 0) ax = -ax; |
| 392 | if (ax < 0) return y; //x is MIN_VALUE |
| 393 | int ay = y; |
| 394 | if (ay < 0) ay = -ay; |
| 395 | if (ay < 0) return x; //y is MIN_VALUE |
| 396 | if (ax < ay) return x; |
| 397 | if (ax > ay) return y; |
| 398 | // At this point the absolute values agree, and the negative wins. |
| 399 | return x < y ? x : y; |
| 400 | } |
| 401 | |
| 402 | static final int[] LValuesCoded |
| 403 | = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 }; |
| 404 | |
| 405 | public byte[] getMetaCoding(Coding dflt) { |
| 406 | int K = fVlen; |
| 407 | int LCoded = 0; |
| 408 | if (tokenCoding instanceof Coding) { |
| 409 | Coding tc = (Coding) tokenCoding; |
| 410 | if (tc.B() == 1) { |
| 411 | LCoded = 1; |
| 412 | } else if (L >= 0) { |
| 413 | assert(L == tc.L()); |
| 414 | for (int i = 1; i < LValuesCoded.length; i++) { |
| 415 | if (LValuesCoded[i] == L) { LCoded = i; break; } |
| 416 | } |
| 417 | } |
| 418 | } |
| 419 | CodingMethod tokenDflt = null; |
| 420 | if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) { |
| 421 | // A simple L value is enough to recover the tokenCoding. |
| 422 | tokenDflt = tokenCoding; |
| 423 | } |
| 424 | int FDef = (favoredCoding == dflt)?1:0; |
| 425 | int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0; |
| 426 | int TDef = (tokenCoding == tokenDflt)?1:0; |
| 427 | int TDefL = (TDef == 1) ? LCoded : 0; |
| 428 | assert(TDef == ((TDefL>0)?1:0)); |
| 429 | ByteArrayOutputStream bytes = new ByteArrayOutputStream(10); |
| 430 | bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL); |
| 431 | try { |
| 432 | if (FDef == 0) bytes.write(favoredCoding.getMetaCoding(dflt)); |
| 433 | if (TDef == 0) bytes.write(tokenCoding.getMetaCoding(dflt)); |
| 434 | if (UDef == 0) bytes.write(unfavoredCoding.getMetaCoding(dflt)); |
| 435 | } catch (IOException ee) { |
| 436 | throw new RuntimeException(ee); |
| 437 | } |
| 438 | return bytes.toByteArray(); |
| 439 | } |
| 440 | public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) { |
| 441 | int op = bytes[pos++] & 0xFF; |
| 442 | if (op < _meta_pop || op >= _meta_limit) return pos-1; // backup |
| 443 | op -= _meta_pop; |
| 444 | int FDef = op % 2; |
| 445 | int UDef = (op / 2) % 2; |
| 446 | int TDefL = (op / 4); |
| 447 | int TDef = (TDefL > 0)?1:0; |
| 448 | int L = LValuesCoded[TDefL]; |
| 449 | CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt}; |
| 450 | if (FDef == 0) |
| 451 | pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode); |
| 452 | if (TDef == 0) |
| 453 | pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode); |
| 454 | if (UDef == 0) |
| 455 | pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode); |
| 456 | PopulationCoding pop = new PopulationCoding(); |
| 457 | pop.L = L; // might be -1 |
| 458 | pop.favoredCoding = FCode[0]; |
| 459 | pop.tokenCoding = TCode[0]; // might be null! |
| 460 | pop.unfavoredCoding = UCode[0]; |
| 461 | res[0] = pop; |
| 462 | return pos; |
| 463 | } |
| 464 | |
| 465 | private String keyString(CodingMethod m) { |
| 466 | if (m instanceof Coding) |
| 467 | return ((Coding)m).keyString(); |
| 468 | if (m == null) |
| 469 | return "none"; |
| 470 | return m.toString(); |
| 471 | } |
| 472 | public String toString() { |
| 473 | PropMap p200 = Utils.currentPropMap(); |
| 474 | boolean verbose |
| 475 | = (p200 != null && |
| 476 | p200.getBoolean(Utils.COM_PREFIX+"verbose.pop")); |
| 477 | StringBuffer res = new StringBuffer(100); |
| 478 | res.append("pop(").append("fVlen=").append(fVlen); |
| 479 | if (verbose && fValues != null) { |
| 480 | res.append(" fV=["); |
| 481 | for (int i = 1; i <= fVlen; i++) { |
| 482 | res.append(i==1?"":",").append(fValues[i]); |
| 483 | } |
| 484 | res.append(";").append(computeSentinelValue()); |
| 485 | res.append("]"); |
| 486 | } |
| 487 | res.append(" fc=").append(keyString(favoredCoding)); |
| 488 | res.append(" tc=").append(keyString(tokenCoding)); |
| 489 | res.append(" uc=").append(keyString(unfavoredCoding)); |
| 490 | res.append(")"); |
| 491 | return res.toString(); |
| 492 | } |
| 493 | } |