blob: 0f594952e611f300cf1fcd251ecd0c12ce9f3103 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2003-2005 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package com.sun.java.util.jar.pack;
27
28import java.util.*;
29import java.io.*;
30
31/**
32 * Population-based coding.
33 * See the section "Encodings of Uncorrelated Values" in the Pack200 spec.
34 * @author John Rose
35 */
36// This tactic alone reduces the final zipped rt.jar by about a percent.
37class PopulationCoding implements Constants, CodingMethod {
38 Histogram vHist; // histogram of all values
39 int[] fValues; // list of favored values
40 int fVlen; // inclusive max index
41 long[] symtab; // int map of favored value -> token [1..#fValues]
42
43 CodingMethod favoredCoding;
44 CodingMethod tokenCoding;
45 CodingMethod unfavoredCoding;
46
47 int L = -1; //preferred L value for tokenCoding
48
49 public void setFavoredValues(int[] fValues, int fVlen) {
50 // Note: {f} is allFavoredValues[1..fvlen], not [0..fvlen-1].
51 // This is because zero is an exceptional favored value index.
52 assert(fValues[0] == 0); // must be empty
53 assert(this.fValues == null); // do not do this twice
54 this.fValues = fValues;
55 this.fVlen = fVlen;
56 if (L >= 0) {
57 setL(L); // reassert
58 }
59 }
60 public void setFavoredValues(int[] fValues) {
61 int fVlen = fValues.length-1;
62 setFavoredValues(fValues, fVlen);
63 }
64 public void setHistogram(Histogram vHist) {
65 this.vHist = vHist;
66 }
67 public void setL(int L) {
68 this.L = L;
69 if (L >= 0 && fValues != null && tokenCoding == null) {
70 tokenCoding = fitTokenCoding(fVlen, L);
71 assert(tokenCoding != null);
72 }
73 }
74
75 public static Coding fitTokenCoding(int fVlen, int L) {
76 // Find the smallest B s.t. (B,H,0) covers fVlen.
77 if (fVlen < 256)
78 // H/L do not matter when B==1
79 return BandStructure.BYTE1;
80 Coding longest = BandStructure.UNSIGNED5.setL(L);
81 if (!longest.canRepresentUnsigned(fVlen))
82 return null; // failure; L is too sharp and fVlen too large
83 Coding tc = longest;
84 for (Coding shorter = longest; ; ) {
85 shorter = shorter.setB(shorter.B()-1);
86 if (shorter.umax() < fVlen)
87 break;
88 tc = shorter; // shorten it by reducing B
89 }
90 return tc;
91 }
92
93 public void setFavoredCoding(CodingMethod favoredCoding) {
94 this.favoredCoding = favoredCoding;
95 }
96 public void setTokenCoding(CodingMethod tokenCoding) {
97 this.tokenCoding = tokenCoding;
98 this.L = -1;
99 if (tokenCoding instanceof Coding && fValues != null) {
100 Coding tc = (Coding) tokenCoding;
101 if (tc == fitTokenCoding(fVlen, tc.L()))
102 this.L = tc.L();;
103 // Otherwise, it's a non-default coding.
104 }
105 }
106 public void setUnfavoredCoding(CodingMethod unfavoredCoding) {
107 this.unfavoredCoding = unfavoredCoding;
108 }
109
110 public int favoredValueMaxLength() {
111 if (L == 0)
112 return Integer.MAX_VALUE;
113 else
114 return BandStructure.UNSIGNED5.setL(L).umax();
115 }
116
117 public void resortFavoredValues() {
118 Coding tc = (Coding) tokenCoding;
119 // Make a local copy before reordering.
120 fValues = BandStructure.realloc(fValues, 1+fVlen);
121 // Resort favoredValues within each byte-size cadre.
122 int fillp = 1; // skip initial zero
123 for (int n = 1; n <= tc.B(); n++) {
124 int nmax = tc.byteMax(n);
125 if (nmax > fVlen)
126 nmax = fVlen;
127 if (nmax < tc.byteMin(n))
128 break;
129 int low = fillp;
130 int high = nmax+1;
131 if (high == low) continue;
132 assert(high > low)
133 : high+"!>"+low;
134 assert(tc.getLength(low) == n)
135 : n+" != len("+(low)+") == "+
136 tc.getLength(low);
137 assert(tc.getLength(high-1) == n)
138 : n+" != len("+(high-1)+") == "+
139 tc.getLength(high-1);
140 int midTarget = low + (high-low)/2;
141 int mid = low;
142 // Divide the values into cadres, and sort within each.
143 int prevCount = -1;
144 int prevLimit = low;
145 for (int i = low; i < high; i++) {
146 int val = fValues[i];
147 int count = vHist.getFrequency(val);
148 if (prevCount != count) {
149 if (n == 1) {
150 // For the single-byte encoding, keep strict order
151 // among frequency groups.
152 Arrays.sort(fValues, prevLimit, i);
153 } else if (Math.abs(mid - midTarget) >
154 Math.abs(i - midTarget)) {
155 // Find a single inflection point
156 // close to the middle of the byte-size cadre.
157 mid = i;
158 }
159 prevCount = count;
160 prevLimit = i;
161 }
162 }
163 if (n == 1) {
164 Arrays.sort(fValues, prevLimit, high);
165 } else {
166 // Sort up to the midpoint, if any.
167 Arrays.sort(fValues, low, mid);
168 Arrays.sort(fValues, mid, high);
169 }
170 assert(tc.getLength(low) == tc.getLength(mid));
171 assert(tc.getLength(low) == tc.getLength(high-1));
172 fillp = nmax+1;
173 }
174 assert(fillp == fValues.length);
175
176 // Reset symtab.
177 symtab = null;
178 }
179
180 public int getToken(int value) {
181 if (symtab == null)
182 symtab = makeSymtab();
183 int pos = Arrays.binarySearch(symtab, (long)value << 32);
184 if (pos < 0) pos = -pos-1;
185 if (pos < symtab.length && value == (int)(symtab[pos] >>> 32))
186 return (int)symtab[pos];
187 else
188 return 0;
189 }
190
191 public int[][] encodeValues(int[] values, int start, int end) {
192 // Compute token sequence.
193 int[] tokens = new int[end-start];
194 int nuv = 0;
195 for (int i = 0; i < tokens.length; i++) {
196 int val = values[start+i];
197 int tok = getToken(val);
198 if (tok != 0)
199 tokens[i] = tok;
200 else
201 nuv += 1;
202 }
203 // Compute unfavored value sequence.
204 int[] unfavoredValues = new int[nuv];
205 nuv = 0; // reset
206 for (int i = 0; i < tokens.length; i++) {
207 if (tokens[i] != 0) continue; // already covered
208 int val = values[start+i];
209 unfavoredValues[nuv++] = val;
210 }
211 assert(nuv == unfavoredValues.length);
212 return new int[][]{ tokens, unfavoredValues };
213 }
214
215 private long[] makeSymtab() {
216 long[] symtab = new long[fVlen];
217 for (int token = 1; token <= fVlen; token++) {
218 symtab[token-1] = ((long)fValues[token] << 32) | token;
219 }
220 // Index by value:
221 Arrays.sort(symtab);
222 return symtab;
223 }
224
225 private Coding getTailCoding(CodingMethod c) {
226 while (c instanceof AdaptiveCoding)
227 c = ((AdaptiveCoding)c).tailCoding;
228 return (Coding) c;
229 }
230
231 // CodingMethod methods.
232 public void writeArrayTo(OutputStream out, int[] a, int start, int end) throws IOException {
233 int[][] vals = encodeValues(a, start, end);
234 writeSequencesTo(out, vals[0], vals[1]);
235 }
236 void writeSequencesTo(OutputStream out, int[] tokens, int[] uValues) throws IOException {
237 favoredCoding.writeArrayTo(out, fValues, 1, 1+fVlen);
238 getTailCoding(favoredCoding).writeTo(out, computeSentinelValue());
239 tokenCoding.writeArrayTo(out, tokens, 0, tokens.length);
240 if (uValues.length > 0)
241 unfavoredCoding.writeArrayTo(out, uValues, 0, uValues.length);
242 }
243
244 int computeSentinelValue() {
245 Coding fc = getTailCoding(favoredCoding);
246 if (fc.isDelta()) {
247 // repeat the last favored value, using delta=0
248 return 0;
249 } else {
250 // else repeat the shorter of the min or last value
251 int min = fValues[1];
252 int last = min;
253 // (remember that fVlen is an inclusive limit in fValues)
254 for (int i = 2; i <= fVlen; i++) {
255 last = fValues[i];
256 min = moreCentral(min, last);
257 }
258 int endVal;
259 if (fc.getLength(min) <= fc.getLength(last))
260 return min;
261 else
262 return last;
263 }
264 }
265
266 public void readArrayFrom(InputStream in, int[] a, int start, int end) throws IOException {
267 // Parameters are fCode, L, uCode.
268 setFavoredValues(readFavoredValuesFrom(in, end-start));
269 // Read the tokens. Read them into the final array, for the moment.
270 tokenCoding.readArrayFrom(in, a, start, end);
271 // Decode the favored tokens.
272 int headp = 0, tailp = -1;
273 int uVlen = 0;
274 for (int i = start; i < end; i++) {
275 int tok = a[i];
276 if (tok == 0) {
277 // Make a linked list, and decode in a second pass.
278 if (tailp < 0) {
279 headp = i;
280 } else {
281 a[tailp] = i;
282 }
283 tailp = i;
284 uVlen += 1;
285 } else {
286 a[i] = fValues[tok];
287 }
288 }
289 // Walk the linked list of "zero" locations, decoding unfavored vals.
290 int[] uValues = new int[uVlen];
291 if (uVlen > 0)
292 unfavoredCoding.readArrayFrom(in, uValues, 0, uVlen);
293 for (int i = 0; i < uVlen; i++) {
294 int nextp = a[headp];
295 a[headp] = uValues[i];
296 headp = nextp;
297 }
298 }
299
300 int[] readFavoredValuesFrom(InputStream in, int maxForDebug) throws IOException {
301 int[] fValues = new int[1000]; // realloc as needed
302 // The set uniqueValuesForDebug records all favored values.
303 // As each new value is added, we assert that the value
304 // was not already in the set.
305 HashSet uniqueValuesForDebug = null;
306 assert((uniqueValuesForDebug = new HashSet()) != null);
307 int fillp = 1;
308 maxForDebug += fillp;
309 int min = Integer.MIN_VALUE; // farthest from the center
310 //int min2 = Integer.MIN_VALUE; // emulate buggy 150.7 spec.
311 int last = 0;
312 CodingMethod fcm = favoredCoding;
313 while (fcm instanceof AdaptiveCoding) {
314 AdaptiveCoding ac = (AdaptiveCoding) fcm;
315 int len = ac.headLength;
316 while (fillp + len > fValues.length)
317 fValues = BandStructure.realloc(fValues);
318 int newFillp = fillp + len;
319 ac.headCoding.readArrayFrom(in, fValues, fillp, newFillp);
320 while (fillp < newFillp) {
321 int val = fValues[fillp++];
322 assert(uniqueValuesForDebug.add(new Integer(val)));
323 assert(fillp <= maxForDebug);
324 last = val;
325 min = moreCentral(min, val);
326 //min2 = moreCentral2(min2, val, min);
327 }
328 fcm = ac.tailCoding;
329 }
330 Coding fc = (Coding) fcm;
331 if (fc.isDelta()) {
332 for (long state = 0;;) {
333 // Read a new value:
334 state += fc.readFrom(in);
335 int val;
336 if (fc.isSubrange())
337 val = fc.reduceToUnsignedRange(state);
338 else
339 val = (int)state;
340 state = val;
341 if (fillp > 1 && (val == last || val == min)) //|| val == min2
342 break;
343 if (fillp == fValues.length)
344 fValues = BandStructure.realloc(fValues);
345 fValues[fillp++] = val;
346 assert(uniqueValuesForDebug.add(new Integer(val)));
347 assert(fillp <= maxForDebug);
348 last = val;
349 min = moreCentral(min, val);
350 //min2 = moreCentral(min2, val);
351 }
352 } else {
353 for (;;) {
354 int val = fc.readFrom(in);
355 if (fillp > 1 && (val == last || val == min)) //|| val == min2
356 break;
357 if (fillp == fValues.length)
358 fValues = BandStructure.realloc(fValues);
359 fValues[fillp++] = val;
360 assert(uniqueValuesForDebug.add(new Integer(val)));
361 assert(fillp <= maxForDebug);
362 last = val;
363 min = moreCentral(min, val);
364 //min2 = moreCentral2(min2, val, min);
365 }
366 }
367 return BandStructure.realloc(fValues, fillp);
368 }
369
370 private static int moreCentral(int x, int y) {
371 int kx = (x >> 31) ^ (x << 1);
372 int ky = (y >> 31) ^ (y << 1);
373 // bias kx/ky to get an unsigned comparison:
374 kx -= Integer.MIN_VALUE;
375 ky -= Integer.MIN_VALUE;
376 int xy = (kx < ky? x: y);
377 // assert that this ALU-ish version is the same:
378 assert(xy == moreCentralSlow(x, y));
379 return xy;
380 }
381// private static int moreCentral2(int x, int y, int min) {
382// // Strict implementation of buggy 150.7 specification.
383// // The bug is that the spec. says absolute-value ties are broken
384// // in favor of positive numbers, but the suggested implementation
385// // (also mentioned in the spec.) breaks ties in favor of negatives.
386// if (x + y == 0) return (x > y? x : y);
387// return min;
388// }
389 private static int moreCentralSlow(int x, int y) {
390 int ax = x;
391 if (ax < 0) ax = -ax;
392 if (ax < 0) return y; //x is MIN_VALUE
393 int ay = y;
394 if (ay < 0) ay = -ay;
395 if (ay < 0) return x; //y is MIN_VALUE
396 if (ax < ay) return x;
397 if (ax > ay) return y;
398 // At this point the absolute values agree, and the negative wins.
399 return x < y ? x : y;
400 }
401
402 static final int[] LValuesCoded
403 = { -1, 4, 8, 16, 32, 64, 128, 192, 224, 240, 248, 252 };
404
405 public byte[] getMetaCoding(Coding dflt) {
406 int K = fVlen;
407 int LCoded = 0;
408 if (tokenCoding instanceof Coding) {
409 Coding tc = (Coding) tokenCoding;
410 if (tc.B() == 1) {
411 LCoded = 1;
412 } else if (L >= 0) {
413 assert(L == tc.L());
414 for (int i = 1; i < LValuesCoded.length; i++) {
415 if (LValuesCoded[i] == L) { LCoded = i; break; }
416 }
417 }
418 }
419 CodingMethod tokenDflt = null;
420 if (LCoded != 0 && tokenCoding == fitTokenCoding(fVlen, L)) {
421 // A simple L value is enough to recover the tokenCoding.
422 tokenDflt = tokenCoding;
423 }
424 int FDef = (favoredCoding == dflt)?1:0;
425 int UDef = (unfavoredCoding == dflt || unfavoredCoding == null)?1:0;
426 int TDef = (tokenCoding == tokenDflt)?1:0;
427 int TDefL = (TDef == 1) ? LCoded : 0;
428 assert(TDef == ((TDefL>0)?1:0));
429 ByteArrayOutputStream bytes = new ByteArrayOutputStream(10);
430 bytes.write(_meta_pop + FDef + 2*UDef + 4*TDefL);
431 try {
432 if (FDef == 0) bytes.write(favoredCoding.getMetaCoding(dflt));
433 if (TDef == 0) bytes.write(tokenCoding.getMetaCoding(dflt));
434 if (UDef == 0) bytes.write(unfavoredCoding.getMetaCoding(dflt));
435 } catch (IOException ee) {
436 throw new RuntimeException(ee);
437 }
438 return bytes.toByteArray();
439 }
440 public static int parseMetaCoding(byte[] bytes, int pos, Coding dflt, CodingMethod res[]) {
441 int op = bytes[pos++] & 0xFF;
442 if (op < _meta_pop || op >= _meta_limit) return pos-1; // backup
443 op -= _meta_pop;
444 int FDef = op % 2;
445 int UDef = (op / 2) % 2;
446 int TDefL = (op / 4);
447 int TDef = (TDefL > 0)?1:0;
448 int L = LValuesCoded[TDefL];
449 CodingMethod[] FCode = {dflt}, TCode = {null}, UCode = {dflt};
450 if (FDef == 0)
451 pos = BandStructure.parseMetaCoding(bytes, pos, dflt, FCode);
452 if (TDef == 0)
453 pos = BandStructure.parseMetaCoding(bytes, pos, dflt, TCode);
454 if (UDef == 0)
455 pos = BandStructure.parseMetaCoding(bytes, pos, dflt, UCode);
456 PopulationCoding pop = new PopulationCoding();
457 pop.L = L; // might be -1
458 pop.favoredCoding = FCode[0];
459 pop.tokenCoding = TCode[0]; // might be null!
460 pop.unfavoredCoding = UCode[0];
461 res[0] = pop;
462 return pos;
463 }
464
465 private String keyString(CodingMethod m) {
466 if (m instanceof Coding)
467 return ((Coding)m).keyString();
468 if (m == null)
469 return "none";
470 return m.toString();
471 }
472 public String toString() {
473 PropMap p200 = Utils.currentPropMap();
474 boolean verbose
475 = (p200 != null &&
476 p200.getBoolean(Utils.COM_PREFIX+"verbose.pop"));
477 StringBuffer res = new StringBuffer(100);
478 res.append("pop(").append("fVlen=").append(fVlen);
479 if (verbose && fValues != null) {
480 res.append(" fV=[");
481 for (int i = 1; i <= fVlen; i++) {
482 res.append(i==1?"":",").append(fValues[i]);
483 }
484 res.append(";").append(computeSentinelValue());
485 res.append("]");
486 }
487 res.append(" fc=").append(keyString(favoredCoding));
488 res.append(" tc=").append(keyString(tokenCoding));
489 res.append(" uc=").append(keyString(unfavoredCoding));
490 res.append(")");
491 return res.toString();
492 }
493}