/*
 * Copyright 2002-2005 Sun Microsystems, Inc. All Rights Reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation. Sun designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Sun in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
 * CA 95054 USA or visit www.sun.com if you need additional information or
 * have any questions.
 */
25
26// -*- C++ -*-
27// Small program for unpacking specially compressed Java packages.
28// John R. Rose
29
30#include <stdio.h>
31#include <string.h>
32#include <stdlib.h>
33#include <stdarg.h>
34
35#include "defines.h"
36#include "bytes.h"
37#include "utils.h"
38#include "coding.h"
39
40#include "constants.h"
41#include "unpack.h"
42
43extern coding basic_codings[];
44
// Declares the locals B, H, L, S and D in the enclosing scope, unpacked
// from the coding spec word.  L is derived as 256-H: a byte below L ends
// an encoded value (see coding::parse).
#define CODING_PRIVATE(spec) \
  int spec_ = (spec); \
  int B = CODING_B(spec_); \
  int H = CODING_H(spec_); \
  int L = 256 - H; \
  int S = CODING_S(spec_); \
  int D = CODING_D(spec_)

// True when the low S bits of codeVal are all ones, i.e. the code stands
// for a negative value.  The increment is done in unsigned arithmetic so
// that a code of 0x7FFFFFFF does not trigger signed overflow.
#define IS_NEG_CODE(S, codeVal) \
  ( ((((unsigned int)(codeVal))+1u) & ((1u<<(S))-1u)) == 0 )

// Sign decoding specialized for S == 1: the low bit selects the sign and
// the remaining bits hold the magnitude.  The result has unsigned type;
// callers cast it to int.
#define DECODE_SIGN_S1(ux) \
  ( ((unsigned int)(ux) >> 1) ^ -((int)(ux) & 1) )
58
59static maybe_inline
60int decode_sign(int S, uint ux) { // == Coding.decodeSign32
61 assert(S > 0);
62 uint sigbits = (ux >> S);
63 if (IS_NEG_CODE(S, ux))
64 return (int)( ~sigbits);
65 else
66 return (int)(ux - sigbits);
67 // Note that (int)(ux-sigbits) can be negative, if ux is large enough.
68}
69
70coding* coding::init() {
71 if (umax > 0) return this; // already done
72 assert(spec != 0); // sanity
73
74 // fill in derived fields
75 CODING_PRIVATE(spec);
76
77 // Return null if 'arb(BHSD)' parameter constraints are not met:
78 if (B < 1 || B > B_MAX) return null;
79 if (H < 1 || H > 256) return null;
80 if (S < 0 || S > 2) return null;
81 if (D < 0 || D > 1) return null;
82 if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size
83 if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding
84
85 // first compute the range of the coding, in 64 bits
86 jlong range = 0;
87 {
88 jlong H_i = 1;
89 for (int i = 0; i < B; i++) {
90 range += H_i;
91 H_i *= H;
92 }
93 range *= L;
94 range += H_i;
95 }
96 assert(range > 0); // no useless codings, please
97
98 int this_umax;
99
100 // now, compute min and max
101 if (range >= ((jlong)1 << 32)) {
102 this_umax = INT_MAX_VALUE;
103 this->umin = INT_MIN_VALUE;
104 this->max = INT_MAX_VALUE;
105 this->min = INT_MIN_VALUE;
106 } else {
107 this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1;
108 this->max = this_umax;
109 this->min = this->umin = 0;
110 if (S != 0 && range != 0) {
111 int Smask = (1<<S)-1;
112 jlong maxPosCode = range-1;
113 jlong maxNegCode = range-1;
114 while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode;
115 while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode;
116 int maxPos = decode_sign(S, maxPosCode);
117 if (maxPos < 0)
118 this->max = INT_MAX_VALUE; // 32-bit wraparound
119 else
120 this->max = maxPos;
121 if (maxNegCode < 0)
122 this->min = 0; // No negative codings at all.
123 else
124 this->min = decode_sign(S, maxNegCode);
125 }
126 }
127
128 assert(!(isFullRange | isSigned | isSubrange)); // init
129 if (min < 0)
130 this->isSigned = true;
131 if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE)
132 this->isSubrange = true;
133 if (max == INT_MAX_VALUE && min == INT_MIN_VALUE)
134 this->isFullRange = true;
135
136 // do this last, to reduce MT exposure (should have a membar too)
137 this->umax = this_umax;
138
139 return this;
140}
141
142coding* coding::findBySpec(int spec) {
143 for (coding* scan = &basic_codings[0]; ; scan++) {
144 if (scan->spec == spec)
145 return scan->init();
146 if (scan->spec == 0)
147 break;
148 }
149 coding* ptr = NEW(coding, 1);
150 CHECK_NULL_0(ptr);
151 coding* c = ptr->initFrom(spec);
152 if (c == null) mtrace('f', ptr, 0);
153 if (c == null)
154 ::free(ptr);
155 else
156 // else caller should free it...
157 c->isMalloc = true;
158 return c;
159}
160
161coding* coding::findBySpec(int B, int H, int S, int D) {
162 if (B < 1 || B > B_MAX) return null;
163 if (H < 1 || H > 256) return null;
164 if (S < 0 || S > 2) return null;
165 if (D < 0 || D > 1) return null;
166 return findBySpec(CODING_SPEC(B, H, S, D));
167}
168
169void coding::free() {
170 if (isMalloc) mtrace('f', this, 0);
171 if (isMalloc)
172 ::free(this);
173}
174
175void coding_method::reset(value_stream* state) {
176 assert(state->rp == state->rplimit); // not in mid-stream, please
177 //assert(this == vs0.cm);
178 state[0] = vs0;
179 if (uValues != null) {
180 uValues->reset(state->helper());
181 }
182}
183
184maybe_inline
185uint coding::parse(byte* &rp, int B, int H) {
186 int L = 256-H;
187 byte* ptr = rp;
188 // hand peel the i==0 part of the loop:
189 uint b_i = *ptr++ & 0xFF;
190 if (B == 1 || b_i < L)
191 { rp = ptr; return b_i; }
192 uint sum = b_i;
193 uint H_i = H;
194 assert(B <= B_MAX);
195 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
196 b_i = *ptr++ & 0xFF;
197 sum += b_i * H_i;
198 if (i == B || b_i < L)
199 { rp = ptr; return sum; }
200 H_i *= H;
201 }
202 assert(false);
203 return 0;
204}
205
206maybe_inline
207uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) {
208 assert(H == (1<<lgH));
209 int L = 256-(1<<lgH);
210 byte* ptr = rp;
211 // hand peel the i==0 part of the loop:
212 uint b_i = *ptr++ & 0xFF;
213 if (B == 1 || b_i < L)
214 { rp = ptr; return b_i; }
215 uint sum = b_i;
216 uint lg_H_i = lgH;
217 assert(B <= B_MAX);
218 for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired
219 b_i = *ptr++ & 0xFF;
220 sum += b_i << lg_H_i;
221 if (i == B || b_i < L)
222 { rp = ptr; return sum; }
223 lg_H_i += lgH;
224 }
225 assert(false);
226 return 0;
227}
228
229static const char ERB[] = "EOF reading band";
230
231maybe_inline
232void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) {
233 if (N < 0) {
234 abort("bad value count");
235 return;
236 }
237 byte* ptr = rp;
238 if (B == 1 || H == 256) {
239 size_t len = (size_t)N*B;
240 if (len / B != N || ptr+len > limit) {
241 abort(ERB);
242 return;
243 }
244 rp = ptr+len;
245 return;
246 }
247 // Note: We assume rp has enough zero-padding.
248 int L = 256-H;
249 int n = B;
250 while (N > 0) {
251 ptr += 1;
252 if (--n == 0) {
253 // end of encoding at B bytes, regardless of byte value
254 } else {
255 int b = (ptr[-1] & 0xFF);
256 if (b >= L) {
257 // keep going, unless we find a byte < L
258 continue;
259 }
260 }
261 // found the last byte
262 N -= 1;
263 n = B; // reset length counter
264 // do an error check here
265 if (ptr > limit) {
266 abort(ERB);
267 return;
268 }
269 }
270 rp = ptr;
271 return;
272}
273
274bool value_stream::hasHelper() {
275 // If my coding method is a pop-style method,
276 // then I need a second value stream to transmit
277 // unfavored values.
278 // This can be determined by examining fValues.
279 return cm->fValues != null;
280}
281
282void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) {
283 rp = rp_;
284 rplimit = rplimit_;
285 sum = 0;
286 cm = null; // no need in the simple case
287 setCoding(defc);
288}
289
290void value_stream::setCoding(coding* defc) {
291 if (defc == null) {
292 unpack_abort("bad coding");
293 defc = coding::findByIndex(_meta_canon_min); // random pick for recovery
294 }
295
296 c = (*defc);
297
298 // choose cmk
299 cmk = cmk_ERROR;
300 switch (c.spec) {
301 case BYTE1_spec: cmk = cmk_BYTE1; break;
302 case CHAR3_spec: cmk = cmk_CHAR3; break;
303 case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break;
304 case DELTA5_spec: cmk = cmk_DELTA5; break;
305 case BCI5_spec: cmk = cmk_BCI5; break;
306 case BRANCH5_spec: cmk = cmk_BRANCH5; break;
307 default:
308 if (c.D() == 0) {
309 switch (c.S()) {
310 case 0: cmk = cmk_BHS0; break;
311 case 1: cmk = cmk_BHS1; break;
312 default: cmk = cmk_BHS; break;
313 }
314 } else {
315 if (c.S() == 1) {
316 if (c.isFullRange) cmk = cmk_BHS1D1full;
317 if (c.isSubrange) cmk = cmk_BHS1D1sub;
318 }
319 if (cmk == cmk_ERROR) cmk = cmk_BHSD1;
320 }
321 }
322}
323
324static maybe_inline
325int getPopValue(value_stream* self, uint uval) {
326 if (uval > 0) {
327 // note that the initial parse performed a range check
328 assert(uval <= self->cm->fVlength);
329 return self->cm->fValues[uval-1];
330 } else {
331 // take an unfavored value
332 return self->helper()->getInt();
333 }
334}
335
336maybe_inline
337int coding::sumInUnsignedRange(int x, int y) {
338 assert(isSubrange);
339 int range = (int)(umax+1);
340 assert(range > 0);
341 x += y;
342 if (x != (int)((jlong)(x-y) + (jlong)y)) {
343 // 32-bit overflow interferes with range reduction.
344 // Back off from the overflow by adding a multiple of range:
345 if (x < 0) {
346 x -= range;
347 assert(x >= 0);
348 } else {
349 x += range;
350 assert(x < 0);
351 }
352 }
353 if (x < 0) {
354 x += range;
355 if (x >= 0) return x;
356 } else if (x >= range) {
357 x -= range;
358 if (x < range) return x;
359 } else {
360 // in range
361 return x;
362 }
363 // do it the hard way
364 x %= range;
365 if (x < 0) x += range;
366 return x;
367}
368
369static maybe_inline
370int getDeltaValue(value_stream* self, uint uval, bool isSubrange) {
371 assert((bool)(self->c.isSubrange) == isSubrange);
372 assert(self->c.isSubrange | self->c.isFullRange);
373 if (isSubrange)
374 return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval);
375 else
376 return self->sum += (int) uval;
377}
378
379bool value_stream::hasValue() {
380 if (rp < rplimit) return true;
381 if (cm == null) return false;
382 if (cm->next == null) return false;
383 cm->next->reset(this);
384 return hasValue();
385}
386
387int value_stream::getInt() {
388 if (rp >= rplimit) {
389 // Advance to next coding segment.
390 if (rp > rplimit || cm == null || cm->next == null) {
391 // Must perform this check and throw an exception on bad input.
392 unpack_abort(ERB);
393 return 0;
394 }
395 cm->next->reset(this);
396 return getInt();
397 }
398
399 CODING_PRIVATE(c.spec);
400 uint uval;
401 enum {
402 B5 = 5,
403 B3 = 3,
404 H128 = 128,
405 H64 = 64,
406 H4 = 4
407 };
408 switch (cmk) {
409 case cmk_BHS:
410 assert(D == 0);
411 uval = coding::parse(rp, B, H);
412 if (S == 0)
413 return (int) uval;
414 return decode_sign(S, uval);
415
416 case cmk_BHS0:
417 assert(S == 0 && D == 0);
418 uval = coding::parse(rp, B, H);
419 return (int) uval;
420
421 case cmk_BHS1:
422 assert(S == 1 && D == 0);
423 uval = coding::parse(rp, B, H);
424 return DECODE_SIGN_S1(uval);
425
426 case cmk_BYTE1:
427 assert(c.spec == BYTE1_spec);
428 assert(B == 1 && H == 256 && S == 0 && D == 0);
429 return *rp++ & 0xFF;
430
431 case cmk_CHAR3:
432 assert(c.spec == CHAR3_spec);
433 assert(B == B3 && H == H128 && S == 0 && D == 0);
434 return coding::parse_lgH(rp, B3, H128, 7);
435
436 case cmk_UNSIGNED5:
437 assert(c.spec == UNSIGNED5_spec);
438 assert(B == B5 && H == H64 && S == 0 && D == 0);
439 return coding::parse_lgH(rp, B5, H64, 6);
440
441 case cmk_BHSD1:
442 assert(D == 1);
443 uval = coding::parse(rp, B, H);
444 if (S != 0)
445 uval = (uint) decode_sign(S, uval);
446 return getDeltaValue(this, uval, c.isSubrange);
447
448 case cmk_BHS1D1full:
449 assert(S == 1 && D == 1 && c.isFullRange);
450 uval = coding::parse(rp, B, H);
451 uval = (uint) DECODE_SIGN_S1(uval);
452 return getDeltaValue(this, uval, false);
453
454 case cmk_BHS1D1sub:
455 assert(S == 1 && D == 1 && c.isSubrange);
456 uval = coding::parse(rp, B, H);
457 uval = (uint) DECODE_SIGN_S1(uval);
458 return getDeltaValue(this, uval, true);
459
460 case cmk_DELTA5:
461 assert(c.spec == DELTA5_spec);
462 assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange);
463 uval = coding::parse_lgH(rp, B5, H64, 6);
464 sum += DECODE_SIGN_S1(uval);
465 return sum;
466
467 case cmk_BCI5:
468 assert(c.spec == BCI5_spec);
469 assert(B == B5 && H == H4 && S == 0 && D == 0);
470 return coding::parse_lgH(rp, B5, H4, 2);
471
472 case cmk_BRANCH5:
473 assert(c.spec == BRANCH5_spec);
474 assert(B == B5 && H == H4 && S == 2 && D == 0);
475 uval = coding::parse_lgH(rp, B5, H4, 2);
476 return decode_sign(S, uval);
477
478 case cmk_pop:
479 uval = coding::parse(rp, B, H);
480 if (S != 0) {
481 uval = (uint) decode_sign(S, uval);
482 }
483 if (D != 0) {
484 assert(c.isSubrange | c.isFullRange);
485 if (c.isSubrange)
486 sum = c.sumInUnsignedRange(sum, (int) uval);
487 else
488 sum += (int) uval;
489 uval = (uint) sum;
490 }
491 return getPopValue(this, uval);
492
493 case cmk_pop_BHS0:
494 assert(S == 0 && D == 0);
495 uval = coding::parse(rp, B, H);
496 return getPopValue(this, uval);
497
498 case cmk_pop_BYTE1:
499 assert(c.spec == BYTE1_spec);
500 assert(B == 1 && H == 256 && S == 0 && D == 0);
501 return getPopValue(this, *rp++ & 0xFF);
502 }
503 assert(false);
504 return 0;
505}
506
507static maybe_inline
508int moreCentral(int x, int y) { // used to find end of Pop.{F}
509 // Suggested implementation from the Pack200 specification:
510 uint kx = (x >> 31) ^ (x << 1);
511 uint ky = (y >> 31) ^ (y << 1);
512 return (kx < ky? x: y);
513}
514//static maybe_inline
515//int moreCentral2(int x, int y, int min) {
516// // Strict implementation of buggy 150.7 specification.
517// // The bug is that the spec. says absolute-value ties are broken
518// // in favor of positive numbers, but the suggested implementation
519// // (also mentioned in the spec.) breaks ties in favor of negative numbers.
520// if ((x + y) != 0)
521// return min;
522// else
523// // return the other value, which breaks a tie in the positive direction
524// return (x > y)? x: y;
525//}
526
527static const byte* no_meta[] = {null};
528#define NO_META (*(byte**)no_meta)
529enum { POP_FAVORED_N = -2 };
530
531// mode bits
532#define DISABLE_RUN 1 // used immediately inside ACodee
533#define DISABLE_POP 2 // used recursively in all pop sub-bands
534
535// This function knows all about meta-coding.
536void coding_method::init(byte* &band_rp, byte* band_limit,
537 byte* &meta_rp, int mode,
538 coding* defc, int N,
539 intlist* valueSink) {
540 assert(N != 0);
541
542 assert(u != null); // must be pre-initialized
543 //if (u == null) u = unpacker::current(); // expensive
544
545 int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF);
546 coding* foundc = null;
547 coding* to_free = null;
548
549 if (op == _meta_default) {
550 foundc = defc;
551 // and fall through
552
553 } else if (op >= _meta_canon_min && op <= _meta_canon_max) {
554 foundc = coding::findByIndex(op);
555 // and fall through
556
557 } else if (op == _meta_arb) {
558 int args = (*meta_rp++ & 0xFF);
559 // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1))
560 int D = ((args >> 0) & 1);
561 int S = ((args >> 1) & 3);
562 int B = ((args >> 3) & -1) + 1;
563 // & (H[1..256]-1)
564 int H = (*meta_rp++ & 0xFF) + 1;
565 foundc = coding::findBySpec(B, H, S, D);
566 to_free = foundc; // findBySpec may dynamically allocate
567 if (foundc == null) {
568 abort("illegal arb. coding");
569 return;
570 }
571 // and fall through
572
573 } else if (op >= _meta_run && op < _meta_pop) {
574 int args = (op - _meta_run);
575 // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2])
576 int KX = ((args >> 0) & 3);
577 int KBFlag = ((args >> 2) & 1);
578 int ABDef = ((args >> 3) & -1);
579 assert(ABDef <= 2);
580 // & KB: one of [0..255] if KBFlag=1
581 int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF));
582 int K = (KB+1) << (KX * 4);
583 int N2 = (N >= 0) ? N-K : N;
584 if (N == 0 || (N2 <= 0 && N2 != N)) {
585 abort("illegal run encoding");
586 return;
587 }
588 if ((mode & DISABLE_RUN) != 0) {
589 abort("illegal nested run encoding");
590 return;
591 }
592
593 // & Enc{ ACode } if ADef=0 (ABDef != 1)
594 // No direct nesting of 'run' in ACode, but in BCode it's OK.
595 int disRun = mode | DISABLE_RUN;
596 if (ABDef == 1) {
597 this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink);
598 } else {
599 this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink);
600 }
601 CHECK;
602
603 // & Enc{ BCode } if BDef=0 (ABDef != 2)
604 coding_method* tail = U_NEW(coding_method, 1);
605 CHECK_NULL(tail);
606 tail->u = u;
607
608 // The 'run' codings may be nested indirectly via 'pop' codings.
609 // This means that this->next may already be filled in, if
610 // ACode was of type 'pop' with a 'run' token coding.
611 // No problem: Just chain the upcoming BCode onto the end.
612 for (coding_method* self = this; ; self = self->next) {
613 if (self->next == null) {
614 self->next = tail;
615 break;
616 }
617 }
618
619 if (ABDef == 2) {
620 tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink);
621 } else {
622 tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink);
623 }
624 // Note: The preceding calls to init should be tail-recursive.
625
626 return; // done; no falling through
627
628 } else if (op >= _meta_pop && op < _meta_limit) {
629 int args = (op - _meta_pop);
630 // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11])
631 int FDef = ((args >> 0) & 1);
632 int UDef = ((args >> 1) & 1);
633 int TDefL = ((args >> 2) & -1);
634 assert(TDefL <= 11);
635 int TDef = (TDefL > 0);
636 int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL)));
637 int TH = (256-TL);
638 if (N <= 0) {
639 abort("illegal pop encoding");
640 return;
641 }
642 if ((mode & DISABLE_POP) != 0) {
643 abort("illegal nested pop encoding");
644 return;
645 }
646
647 // No indirect nesting of 'pop', but 'run' is OK.
648 int disPop = DISABLE_POP;
649
650 // & Enc{ FCode } if FDef=0
651 int FN = POP_FAVORED_N;
652 assert(valueSink == null);
653 intlist fValueSink; fValueSink.init();
654 coding_method fval;
655 BYTES_OF(fval).clear(); fval.u = u;
656 if (FDef != 0) {
657 fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink);
658 } else {
659 fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink);
660 }
661 bytes fvbuf;
662 fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr);
663 fVlength = fValueSink.length(); // i.e., the parameter K
664 fValueSink.free();
665 CHECK;
666
667 // Skip the first {F} run in all subsequent passes.
668 // The next call to this->init(...) will set vs0.rp to point after the {F}.
669
670 // & Enc{ TCode } if TDef=0 (TDefL==0)
671 if (TDef != 0) {
672 coding* tcode = coding::findBySpec(1, 256); // BYTE1
673 // find the most narrowly sufficient code:
674 for (int B = 2; B <= B_MAX; B++) {
675 if (fVlength <= tcode->umax) break; // found it
676 tcode->free();
677 tcode = coding::findBySpec(B, TH);
678 CHECK_NULL(tcode);
679 }
680 if (!(fVlength <= tcode->umax)) {
681 abort("pop.L value too small");
682 return;
683 }
684 this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null);
685 tcode->free();
686 } else {
687 this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null);
688 }
689 CHECK;
690
691 // Count the number of zero tokens right now.
692 // Also verify that they are in bounds.
693 int UN = 0; // one {U} for each zero in {T}
694 value_stream vs = vs0;
695 for (int i = 0; i < N; i++) {
696 uint val = vs.getInt();
697 if (val == 0) UN += 1;
698 if (!(val <= fVlength)) {
699 abort("pop token out of range");
700 return;
701 }
702 }
703 vs.done();
704
705 // & Enc{ UCode } if UDef=0
706 if (UN != 0) {
707 uValues = U_NEW(coding_method, 1);
708 CHECK_NULL(uValues);
709 uValues->u = u;
710 if (UDef != 0) {
711 uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null);
712 } else {
713 uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null);
714 }
715 } else {
716 if (UDef == 0) {
717 int uop = (*meta_rp++ & 0xFF);
718 if (uop > _meta_canon_max)
719 // %%% Spec. requires the more strict (uop != _meta_default).
720 abort("bad meta-coding for empty pop/U");
721 }
722 }
723
724 // Bug fix for 6259542
725 // Last of all, adjust vs0.cmk to the 'pop' flavor
726 for (coding_method* self = this; self != null; self = self->next) {
727 coding_method_kind cmk2 = cmk_pop;
728 switch (self->vs0.cmk) {
729 case cmk_BHS0: cmk2 = cmk_pop_BHS0; break;
730 case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break;
731 }
732 self->vs0.cmk = cmk2;
733 if (self != this) {
734 assert(self->fValues == null); // no double init
735 self->fValues = this->fValues;
736 self->fVlength = this->fVlength;
737 assert(self->uValues == null); // must stay null
738 }
739 }
740
741 return; // done; no falling through
742
743 } else {
744 abort("bad meta-coding");
745 return;
746 }
747
748 // Common code here skips a series of values with one coding.
749 assert(foundc != null);
750
751 assert(vs0.cmk == cmk_ERROR); // no garbage, please
752 assert(vs0.rp == null); // no garbage, please
753 assert(vs0.rplimit == null); // no garbage, please
754 assert(vs0.sum == 0); // no garbage, please
755
756 vs0.init(band_rp, band_limit, foundc);
757
758 // Done with foundc. Free if necessary.
759 if (to_free != null) {
760 to_free->free();
761 to_free = null;
762 }
763 foundc = null;
764
765 coding& c = vs0.c;
766 CODING_PRIVATE(c.spec);
767 // assert sane N
768 assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N);
769
770 // Look at the values, or at least skip over them quickly.
771 if (valueSink == null) {
772 // Skip and ignore values in the first pass.
773 c.parseMultiple(band_rp, N, band_limit, B, H);
774 } else if (N >= 0) {
775 // Pop coding, {F} sequence, initial run of values...
776 assert((mode & DISABLE_POP) != 0);
777 value_stream vs = vs0;
778 for (int n = 0; n < N; n++) {
779 int val = vs.getInt();
780 valueSink->add(val);
781 }
782 band_rp = vs.rp;
783 } else {
784 // Pop coding, {F} sequence, final run of values...
785 assert((mode & DISABLE_POP) != 0);
786 assert(N == POP_FAVORED_N);
787 int min = INT_MIN_VALUE; // farthest from the center
788 // min2 is based on the buggy specification of centrality in version 150.7
789 // no known implementations transmit this value, but just in case...
790 //int min2 = INT_MIN_VALUE;
791 int last = 0;
792 // if there were initial runs, find the potential sentinels in them:
793 for (int i = 0; i < valueSink->length(); i++) {
794 last = valueSink->get(i);
795 min = moreCentral(min, last);
796 //min2 = moreCentral2(min2, last, min);
797 }
798 value_stream vs = vs0;
799 for (;;) {
800 int val = vs.getInt();
801 if (valueSink->length() > 0 &&
802 (val == last || val == min)) //|| val == min2
803 break;
804 valueSink->add(val);
805 CHECK;
806 last = val;
807 min = moreCentral(min, last);
808 //min2 = moreCentral2(min2, last, min);
809 }
810 band_rp = vs.rp;
811 }
812
813 // Get an accurate upper limit now.
814 vs0.rplimit = band_rp;
815 vs0.cm = this;
816
817 return; // success
818}
819
820coding basic_codings[] = {
821 // This one is not a usable irregular coding, but is used by cp_Utf8_chars.
822 CODING_INIT(3,128,0,0),
823
824 // Fixed-length codings:
825 CODING_INIT(1,256,0,0),
826 CODING_INIT(1,256,1,0),
827 CODING_INIT(1,256,0,1),
828 CODING_INIT(1,256,1,1),
829 CODING_INIT(2,256,0,0),
830 CODING_INIT(2,256,1,0),
831 CODING_INIT(2,256,0,1),
832 CODING_INIT(2,256,1,1),
833 CODING_INIT(3,256,0,0),
834 CODING_INIT(3,256,1,0),
835 CODING_INIT(3,256,0,1),
836 CODING_INIT(3,256,1,1),
837 CODING_INIT(4,256,0,0),
838 CODING_INIT(4,256,1,0),
839 CODING_INIT(4,256,0,1),
840 CODING_INIT(4,256,1,1),
841
842 // Full-range variable-length codings:
843 CODING_INIT(5, 4,0,0),
844 CODING_INIT(5, 4,1,0),
845 CODING_INIT(5, 4,2,0),
846 CODING_INIT(5, 16,0,0),
847 CODING_INIT(5, 16,1,0),
848 CODING_INIT(5, 16,2,0),
849 CODING_INIT(5, 32,0,0),
850 CODING_INIT(5, 32,1,0),
851 CODING_INIT(5, 32,2,0),
852 CODING_INIT(5, 64,0,0),
853 CODING_INIT(5, 64,1,0),
854 CODING_INIT(5, 64,2,0),
855 CODING_INIT(5,128,0,0),
856 CODING_INIT(5,128,1,0),
857 CODING_INIT(5,128,2,0),
858
859 CODING_INIT(5, 4,0,1),
860 CODING_INIT(5, 4,1,1),
861 CODING_INIT(5, 4,2,1),
862 CODING_INIT(5, 16,0,1),
863 CODING_INIT(5, 16,1,1),
864 CODING_INIT(5, 16,2,1),
865 CODING_INIT(5, 32,0,1),
866 CODING_INIT(5, 32,1,1),
867 CODING_INIT(5, 32,2,1),
868 CODING_INIT(5, 64,0,1),
869 CODING_INIT(5, 64,1,1),
870 CODING_INIT(5, 64,2,1),
871 CODING_INIT(5,128,0,1),
872 CODING_INIT(5,128,1,1),
873 CODING_INIT(5,128,2,1),
874
875 // Variable length subrange codings:
876 CODING_INIT(2,192,0,0),
877 CODING_INIT(2,224,0,0),
878 CODING_INIT(2,240,0,0),
879 CODING_INIT(2,248,0,0),
880 CODING_INIT(2,252,0,0),
881
882 CODING_INIT(2, 8,0,1),
883 CODING_INIT(2, 8,1,1),
884 CODING_INIT(2, 16,0,1),
885 CODING_INIT(2, 16,1,1),
886 CODING_INIT(2, 32,0,1),
887 CODING_INIT(2, 32,1,1),
888 CODING_INIT(2, 64,0,1),
889 CODING_INIT(2, 64,1,1),
890 CODING_INIT(2,128,0,1),
891 CODING_INIT(2,128,1,1),
892 CODING_INIT(2,192,0,1),
893 CODING_INIT(2,192,1,1),
894 CODING_INIT(2,224,0,1),
895 CODING_INIT(2,224,1,1),
896 CODING_INIT(2,240,0,1),
897 CODING_INIT(2,240,1,1),
898 CODING_INIT(2,248,0,1),
899 CODING_INIT(2,248,1,1),
900
901 CODING_INIT(3,192,0,0),
902 CODING_INIT(3,224,0,0),
903 CODING_INIT(3,240,0,0),
904 CODING_INIT(3,248,0,0),
905 CODING_INIT(3,252,0,0),
906
907 CODING_INIT(3, 8,0,1),
908 CODING_INIT(3, 8,1,1),
909 CODING_INIT(3, 16,0,1),
910 CODING_INIT(3, 16,1,1),
911 CODING_INIT(3, 32,0,1),
912 CODING_INIT(3, 32,1,1),
913 CODING_INIT(3, 64,0,1),
914 CODING_INIT(3, 64,1,1),
915 CODING_INIT(3,128,0,1),
916 CODING_INIT(3,128,1,1),
917 CODING_INIT(3,192,0,1),
918 CODING_INIT(3,192,1,1),
919 CODING_INIT(3,224,0,1),
920 CODING_INIT(3,224,1,1),
921 CODING_INIT(3,240,0,1),
922 CODING_INIT(3,240,1,1),
923 CODING_INIT(3,248,0,1),
924 CODING_INIT(3,248,1,1),
925
926 CODING_INIT(4,192,0,0),
927 CODING_INIT(4,224,0,0),
928 CODING_INIT(4,240,0,0),
929 CODING_INIT(4,248,0,0),
930 CODING_INIT(4,252,0,0),
931
932 CODING_INIT(4, 8,0,1),
933 CODING_INIT(4, 8,1,1),
934 CODING_INIT(4, 16,0,1),
935 CODING_INIT(4, 16,1,1),
936 CODING_INIT(4, 32,0,1),
937 CODING_INIT(4, 32,1,1),
938 CODING_INIT(4, 64,0,1),
939 CODING_INIT(4, 64,1,1),
940 CODING_INIT(4,128,0,1),
941 CODING_INIT(4,128,1,1),
942 CODING_INIT(4,192,0,1),
943 CODING_INIT(4,192,1,1),
944 CODING_INIT(4,224,0,1),
945 CODING_INIT(4,224,1,1),
946 CODING_INIT(4,240,0,1),
947 CODING_INIT(4,240,1,1),
948 CODING_INIT(4,248,0,1),
949 CODING_INIT(4,248,1,1),
950
951 0
952};
953#define BASIC_INDEX_LIMIT \
954 (sizeof(basic_codings)/sizeof(basic_codings[0])-1)
955
956coding* coding::findByIndex(int idx) {
957 assert(_meta_canon_min == 1);
958 assert(_meta_canon_max+1 == BASIC_INDEX_LIMIT);
959 if (idx >= _meta_canon_min && idx <= _meta_canon_max)
960 return basic_codings[idx].init();
961 else
962 return null;
963}
964
965#ifndef PRODUCT
966const char* coding::string() {
967 CODING_PRIVATE(spec);
968 bytes buf;
969 buf.malloc(100);
970 char maxS[20], minS[20];
971 sprintf(maxS, "%d", max);
972 sprintf(minS, "%d", min);
973 if (max == INT_MAX_VALUE) strcpy(maxS, "max");
974 if (min == INT_MIN_VALUE) strcpy(minS, "min");
975 sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]",
976 B,H,S,D,L,minS,maxS);
977 return (const char*) buf.ptr;
978}
979#endif