J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2002-2005 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | // -*- C++ -*- |
| 27 | // Small program for unpacking specially compressed Java packages. |
| 28 | // John R. Rose |
| 29 | |
| 30 | #include <stdio.h> |
| 31 | #include <string.h> |
| 32 | #include <stdlib.h> |
| 33 | #include <stdarg.h> |
| 34 | |
| 35 | #include "defines.h" |
| 36 | #include "bytes.h" |
| 37 | #include "utils.h" |
| 38 | #include "coding.h" |
| 39 | |
| 40 | #include "constants.h" |
| 41 | #include "unpack.h" |
| 42 | |
| 43 | extern coding basic_codings[]; |
| 44 | |
// Declare the locals spec_, B, H, S, D and L in the current scope, unpacked
// from a coding spec with the CODING_B/H/S/D accessors.  L is derived from H
// as 256 - H.  The expansion deliberately ends without a semicolon; the
// caller supplies it.
#define CODING_PRIVATE(spec) \
  int spec_ = spec; \
  int B = CODING_B(spec_), H = CODING_H(spec_); \
  int S = CODING_S(spec_), D = CODING_D(spec_); \
  int L = 256 - H
| 52 | |
// True when the low S bits of (codeVal + 1) are all zero.  decode_sign()
// maps exactly those codes to negative values.
// The increment is done in unsigned arithmetic so that a code of 0x7FFFFFFF
// cannot overflow a signed int, and S is parenthesized so that any
// expression may be passed for it.  Only the low 32 bits of codeVal matter.
#define IS_NEG_CODE(S, codeVal) \
  ( ((((uint)(codeVal)) + 1u) & ((1u << (S)) - 1u)) == 0 )
| 55 | |
// Sign decoding specialized for S == 1: shift the code right by one bit and
// complement every bit of the result when the code's low bit was set.
// The result has type uint; callers convert it to int.
#define DECODE_SIGN_S1(ux) \
  ( ((uint)(ux) >> 1) ^ (uint)(0 - (int)((uint)(ux) & 1u)) )
| 58 | |
| 59 | static maybe_inline |
| 60 | int decode_sign(int S, uint ux) { // == Coding.decodeSign32 |
| 61 | assert(S > 0); |
| 62 | uint sigbits = (ux >> S); |
| 63 | if (IS_NEG_CODE(S, ux)) |
| 64 | return (int)( ~sigbits); |
| 65 | else |
| 66 | return (int)(ux - sigbits); |
| 67 | // Note that (int)(ux-sigbits) can be negative, if ux is large enough. |
| 68 | } |
| 69 | |
| 70 | coding* coding::init() { |
| 71 | if (umax > 0) return this; // already done |
| 72 | assert(spec != 0); // sanity |
| 73 | |
| 74 | // fill in derived fields |
| 75 | CODING_PRIVATE(spec); |
| 76 | |
| 77 | // Return null if 'arb(BHSD)' parameter constraints are not met: |
| 78 | if (B < 1 || B > B_MAX) return null; |
| 79 | if (H < 1 || H > 256) return null; |
| 80 | if (S < 0 || S > 2) return null; |
| 81 | if (D < 0 || D > 1) return null; |
| 82 | if (B == 1 && H != 256) return null; // 1-byte coding must be fixed-size |
| 83 | if (B >= 5 && H == 256) return null; // no 5-byte fixed-size coding |
| 84 | |
| 85 | // first compute the range of the coding, in 64 bits |
| 86 | jlong range = 0; |
| 87 | { |
| 88 | jlong H_i = 1; |
| 89 | for (int i = 0; i < B; i++) { |
| 90 | range += H_i; |
| 91 | H_i *= H; |
| 92 | } |
| 93 | range *= L; |
| 94 | range += H_i; |
| 95 | } |
| 96 | assert(range > 0); // no useless codings, please |
| 97 | |
| 98 | int this_umax; |
| 99 | |
| 100 | // now, compute min and max |
| 101 | if (range >= ((jlong)1 << 32)) { |
| 102 | this_umax = INT_MAX_VALUE; |
| 103 | this->umin = INT_MIN_VALUE; |
| 104 | this->max = INT_MAX_VALUE; |
| 105 | this->min = INT_MIN_VALUE; |
| 106 | } else { |
| 107 | this_umax = (range > INT_MAX_VALUE) ? INT_MAX_VALUE : (int)range-1; |
| 108 | this->max = this_umax; |
| 109 | this->min = this->umin = 0; |
| 110 | if (S != 0 && range != 0) { |
| 111 | int Smask = (1<<S)-1; |
| 112 | jlong maxPosCode = range-1; |
| 113 | jlong maxNegCode = range-1; |
| 114 | while (IS_NEG_CODE(S, maxPosCode)) --maxPosCode; |
| 115 | while (!IS_NEG_CODE(S, maxNegCode)) --maxNegCode; |
| 116 | int maxPos = decode_sign(S, maxPosCode); |
| 117 | if (maxPos < 0) |
| 118 | this->max = INT_MAX_VALUE; // 32-bit wraparound |
| 119 | else |
| 120 | this->max = maxPos; |
| 121 | if (maxNegCode < 0) |
| 122 | this->min = 0; // No negative codings at all. |
| 123 | else |
| 124 | this->min = decode_sign(S, maxNegCode); |
| 125 | } |
| 126 | } |
| 127 | |
| 128 | assert(!(isFullRange | isSigned | isSubrange)); // init |
| 129 | if (min < 0) |
| 130 | this->isSigned = true; |
| 131 | if (max < INT_MAX_VALUE && range <= INT_MAX_VALUE) |
| 132 | this->isSubrange = true; |
| 133 | if (max == INT_MAX_VALUE && min == INT_MIN_VALUE) |
| 134 | this->isFullRange = true; |
| 135 | |
| 136 | // do this last, to reduce MT exposure (should have a membar too) |
| 137 | this->umax = this_umax; |
| 138 | |
| 139 | return this; |
| 140 | } |
| 141 | |
| 142 | coding* coding::findBySpec(int spec) { |
| 143 | for (coding* scan = &basic_codings[0]; ; scan++) { |
| 144 | if (scan->spec == spec) |
| 145 | return scan->init(); |
| 146 | if (scan->spec == 0) |
| 147 | break; |
| 148 | } |
| 149 | coding* ptr = NEW(coding, 1); |
| 150 | CHECK_NULL_0(ptr); |
| 151 | coding* c = ptr->initFrom(spec); |
| 152 | if (c == null) mtrace('f', ptr, 0); |
| 153 | if (c == null) |
| 154 | ::free(ptr); |
| 155 | else |
| 156 | // else caller should free it... |
| 157 | c->isMalloc = true; |
| 158 | return c; |
| 159 | } |
| 160 | |
| 161 | coding* coding::findBySpec(int B, int H, int S, int D) { |
| 162 | if (B < 1 || B > B_MAX) return null; |
| 163 | if (H < 1 || H > 256) return null; |
| 164 | if (S < 0 || S > 2) return null; |
| 165 | if (D < 0 || D > 1) return null; |
| 166 | return findBySpec(CODING_SPEC(B, H, S, D)); |
| 167 | } |
| 168 | |
| 169 | void coding::free() { |
| 170 | if (isMalloc) mtrace('f', this, 0); |
| 171 | if (isMalloc) |
| 172 | ::free(this); |
| 173 | } |
| 174 | |
| 175 | void coding_method::reset(value_stream* state) { |
| 176 | assert(state->rp == state->rplimit); // not in mid-stream, please |
| 177 | //assert(this == vs0.cm); |
| 178 | state[0] = vs0; |
| 179 | if (uValues != null) { |
| 180 | uValues->reset(state->helper()); |
| 181 | } |
| 182 | } |
| 183 | |
| 184 | maybe_inline |
| 185 | uint coding::parse(byte* &rp, int B, int H) { |
| 186 | int L = 256-H; |
| 187 | byte* ptr = rp; |
| 188 | // hand peel the i==0 part of the loop: |
| 189 | uint b_i = *ptr++ & 0xFF; |
| 190 | if (B == 1 || b_i < L) |
| 191 | { rp = ptr; return b_i; } |
| 192 | uint sum = b_i; |
| 193 | uint H_i = H; |
| 194 | assert(B <= B_MAX); |
| 195 | for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired |
| 196 | b_i = *ptr++ & 0xFF; |
| 197 | sum += b_i * H_i; |
| 198 | if (i == B || b_i < L) |
| 199 | { rp = ptr; return sum; } |
| 200 | H_i *= H; |
| 201 | } |
| 202 | assert(false); |
| 203 | return 0; |
| 204 | } |
| 205 | |
| 206 | maybe_inline |
| 207 | uint coding::parse_lgH(byte* &rp, int B, int H, int lgH) { |
| 208 | assert(H == (1<<lgH)); |
| 209 | int L = 256-(1<<lgH); |
| 210 | byte* ptr = rp; |
| 211 | // hand peel the i==0 part of the loop: |
| 212 | uint b_i = *ptr++ & 0xFF; |
| 213 | if (B == 1 || b_i < L) |
| 214 | { rp = ptr; return b_i; } |
| 215 | uint sum = b_i; |
| 216 | uint lg_H_i = lgH; |
| 217 | assert(B <= B_MAX); |
| 218 | for (int i = 2; i <= B_MAX; i++) { // easy for compilers to unroll if desired |
| 219 | b_i = *ptr++ & 0xFF; |
| 220 | sum += b_i << lg_H_i; |
| 221 | if (i == B || b_i < L) |
| 222 | { rp = ptr; return sum; } |
| 223 | lg_H_i += lgH; |
| 224 | } |
| 225 | assert(false); |
| 226 | return 0; |
| 227 | } |
| 228 | |
| 229 | static const char ERB[] = "EOF reading band"; |
| 230 | |
| 231 | maybe_inline |
| 232 | void coding::parseMultiple(byte* &rp, int N, byte* limit, int B, int H) { |
| 233 | if (N < 0) { |
| 234 | abort("bad value count"); |
| 235 | return; |
| 236 | } |
| 237 | byte* ptr = rp; |
| 238 | if (B == 1 || H == 256) { |
| 239 | size_t len = (size_t)N*B; |
| 240 | if (len / B != N || ptr+len > limit) { |
| 241 | abort(ERB); |
| 242 | return; |
| 243 | } |
| 244 | rp = ptr+len; |
| 245 | return; |
| 246 | } |
| 247 | // Note: We assume rp has enough zero-padding. |
| 248 | int L = 256-H; |
| 249 | int n = B; |
| 250 | while (N > 0) { |
| 251 | ptr += 1; |
| 252 | if (--n == 0) { |
| 253 | // end of encoding at B bytes, regardless of byte value |
| 254 | } else { |
| 255 | int b = (ptr[-1] & 0xFF); |
| 256 | if (b >= L) { |
| 257 | // keep going, unless we find a byte < L |
| 258 | continue; |
| 259 | } |
| 260 | } |
| 261 | // found the last byte |
| 262 | N -= 1; |
| 263 | n = B; // reset length counter |
| 264 | // do an error check here |
| 265 | if (ptr > limit) { |
| 266 | abort(ERB); |
| 267 | return; |
| 268 | } |
| 269 | } |
| 270 | rp = ptr; |
| 271 | return; |
| 272 | } |
| 273 | |
| 274 | bool value_stream::hasHelper() { |
| 275 | // If my coding method is a pop-style method, |
| 276 | // then I need a second value stream to transmit |
| 277 | // unfavored values. |
| 278 | // This can be determined by examining fValues. |
| 279 | return cm->fValues != null; |
| 280 | } |
| 281 | |
| 282 | void value_stream::init(byte* rp_, byte* rplimit_, coding* defc) { |
| 283 | rp = rp_; |
| 284 | rplimit = rplimit_; |
| 285 | sum = 0; |
| 286 | cm = null; // no need in the simple case |
| 287 | setCoding(defc); |
| 288 | } |
| 289 | |
| 290 | void value_stream::setCoding(coding* defc) { |
| 291 | if (defc == null) { |
| 292 | unpack_abort("bad coding"); |
| 293 | defc = coding::findByIndex(_meta_canon_min); // random pick for recovery |
| 294 | } |
| 295 | |
| 296 | c = (*defc); |
| 297 | |
| 298 | // choose cmk |
| 299 | cmk = cmk_ERROR; |
| 300 | switch (c.spec) { |
| 301 | case BYTE1_spec: cmk = cmk_BYTE1; break; |
| 302 | case CHAR3_spec: cmk = cmk_CHAR3; break; |
| 303 | case UNSIGNED5_spec: cmk = cmk_UNSIGNED5; break; |
| 304 | case DELTA5_spec: cmk = cmk_DELTA5; break; |
| 305 | case BCI5_spec: cmk = cmk_BCI5; break; |
| 306 | case BRANCH5_spec: cmk = cmk_BRANCH5; break; |
| 307 | default: |
| 308 | if (c.D() == 0) { |
| 309 | switch (c.S()) { |
| 310 | case 0: cmk = cmk_BHS0; break; |
| 311 | case 1: cmk = cmk_BHS1; break; |
| 312 | default: cmk = cmk_BHS; break; |
| 313 | } |
| 314 | } else { |
| 315 | if (c.S() == 1) { |
| 316 | if (c.isFullRange) cmk = cmk_BHS1D1full; |
| 317 | if (c.isSubrange) cmk = cmk_BHS1D1sub; |
| 318 | } |
| 319 | if (cmk == cmk_ERROR) cmk = cmk_BHSD1; |
| 320 | } |
| 321 | } |
| 322 | } |
| 323 | |
| 324 | static maybe_inline |
| 325 | int getPopValue(value_stream* self, uint uval) { |
| 326 | if (uval > 0) { |
| 327 | // note that the initial parse performed a range check |
| 328 | assert(uval <= self->cm->fVlength); |
| 329 | return self->cm->fValues[uval-1]; |
| 330 | } else { |
| 331 | // take an unfavored value |
| 332 | return self->helper()->getInt(); |
| 333 | } |
| 334 | } |
| 335 | |
| 336 | maybe_inline |
| 337 | int coding::sumInUnsignedRange(int x, int y) { |
| 338 | assert(isSubrange); |
| 339 | int range = (int)(umax+1); |
| 340 | assert(range > 0); |
| 341 | x += y; |
| 342 | if (x != (int)((jlong)(x-y) + (jlong)y)) { |
| 343 | // 32-bit overflow interferes with range reduction. |
| 344 | // Back off from the overflow by adding a multiple of range: |
| 345 | if (x < 0) { |
| 346 | x -= range; |
| 347 | assert(x >= 0); |
| 348 | } else { |
| 349 | x += range; |
| 350 | assert(x < 0); |
| 351 | } |
| 352 | } |
| 353 | if (x < 0) { |
| 354 | x += range; |
| 355 | if (x >= 0) return x; |
| 356 | } else if (x >= range) { |
| 357 | x -= range; |
| 358 | if (x < range) return x; |
| 359 | } else { |
| 360 | // in range |
| 361 | return x; |
| 362 | } |
| 363 | // do it the hard way |
| 364 | x %= range; |
| 365 | if (x < 0) x += range; |
| 366 | return x; |
| 367 | } |
| 368 | |
| 369 | static maybe_inline |
| 370 | int getDeltaValue(value_stream* self, uint uval, bool isSubrange) { |
| 371 | assert((bool)(self->c.isSubrange) == isSubrange); |
| 372 | assert(self->c.isSubrange | self->c.isFullRange); |
| 373 | if (isSubrange) |
| 374 | return self->sum = self->c.sumInUnsignedRange(self->sum, (int)uval); |
| 375 | else |
| 376 | return self->sum += (int) uval; |
| 377 | } |
| 378 | |
| 379 | bool value_stream::hasValue() { |
| 380 | if (rp < rplimit) return true; |
| 381 | if (cm == null) return false; |
| 382 | if (cm->next == null) return false; |
| 383 | cm->next->reset(this); |
| 384 | return hasValue(); |
| 385 | } |
| 386 | |
| 387 | int value_stream::getInt() { |
| 388 | if (rp >= rplimit) { |
| 389 | // Advance to next coding segment. |
| 390 | if (rp > rplimit || cm == null || cm->next == null) { |
| 391 | // Must perform this check and throw an exception on bad input. |
| 392 | unpack_abort(ERB); |
| 393 | return 0; |
| 394 | } |
| 395 | cm->next->reset(this); |
| 396 | return getInt(); |
| 397 | } |
| 398 | |
| 399 | CODING_PRIVATE(c.spec); |
| 400 | uint uval; |
| 401 | enum { |
| 402 | B5 = 5, |
| 403 | B3 = 3, |
| 404 | H128 = 128, |
| 405 | H64 = 64, |
| 406 | H4 = 4 |
| 407 | }; |
| 408 | switch (cmk) { |
| 409 | case cmk_BHS: |
| 410 | assert(D == 0); |
| 411 | uval = coding::parse(rp, B, H); |
| 412 | if (S == 0) |
| 413 | return (int) uval; |
| 414 | return decode_sign(S, uval); |
| 415 | |
| 416 | case cmk_BHS0: |
| 417 | assert(S == 0 && D == 0); |
| 418 | uval = coding::parse(rp, B, H); |
| 419 | return (int) uval; |
| 420 | |
| 421 | case cmk_BHS1: |
| 422 | assert(S == 1 && D == 0); |
| 423 | uval = coding::parse(rp, B, H); |
| 424 | return DECODE_SIGN_S1(uval); |
| 425 | |
| 426 | case cmk_BYTE1: |
| 427 | assert(c.spec == BYTE1_spec); |
| 428 | assert(B == 1 && H == 256 && S == 0 && D == 0); |
| 429 | return *rp++ & 0xFF; |
| 430 | |
| 431 | case cmk_CHAR3: |
| 432 | assert(c.spec == CHAR3_spec); |
| 433 | assert(B == B3 && H == H128 && S == 0 && D == 0); |
| 434 | return coding::parse_lgH(rp, B3, H128, 7); |
| 435 | |
| 436 | case cmk_UNSIGNED5: |
| 437 | assert(c.spec == UNSIGNED5_spec); |
| 438 | assert(B == B5 && H == H64 && S == 0 && D == 0); |
| 439 | return coding::parse_lgH(rp, B5, H64, 6); |
| 440 | |
| 441 | case cmk_BHSD1: |
| 442 | assert(D == 1); |
| 443 | uval = coding::parse(rp, B, H); |
| 444 | if (S != 0) |
| 445 | uval = (uint) decode_sign(S, uval); |
| 446 | return getDeltaValue(this, uval, c.isSubrange); |
| 447 | |
| 448 | case cmk_BHS1D1full: |
| 449 | assert(S == 1 && D == 1 && c.isFullRange); |
| 450 | uval = coding::parse(rp, B, H); |
| 451 | uval = (uint) DECODE_SIGN_S1(uval); |
| 452 | return getDeltaValue(this, uval, false); |
| 453 | |
| 454 | case cmk_BHS1D1sub: |
| 455 | assert(S == 1 && D == 1 && c.isSubrange); |
| 456 | uval = coding::parse(rp, B, H); |
| 457 | uval = (uint) DECODE_SIGN_S1(uval); |
| 458 | return getDeltaValue(this, uval, true); |
| 459 | |
| 460 | case cmk_DELTA5: |
| 461 | assert(c.spec == DELTA5_spec); |
| 462 | assert(B == B5 && H == H64 && S == 1 && D == 1 && c.isFullRange); |
| 463 | uval = coding::parse_lgH(rp, B5, H64, 6); |
| 464 | sum += DECODE_SIGN_S1(uval); |
| 465 | return sum; |
| 466 | |
| 467 | case cmk_BCI5: |
| 468 | assert(c.spec == BCI5_spec); |
| 469 | assert(B == B5 && H == H4 && S == 0 && D == 0); |
| 470 | return coding::parse_lgH(rp, B5, H4, 2); |
| 471 | |
| 472 | case cmk_BRANCH5: |
| 473 | assert(c.spec == BRANCH5_spec); |
| 474 | assert(B == B5 && H == H4 && S == 2 && D == 0); |
| 475 | uval = coding::parse_lgH(rp, B5, H4, 2); |
| 476 | return decode_sign(S, uval); |
| 477 | |
| 478 | case cmk_pop: |
| 479 | uval = coding::parse(rp, B, H); |
| 480 | if (S != 0) { |
| 481 | uval = (uint) decode_sign(S, uval); |
| 482 | } |
| 483 | if (D != 0) { |
| 484 | assert(c.isSubrange | c.isFullRange); |
| 485 | if (c.isSubrange) |
| 486 | sum = c.sumInUnsignedRange(sum, (int) uval); |
| 487 | else |
| 488 | sum += (int) uval; |
| 489 | uval = (uint) sum; |
| 490 | } |
| 491 | return getPopValue(this, uval); |
| 492 | |
| 493 | case cmk_pop_BHS0: |
| 494 | assert(S == 0 && D == 0); |
| 495 | uval = coding::parse(rp, B, H); |
| 496 | return getPopValue(this, uval); |
| 497 | |
| 498 | case cmk_pop_BYTE1: |
| 499 | assert(c.spec == BYTE1_spec); |
| 500 | assert(B == 1 && H == 256 && S == 0 && D == 0); |
| 501 | return getPopValue(this, *rp++ & 0xFF); |
| 502 | } |
| 503 | assert(false); |
| 504 | return 0; |
| 505 | } |
| 506 | |
| 507 | static maybe_inline |
| 508 | int moreCentral(int x, int y) { // used to find end of Pop.{F} |
| 509 | // Suggested implementation from the Pack200 specification: |
| 510 | uint kx = (x >> 31) ^ (x << 1); |
| 511 | uint ky = (y >> 31) ^ (y << 1); |
| 512 | return (kx < ky? x: y); |
| 513 | } |
| 514 | //static maybe_inline |
| 515 | //int moreCentral2(int x, int y, int min) { |
| 516 | // // Strict implementation of buggy 150.7 specification. |
| 517 | // // The bug is that the spec. says absolute-value ties are broken |
| 518 | // // in favor of positive numbers, but the suggested implementation |
| 519 | // // (also mentioned in the spec.) breaks ties in favor of negative numbers. |
| 520 | // if ((x + y) != 0) |
| 521 | // return min; |
| 522 | // else |
| 523 | // // return the other value, which breaks a tie in the positive direction |
| 524 | // return (x > y)? x: y; |
| 525 | //} |
| 526 | |
// NO_META is a byte* lvalue holding null.  coding_method::init takes its
// meta-coding pointer by reference and treats a null pointer as
// _meta_default, so passing NO_META selects the default coding.
| 527 | static const byte* no_meta[] = {null}; |
| 528 | #define NO_META (*(byte**)no_meta) |
// Value count passed to coding_method::init for the final run of the pop
// {F} sequence, whose length is found by reading up to a sentinel value.
| 529 | enum { POP_FAVORED_N = -2 }; |
| 530 | |
| 531 | // mode bits |
| 532 | #define DISABLE_RUN 1 // used immediately inside ACode |
| 533 | #define DISABLE_POP 2 // used recursively in all pop sub-bands |
| 534 | |
| 535 | // This function knows all about meta-coding. |
| 536 | void coding_method::init(byte* &band_rp, byte* band_limit, |
| 537 | byte* &meta_rp, int mode, |
| 538 | coding* defc, int N, |
| 539 | intlist* valueSink) { |
| 540 | assert(N != 0); |
| 541 | |
| 542 | assert(u != null); // must be pre-initialized |
| 543 | //if (u == null) u = unpacker::current(); // expensive |
| 544 | |
| 545 | int op = (meta_rp == null) ? _meta_default : (*meta_rp++ & 0xFF); |
| 546 | coding* foundc = null; |
| 547 | coding* to_free = null; |
| 548 | |
| 549 | if (op == _meta_default) { |
| 550 | foundc = defc; |
| 551 | // and fall through |
| 552 | |
| 553 | } else if (op >= _meta_canon_min && op <= _meta_canon_max) { |
| 554 | foundc = coding::findByIndex(op); |
| 555 | // and fall through |
| 556 | |
| 557 | } else if (op == _meta_arb) { |
| 558 | int args = (*meta_rp++ & 0xFF); |
| 559 | // args = (D:[0..1] + 2*S[0..2] + 8*(B:[1..5]-1)) |
| 560 | int D = ((args >> 0) & 1); |
| 561 | int S = ((args >> 1) & 3); |
| 562 | int B = ((args >> 3) & -1) + 1; |
| 563 | // & (H[1..256]-1) |
| 564 | int H = (*meta_rp++ & 0xFF) + 1; |
| 565 | foundc = coding::findBySpec(B, H, S, D); |
| 566 | to_free = foundc; // findBySpec may dynamically allocate |
| 567 | if (foundc == null) { |
| 568 | abort("illegal arb. coding"); |
| 569 | return; |
| 570 | } |
| 571 | // and fall through |
| 572 | |
| 573 | } else if (op >= _meta_run && op < _meta_pop) { |
| 574 | int args = (op - _meta_run); |
| 575 | // args: KX:[0..3] + 4*(KBFlag:[0..1]) + 8*(ABDef:[0..2]) |
| 576 | int KX = ((args >> 0) & 3); |
| 577 | int KBFlag = ((args >> 2) & 1); |
| 578 | int ABDef = ((args >> 3) & -1); |
| 579 | assert(ABDef <= 2); |
| 580 | // & KB: one of [0..255] if KBFlag=1 |
| 581 | int KB = (!KBFlag? 3: (*meta_rp++ & 0xFF)); |
| 582 | int K = (KB+1) << (KX * 4); |
| 583 | int N2 = (N >= 0) ? N-K : N; |
| 584 | if (N == 0 || (N2 <= 0 && N2 != N)) { |
| 585 | abort("illegal run encoding"); |
| 586 | return; |
| 587 | } |
| 588 | if ((mode & DISABLE_RUN) != 0) { |
| 589 | abort("illegal nested run encoding"); |
| 590 | return; |
| 591 | } |
| 592 | |
| 593 | // & Enc{ ACode } if ADef=0 (ABDef != 1) |
| 594 | // No direct nesting of 'run' in ACode, but in BCode it's OK. |
| 595 | int disRun = mode | DISABLE_RUN; |
| 596 | if (ABDef == 1) { |
| 597 | this->init(band_rp, band_limit, NO_META, disRun, defc, K, valueSink); |
| 598 | } else { |
| 599 | this->init(band_rp, band_limit, meta_rp, disRun, defc, K, valueSink); |
| 600 | } |
| 601 | CHECK; |
| 602 | |
| 603 | // & Enc{ BCode } if BDef=0 (ABDef != 2) |
| 604 | coding_method* tail = U_NEW(coding_method, 1); |
| 605 | CHECK_NULL(tail); |
| 606 | tail->u = u; |
| 607 | |
| 608 | // The 'run' codings may be nested indirectly via 'pop' codings. |
| 609 | // This means that this->next may already be filled in, if |
| 610 | // ACode was of type 'pop' with a 'run' token coding. |
| 611 | // No problem: Just chain the upcoming BCode onto the end. |
| 612 | for (coding_method* self = this; ; self = self->next) { |
| 613 | if (self->next == null) { |
| 614 | self->next = tail; |
| 615 | break; |
| 616 | } |
| 617 | } |
| 618 | |
| 619 | if (ABDef == 2) { |
| 620 | tail->init(band_rp, band_limit, NO_META, mode, defc, N2, valueSink); |
| 621 | } else { |
| 622 | tail->init(band_rp, band_limit, meta_rp, mode, defc, N2, valueSink); |
| 623 | } |
| 624 | // Note: The preceding calls to init should be tail-recursive. |
| 625 | |
| 626 | return; // done; no falling through |
| 627 | |
| 628 | } else if (op >= _meta_pop && op < _meta_limit) { |
| 629 | int args = (op - _meta_pop); |
| 630 | // args: (FDef:[0..1]) + 2*UDef:[0..1] + 4*(TDefL:[0..11]) |
| 631 | int FDef = ((args >> 0) & 1); |
| 632 | int UDef = ((args >> 1) & 1); |
| 633 | int TDefL = ((args >> 2) & -1); |
| 634 | assert(TDefL <= 11); |
| 635 | int TDef = (TDefL > 0); |
| 636 | int TL = (TDefL <= 6) ? (2 << TDefL) : (256 - (4 << (11-TDefL))); |
| 637 | int TH = (256-TL); |
| 638 | if (N <= 0) { |
| 639 | abort("illegal pop encoding"); |
| 640 | return; |
| 641 | } |
| 642 | if ((mode & DISABLE_POP) != 0) { |
| 643 | abort("illegal nested pop encoding"); |
| 644 | return; |
| 645 | } |
| 646 | |
| 647 | // No indirect nesting of 'pop', but 'run' is OK. |
| 648 | int disPop = DISABLE_POP; |
| 649 | |
| 650 | // & Enc{ FCode } if FDef=0 |
| 651 | int FN = POP_FAVORED_N; |
| 652 | assert(valueSink == null); |
| 653 | intlist fValueSink; fValueSink.init(); |
| 654 | coding_method fval; |
| 655 | BYTES_OF(fval).clear(); fval.u = u; |
| 656 | if (FDef != 0) { |
| 657 | fval.init(band_rp, band_limit, NO_META, disPop, defc, FN, &fValueSink); |
| 658 | } else { |
| 659 | fval.init(band_rp, band_limit, meta_rp, disPop, defc, FN, &fValueSink); |
| 660 | } |
| 661 | bytes fvbuf; |
| 662 | fValues = (u->saveTo(fvbuf, fValueSink.b), (int*) fvbuf.ptr); |
| 663 | fVlength = fValueSink.length(); // i.e., the parameter K |
| 664 | fValueSink.free(); |
| 665 | CHECK; |
| 666 | |
| 667 | // Skip the first {F} run in all subsequent passes. |
| 668 | // The next call to this->init(...) will set vs0.rp to point after the {F}. |
| 669 | |
| 670 | // & Enc{ TCode } if TDef=0 (TDefL==0) |
| 671 | if (TDef != 0) { |
| 672 | coding* tcode = coding::findBySpec(1, 256); // BYTE1 |
| 673 | // find the most narrowly sufficient code: |
| 674 | for (int B = 2; B <= B_MAX; B++) { |
| 675 | if (fVlength <= tcode->umax) break; // found it |
| 676 | tcode->free(); |
| 677 | tcode = coding::findBySpec(B, TH); |
| 678 | CHECK_NULL(tcode); |
| 679 | } |
| 680 | if (!(fVlength <= tcode->umax)) { |
| 681 | abort("pop.L value too small"); |
| 682 | return; |
| 683 | } |
| 684 | this->init(band_rp, band_limit, NO_META, disPop, tcode, N, null); |
| 685 | tcode->free(); |
| 686 | } else { |
| 687 | this->init(band_rp, band_limit, meta_rp, disPop, defc, N, null); |
| 688 | } |
| 689 | CHECK; |
| 690 | |
| 691 | // Count the number of zero tokens right now. |
| 692 | // Also verify that they are in bounds. |
| 693 | int UN = 0; // one {U} for each zero in {T} |
| 694 | value_stream vs = vs0; |
| 695 | for (int i = 0; i < N; i++) { |
| 696 | uint val = vs.getInt(); |
| 697 | if (val == 0) UN += 1; |
| 698 | if (!(val <= fVlength)) { |
| 699 | abort("pop token out of range"); |
| 700 | return; |
| 701 | } |
| 702 | } |
| 703 | vs.done(); |
| 704 | |
| 705 | // & Enc{ UCode } if UDef=0 |
| 706 | if (UN != 0) { |
| 707 | uValues = U_NEW(coding_method, 1); |
| 708 | CHECK_NULL(uValues); |
| 709 | uValues->u = u; |
| 710 | if (UDef != 0) { |
| 711 | uValues->init(band_rp, band_limit, NO_META, disPop, defc, UN, null); |
| 712 | } else { |
| 713 | uValues->init(band_rp, band_limit, meta_rp, disPop, defc, UN, null); |
| 714 | } |
| 715 | } else { |
| 716 | if (UDef == 0) { |
| 717 | int uop = (*meta_rp++ & 0xFF); |
| 718 | if (uop > _meta_canon_max) |
| 719 | // %%% Spec. requires the more strict (uop != _meta_default). |
| 720 | abort("bad meta-coding for empty pop/U"); |
| 721 | } |
| 722 | } |
| 723 | |
| 724 | // Bug fix for 6259542 |
| 725 | // Last of all, adjust vs0.cmk to the 'pop' flavor |
| 726 | for (coding_method* self = this; self != null; self = self->next) { |
| 727 | coding_method_kind cmk2 = cmk_pop; |
| 728 | switch (self->vs0.cmk) { |
| 729 | case cmk_BHS0: cmk2 = cmk_pop_BHS0; break; |
| 730 | case cmk_BYTE1: cmk2 = cmk_pop_BYTE1; break; |
| 731 | } |
| 732 | self->vs0.cmk = cmk2; |
| 733 | if (self != this) { |
| 734 | assert(self->fValues == null); // no double init |
| 735 | self->fValues = this->fValues; |
| 736 | self->fVlength = this->fVlength; |
| 737 | assert(self->uValues == null); // must stay null |
| 738 | } |
| 739 | } |
| 740 | |
| 741 | return; // done; no falling through |
| 742 | |
| 743 | } else { |
| 744 | abort("bad meta-coding"); |
| 745 | return; |
| 746 | } |
| 747 | |
| 748 | // Common code here skips a series of values with one coding. |
| 749 | assert(foundc != null); |
| 750 | |
| 751 | assert(vs0.cmk == cmk_ERROR); // no garbage, please |
| 752 | assert(vs0.rp == null); // no garbage, please |
| 753 | assert(vs0.rplimit == null); // no garbage, please |
| 754 | assert(vs0.sum == 0); // no garbage, please |
| 755 | |
| 756 | vs0.init(band_rp, band_limit, foundc); |
| 757 | |
| 758 | // Done with foundc. Free if necessary. |
| 759 | if (to_free != null) { |
| 760 | to_free->free(); |
| 761 | to_free = null; |
| 762 | } |
| 763 | foundc = null; |
| 764 | |
| 765 | coding& c = vs0.c; |
| 766 | CODING_PRIVATE(c.spec); |
| 767 | // assert sane N |
| 768 | assert((uint)N < INT_MAX_VALUE || N == POP_FAVORED_N); |
| 769 | |
| 770 | // Look at the values, or at least skip over them quickly. |
| 771 | if (valueSink == null) { |
| 772 | // Skip and ignore values in the first pass. |
| 773 | c.parseMultiple(band_rp, N, band_limit, B, H); |
| 774 | } else if (N >= 0) { |
| 775 | // Pop coding, {F} sequence, initial run of values... |
| 776 | assert((mode & DISABLE_POP) != 0); |
| 777 | value_stream vs = vs0; |
| 778 | for (int n = 0; n < N; n++) { |
| 779 | int val = vs.getInt(); |
| 780 | valueSink->add(val); |
| 781 | } |
| 782 | band_rp = vs.rp; |
| 783 | } else { |
| 784 | // Pop coding, {F} sequence, final run of values... |
| 785 | assert((mode & DISABLE_POP) != 0); |
| 786 | assert(N == POP_FAVORED_N); |
| 787 | int min = INT_MIN_VALUE; // farthest from the center |
| 788 | // min2 is based on the buggy specification of centrality in version 150.7 |
| 789 | // no known implementations transmit this value, but just in case... |
| 790 | //int min2 = INT_MIN_VALUE; |
| 791 | int last = 0; |
| 792 | // if there were initial runs, find the potential sentinels in them: |
| 793 | for (int i = 0; i < valueSink->length(); i++) { |
| 794 | last = valueSink->get(i); |
| 795 | min = moreCentral(min, last); |
| 796 | //min2 = moreCentral2(min2, last, min); |
| 797 | } |
| 798 | value_stream vs = vs0; |
| 799 | for (;;) { |
| 800 | int val = vs.getInt(); |
| 801 | if (valueSink->length() > 0 && |
| 802 | (val == last || val == min)) //|| val == min2 |
| 803 | break; |
| 804 | valueSink->add(val); |
| 805 | CHECK; |
| 806 | last = val; |
| 807 | min = moreCentral(min, last); |
| 808 | //min2 = moreCentral2(min2, last, min); |
| 809 | } |
| 810 | band_rp = vs.rp; |
| 811 | } |
| 812 | |
| 813 | // Get an accurate upper limit now. |
| 814 | vs0.rplimit = band_rp; |
| 815 | vs0.cm = this; |
| 816 | |
| 817 | return; // success |
| 818 | } |
| 819 | |
// Table of the predefined codings.  coding::findByIndex returns entry idx
// for the canonical meta-coding indexes _meta_canon_min.._meta_canon_max,
// and coding::findBySpec scans the table linearly for a matching spec.
// NOTE(review): the CODING_INIT arguments are presumably (B,H,S,D), in the
// same order as CODING_SPEC -- confirm against coding.h.
| 820 | coding basic_codings[] = { |
| 821 | // This one is not a usable irregular coding, but is used by cp_Utf8_chars. |
| 822 | CODING_INIT(3,128,0,0), |
| 823 | |
| 824 | // Fixed-length codings: |
| 825 | CODING_INIT(1,256,0,0), |
| 826 | CODING_INIT(1,256,1,0), |
| 827 | CODING_INIT(1,256,0,1), |
| 828 | CODING_INIT(1,256,1,1), |
| 829 | CODING_INIT(2,256,0,0), |
| 830 | CODING_INIT(2,256,1,0), |
| 831 | CODING_INIT(2,256,0,1), |
| 832 | CODING_INIT(2,256,1,1), |
| 833 | CODING_INIT(3,256,0,0), |
| 834 | CODING_INIT(3,256,1,0), |
| 835 | CODING_INIT(3,256,0,1), |
| 836 | CODING_INIT(3,256,1,1), |
| 837 | CODING_INIT(4,256,0,0), |
| 838 | CODING_INIT(4,256,1,0), |
| 839 | CODING_INIT(4,256,0,1), |
| 840 | CODING_INIT(4,256,1,1), |
| 841 | |
| 842 | // Full-range variable-length codings: |
| 843 | CODING_INIT(5, 4,0,0), |
| 844 | CODING_INIT(5, 4,1,0), |
| 845 | CODING_INIT(5, 4,2,0), |
| 846 | CODING_INIT(5, 16,0,0), |
| 847 | CODING_INIT(5, 16,1,0), |
| 848 | CODING_INIT(5, 16,2,0), |
| 849 | CODING_INIT(5, 32,0,0), |
| 850 | CODING_INIT(5, 32,1,0), |
| 851 | CODING_INIT(5, 32,2,0), |
| 852 | CODING_INIT(5, 64,0,0), |
| 853 | CODING_INIT(5, 64,1,0), |
| 854 | CODING_INIT(5, 64,2,0), |
| 855 | CODING_INIT(5,128,0,0), |
| 856 | CODING_INIT(5,128,1,0), |
| 857 | CODING_INIT(5,128,2,0), |
| 858 | |
| 859 | CODING_INIT(5, 4,0,1), |
| 860 | CODING_INIT(5, 4,1,1), |
| 861 | CODING_INIT(5, 4,2,1), |
| 862 | CODING_INIT(5, 16,0,1), |
| 863 | CODING_INIT(5, 16,1,1), |
| 864 | CODING_INIT(5, 16,2,1), |
| 865 | CODING_INIT(5, 32,0,1), |
| 866 | CODING_INIT(5, 32,1,1), |
| 867 | CODING_INIT(5, 32,2,1), |
| 868 | CODING_INIT(5, 64,0,1), |
| 869 | CODING_INIT(5, 64,1,1), |
| 870 | CODING_INIT(5, 64,2,1), |
| 871 | CODING_INIT(5,128,0,1), |
| 872 | CODING_INIT(5,128,1,1), |
| 873 | CODING_INIT(5,128,2,1), |
| 874 | |
| 875 | // Variable length subrange codings: |
| 876 | CODING_INIT(2,192,0,0), |
| 877 | CODING_INIT(2,224,0,0), |
| 878 | CODING_INIT(2,240,0,0), |
| 879 | CODING_INIT(2,248,0,0), |
| 880 | CODING_INIT(2,252,0,0), |
| 881 | |
| 882 | CODING_INIT(2, 8,0,1), |
| 883 | CODING_INIT(2, 8,1,1), |
| 884 | CODING_INIT(2, 16,0,1), |
| 885 | CODING_INIT(2, 16,1,1), |
| 886 | CODING_INIT(2, 32,0,1), |
| 887 | CODING_INIT(2, 32,1,1), |
| 888 | CODING_INIT(2, 64,0,1), |
| 889 | CODING_INIT(2, 64,1,1), |
| 890 | CODING_INIT(2,128,0,1), |
| 891 | CODING_INIT(2,128,1,1), |
| 892 | CODING_INIT(2,192,0,1), |
| 893 | CODING_INIT(2,192,1,1), |
| 894 | CODING_INIT(2,224,0,1), |
| 895 | CODING_INIT(2,224,1,1), |
| 896 | CODING_INIT(2,240,0,1), |
| 897 | CODING_INIT(2,240,1,1), |
| 898 | CODING_INIT(2,248,0,1), |
| 899 | CODING_INIT(2,248,1,1), |
| 900 | |
| 901 | CODING_INIT(3,192,0,0), |
| 902 | CODING_INIT(3,224,0,0), |
| 903 | CODING_INIT(3,240,0,0), |
| 904 | CODING_INIT(3,248,0,0), |
| 905 | CODING_INIT(3,252,0,0), |
| 906 | |
| 907 | CODING_INIT(3, 8,0,1), |
| 908 | CODING_INIT(3, 8,1,1), |
| 909 | CODING_INIT(3, 16,0,1), |
| 910 | CODING_INIT(3, 16,1,1), |
| 911 | CODING_INIT(3, 32,0,1), |
| 912 | CODING_INIT(3, 32,1,1), |
| 913 | CODING_INIT(3, 64,0,1), |
| 914 | CODING_INIT(3, 64,1,1), |
| 915 | CODING_INIT(3,128,0,1), |
| 916 | CODING_INIT(3,128,1,1), |
| 917 | CODING_INIT(3,192,0,1), |
| 918 | CODING_INIT(3,192,1,1), |
| 919 | CODING_INIT(3,224,0,1), |
| 920 | CODING_INIT(3,224,1,1), |
| 921 | CODING_INIT(3,240,0,1), |
| 922 | CODING_INIT(3,240,1,1), |
| 923 | CODING_INIT(3,248,0,1), |
| 924 | CODING_INIT(3,248,1,1), |
| 925 | |
| 926 | CODING_INIT(4,192,0,0), |
| 927 | CODING_INIT(4,224,0,0), |
| 928 | CODING_INIT(4,240,0,0), |
| 929 | CODING_INIT(4,248,0,0), |
| 930 | CODING_INIT(4,252,0,0), |
| 931 | |
| 932 | CODING_INIT(4, 8,0,1), |
| 933 | CODING_INIT(4, 8,1,1), |
| 934 | CODING_INIT(4, 16,0,1), |
| 935 | CODING_INIT(4, 16,1,1), |
| 936 | CODING_INIT(4, 32,0,1), |
| 937 | CODING_INIT(4, 32,1,1), |
| 938 | CODING_INIT(4, 64,0,1), |
| 939 | CODING_INIT(4, 64,1,1), |
| 940 | CODING_INIT(4,128,0,1), |
| 941 | CODING_INIT(4,128,1,1), |
| 942 | CODING_INIT(4,192,0,1), |
| 943 | CODING_INIT(4,192,1,1), |
| 944 | CODING_INIT(4,224,0,1), |
| 945 | CODING_INIT(4,224,1,1), |
| 946 | CODING_INIT(4,240,0,1), |
| 947 | CODING_INIT(4,240,1,1), |
| 948 | CODING_INIT(4,248,0,1), |
| 949 | CODING_INIT(4,248,1,1), |
| 950 | |
// Terminator: presumably the spec == 0 entry at which coding::findBySpec
// stops scanning -- confirm that spec is the first member of coding.
| 951 | 0 |
| 952 | }; |
// Number of entries in basic_codings, not counting the final terminator.
#define BASIC_INDEX_LIMIT \
  ((sizeof(basic_codings) / sizeof(*basic_codings)) - 1)
| 955 | |
| 956 | coding* coding::findByIndex(int idx) { |
| 957 | assert(_meta_canon_min == 1); |
| 958 | assert(_meta_canon_max+1 == BASIC_INDEX_LIMIT); |
| 959 | if (idx >= _meta_canon_min && idx <= _meta_canon_max) |
| 960 | return basic_codings[idx].init(); |
| 961 | else |
| 962 | return null; |
| 963 | } |
| 964 | |
| 965 | #ifndef PRODUCT |
| 966 | const char* coding::string() { |
| 967 | CODING_PRIVATE(spec); |
| 968 | bytes buf; |
| 969 | buf.malloc(100); |
| 970 | char maxS[20], minS[20]; |
| 971 | sprintf(maxS, "%d", max); |
| 972 | sprintf(minS, "%d", min); |
| 973 | if (max == INT_MAX_VALUE) strcpy(maxS, "max"); |
| 974 | if (min == INT_MIN_VALUE) strcpy(minS, "min"); |
| 975 | sprintf((char*)buf.ptr, "(%d,%d,%d,%d) L=%d r=[%s,%s]", |
| 976 | B,H,S,D,L,minS,maxS); |
| 977 | return (const char*) buf.ptr; |
| 978 | } |
| 979 | #endif |