| /* Copyright 2015 Google Inc. All Rights Reserved. |
| |
| Distributed under MIT license. |
| See file LICENSE for detail or copy at https://opensource.org/licenses/MIT |
| */ |
| |
| package org.brotli.dec; |
| |
| import static org.brotli.dec.WordTransformType.IDENTITY; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_1; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_2; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_3; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_4; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_5; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_6; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_7; |
| import static org.brotli.dec.WordTransformType.OMIT_FIRST_9; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_1; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_2; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_3; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_4; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_5; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_6; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_7; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_8; |
| import static org.brotli.dec.WordTransformType.OMIT_LAST_9; |
| import static org.brotli.dec.WordTransformType.UPPERCASE_ALL; |
| import static org.brotli.dec.WordTransformType.UPPERCASE_FIRST; |
| |
| import java.nio.ByteBuffer; |
| |
| /** |
| * Transformations on dictionary words. |
| */ |
| final class Transform { |
| |
| private final byte[] prefix; |
| private final int type; |
| private final byte[] suffix; |
| |
| Transform(String prefix, int type, String suffix) { |
| this.prefix = readUniBytes(prefix); |
| this.type = type; |
| this.suffix = readUniBytes(suffix); |
| } |
| |
| static byte[] readUniBytes(String uniBytes) { |
| byte[] result = new byte[uniBytes.length()]; |
| for (int i = 0; i < result.length; ++i) { |
| result[i] = (byte) uniBytes.charAt(i); |
| } |
| return result; |
| } |
| |
| static final Transform[] TRANSFORMS = { |
| new Transform("", IDENTITY, ""), |
| new Transform("", IDENTITY, " "), |
| new Transform(" ", IDENTITY, " "), |
| new Transform("", OMIT_FIRST_1, ""), |
| new Transform("", UPPERCASE_FIRST, " "), |
| new Transform("", IDENTITY, " the "), |
| new Transform(" ", IDENTITY, ""), |
| new Transform("s ", IDENTITY, " "), |
| new Transform("", IDENTITY, " of "), |
| new Transform("", UPPERCASE_FIRST, ""), |
| new Transform("", IDENTITY, " and "), |
| new Transform("", OMIT_FIRST_2, ""), |
| new Transform("", OMIT_LAST_1, ""), |
| new Transform(", ", IDENTITY, " "), |
| new Transform("", IDENTITY, ", "), |
| new Transform(" ", UPPERCASE_FIRST, " "), |
| new Transform("", IDENTITY, " in "), |
| new Transform("", IDENTITY, " to "), |
| new Transform("e ", IDENTITY, " "), |
| new Transform("", IDENTITY, "\""), |
| new Transform("", IDENTITY, "."), |
| new Transform("", IDENTITY, "\">"), |
| new Transform("", IDENTITY, "\n"), |
| new Transform("", OMIT_LAST_3, ""), |
| new Transform("", IDENTITY, "]"), |
| new Transform("", IDENTITY, " for "), |
| new Transform("", OMIT_FIRST_3, ""), |
| new Transform("", OMIT_LAST_2, ""), |
| new Transform("", IDENTITY, " a "), |
| new Transform("", IDENTITY, " that "), |
| new Transform(" ", UPPERCASE_FIRST, ""), |
| new Transform("", IDENTITY, ". "), |
| new Transform(".", IDENTITY, ""), |
| new Transform(" ", IDENTITY, ", "), |
| new Transform("", OMIT_FIRST_4, ""), |
| new Transform("", IDENTITY, " with "), |
| new Transform("", IDENTITY, "'"), |
| new Transform("", IDENTITY, " from "), |
| new Transform("", IDENTITY, " by "), |
| new Transform("", OMIT_FIRST_5, ""), |
| new Transform("", OMIT_FIRST_6, ""), |
| new Transform(" the ", IDENTITY, ""), |
| new Transform("", OMIT_LAST_4, ""), |
| new Transform("", IDENTITY, ". The "), |
| new Transform("", UPPERCASE_ALL, ""), |
| new Transform("", IDENTITY, " on "), |
| new Transform("", IDENTITY, " as "), |
| new Transform("", IDENTITY, " is "), |
| new Transform("", OMIT_LAST_7, ""), |
| new Transform("", OMIT_LAST_1, "ing "), |
| new Transform("", IDENTITY, "\n\t"), |
| new Transform("", IDENTITY, ":"), |
| new Transform(" ", IDENTITY, ". "), |
| new Transform("", IDENTITY, "ed "), |
| new Transform("", OMIT_FIRST_9, ""), |
| new Transform("", OMIT_FIRST_7, ""), |
| new Transform("", OMIT_LAST_6, ""), |
| new Transform("", IDENTITY, "("), |
| new Transform("", UPPERCASE_FIRST, ", "), |
| new Transform("", OMIT_LAST_8, ""), |
| new Transform("", IDENTITY, " at "), |
| new Transform("", IDENTITY, "ly "), |
| new Transform(" the ", IDENTITY, " of "), |
| new Transform("", OMIT_LAST_5, ""), |
| new Transform("", OMIT_LAST_9, ""), |
| new Transform(" ", UPPERCASE_FIRST, ", "), |
| new Transform("", UPPERCASE_FIRST, "\""), |
| new Transform(".", IDENTITY, "("), |
| new Transform("", UPPERCASE_ALL, " "), |
| new Transform("", UPPERCASE_FIRST, "\">"), |
| new Transform("", IDENTITY, "=\""), |
| new Transform(" ", IDENTITY, "."), |
| new Transform(".com/", IDENTITY, ""), |
| new Transform(" the ", IDENTITY, " of the "), |
| new Transform("", UPPERCASE_FIRST, "'"), |
| new Transform("", IDENTITY, ". This "), |
| new Transform("", IDENTITY, ","), |
| new Transform(".", IDENTITY, " "), |
| new Transform("", UPPERCASE_FIRST, "("), |
| new Transform("", UPPERCASE_FIRST, "."), |
| new Transform("", IDENTITY, " not "), |
| new Transform(" ", IDENTITY, "=\""), |
| new Transform("", IDENTITY, "er "), |
| new Transform(" ", UPPERCASE_ALL, " "), |
| new Transform("", IDENTITY, "al "), |
| new Transform(" ", UPPERCASE_ALL, ""), |
| new Transform("", IDENTITY, "='"), |
| new Transform("", UPPERCASE_ALL, "\""), |
| new Transform("", UPPERCASE_FIRST, ". "), |
| new Transform(" ", IDENTITY, "("), |
| new Transform("", IDENTITY, "ful "), |
| new Transform(" ", UPPERCASE_FIRST, ". "), |
| new Transform("", IDENTITY, "ive "), |
| new Transform("", IDENTITY, "less "), |
| new Transform("", UPPERCASE_ALL, "'"), |
| new Transform("", IDENTITY, "est "), |
| new Transform(" ", UPPERCASE_FIRST, "."), |
| new Transform("", UPPERCASE_ALL, "\">"), |
| new Transform(" ", IDENTITY, "='"), |
| new Transform("", UPPERCASE_FIRST, ","), |
| new Transform("", IDENTITY, "ize "), |
| new Transform("", UPPERCASE_ALL, "."), |
| new Transform("\u00c2\u00a0", IDENTITY, ""), |
| new Transform(" ", IDENTITY, ","), |
| new Transform("", UPPERCASE_FIRST, "=\""), |
| new Transform("", UPPERCASE_ALL, "=\""), |
| new Transform("", IDENTITY, "ous "), |
| new Transform("", UPPERCASE_ALL, ", "), |
| new Transform("", UPPERCASE_FIRST, "='"), |
| new Transform(" ", UPPERCASE_FIRST, ","), |
| new Transform(" ", UPPERCASE_ALL, "=\""), |
| new Transform(" ", UPPERCASE_ALL, ", "), |
| new Transform("", UPPERCASE_ALL, ","), |
| new Transform("", UPPERCASE_ALL, "("), |
| new Transform("", UPPERCASE_ALL, ". "), |
| new Transform(" ", UPPERCASE_ALL, "."), |
| new Transform("", UPPERCASE_ALL, "='"), |
| new Transform(" ", UPPERCASE_ALL, ". "), |
| new Transform(" ", UPPERCASE_FIRST, "=\""), |
| new Transform(" ", UPPERCASE_ALL, "='"), |
| new Transform(" ", UPPERCASE_FIRST, "='") |
| }; |
| |
| static int transformDictionaryWord(byte[] dst, int dstOffset, ByteBuffer data, int wordOffset, |
| int len, Transform transform) { |
| int offset = dstOffset; |
| |
| // Copy prefix. |
| byte[] string = transform.prefix; |
| int tmp = string.length; |
| int i = 0; |
| // In most cases tmp < 10 -> no benefits from System.arrayCopy |
| while (i < tmp) { |
| dst[offset++] = string[i++]; |
| } |
| |
| // Copy trimmed word. |
| int op = transform.type; |
| tmp = WordTransformType.getOmitFirst(op); |
| if (tmp > len) { |
| tmp = len; |
| } |
| wordOffset += tmp; |
| len -= tmp; |
| len -= WordTransformType.getOmitLast(op); |
| i = len; |
| while (i > 0) { |
| dst[offset++] = data.get(wordOffset++); |
| i--; |
| } |
| |
| if (op == UPPERCASE_ALL || op == UPPERCASE_FIRST) { |
| int uppercaseOffset = offset - len; |
| if (op == UPPERCASE_FIRST) { |
| len = 1; |
| } |
| while (len > 0) { |
| tmp = dst[uppercaseOffset] & 0xFF; |
| if (tmp < 0xc0) { |
| if (tmp >= 'a' && tmp <= 'z') { |
| dst[uppercaseOffset] ^= (byte) 32; |
| } |
| uppercaseOffset += 1; |
| len -= 1; |
| } else if (tmp < 0xe0) { |
| dst[uppercaseOffset + 1] ^= (byte) 32; |
| uppercaseOffset += 2; |
| len -= 2; |
| } else { |
| dst[uppercaseOffset + 2] ^= (byte) 5; |
| uppercaseOffset += 3; |
| len -= 3; |
| } |
| } |
| } |
| |
| // Copy suffix. |
| string = transform.suffix; |
| tmp = string.length; |
| i = 0; |
| while (i < tmp) { |
| dst[offset++] = string[i++]; |
| } |
| |
| return offset - dstOffset; |
| } |
| } |