| /* |
| * Copyright (c) 2010, 2013, Oracle and/or its affiliates. All rights reserved. |
| * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| * |
| * This code is free software; you can redistribute it and/or modify it |
| * under the terms of the GNU General Public License version 2 only, as |
| * published by the Free Software Foundation. Oracle designates this |
| * particular file as subject to the "Classpath" exception as provided |
| * by Oracle in the LICENSE file that accompanied this code. |
| * |
| * This code is distributed in the hope that it will be useful, but WITHOUT |
| * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| * version 2 for more details (a copy is included in the LICENSE file that |
| * accompanied this code). |
| * |
| * You should have received a copy of the GNU General Public License version |
| * 2 along with this work; if not, write to the Free Software Foundation, |
| * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| * |
| * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA |
| * or visit www.oracle.com if you need additional information or have any |
| * questions. |
| */ |
| |
| package jdk.nashorn.internal.runtime; |
| |
| import static jdk.nashorn.internal.runtime.ECMAErrors.uriError; |
| |
| /** |
| * URI handling global functions. ECMA 15.1.3 URI Handling Function Properties |
| * |
| */ |
| public final class URIUtils { |
| |
| private URIUtils() { |
| } |
| |
| static String encodeURI(final Object self, final String string) { |
| return encode(self, string, false); |
| } |
| |
| static String encodeURIComponent(final Object self, final String string) { |
| return encode(self, string, true); |
| } |
| |
| static String decodeURI(final Object self, final String string) { |
| return decode(self, string, false); |
| } |
| |
| static String decodeURIComponent(final Object self, final String string) { |
| return decode(self, string, true); |
| } |
| |
| // abstract encode function |
| private static String encode(final Object self, final String string, final boolean component) { |
| if (string.isEmpty()) { |
| return string; |
| } |
| |
| final int len = string.length(); |
| final StringBuilder sb = new StringBuilder(); |
| |
| for (int k = 0; k < len; k++) { |
| final char C = string.charAt(k); |
| if (isUnescaped(C, component)) { |
| sb.append(C); |
| continue; |
| } |
| |
| if (C >= 0xDC00 && C <= 0xDFFF) { |
| return error(string, k); |
| } |
| |
| int V; |
| if (C < 0xD800 || C > 0xDBFF) { |
| V = C; |
| } else { |
| k++; |
| if (k == len) { |
| return error(string, k); |
| } |
| |
| final char kChar = string.charAt(k); |
| if (kChar < 0xDC00 || kChar > 0xDFFF) { |
| return error(string, k); |
| } |
| V = ((C - 0xD800) * 0x400 + (kChar - 0xDC00) + 0x10000); |
| } |
| |
| try { |
| sb.append(toHexEscape(V)); |
| } catch (final Exception e) { |
| throw uriError(e, "bad.uri", string, Integer.toString(k)); |
| } |
| } |
| |
| return sb.toString(); |
| } |
| |
| // abstract decode function |
| private static String decode(final Object self, final String string, final boolean component) { |
| if (string.isEmpty()) { |
| return string; |
| } |
| |
| final int len = string.length(); |
| final StringBuilder sb = new StringBuilder(); |
| |
| for (int k = 0; k < len; k++) { |
| final char ch = string.charAt(k); |
| if (ch != '%') { |
| sb.append(ch); |
| continue; |
| } |
| final int start = k; |
| if (k + 2 >= len) { |
| return error(string, k); |
| } |
| |
| int B = toHexByte(string.charAt(k + 1), string.charAt(k + 2)); |
| if (B < 0) { |
| return error(string, k + 1); |
| } |
| |
| k += 2; |
| char C; |
| // Most significant bit is zero |
| if ((B & 0x80) == 0) { |
| C = (char) B; |
| if (!component && URI_RESERVED.indexOf(C) >= 0) { |
| for (int j = start; j <= k; j++) { |
| sb.append(string.charAt(j)); |
| } |
| } else { |
| sb.append(C); |
| } |
| } else { |
| // n is utf8 length, V is codepoint and minV is lower bound |
| int n, V, minV; |
| |
| if ((B & 0xC0) == 0x80) { |
| // 10xxxxxx - illegal first byte |
| return error(string, k); |
| } else if ((B & 0x20) == 0) { |
| // 110xxxxx 10xxxxxx |
| n = 2; |
| V = B & 0x1F; |
| minV = 0x80; |
| } else if ((B & 0x10) == 0) { |
| // 1110xxxx 10xxxxxx 10xxxxxx |
| n = 3; |
| V = B & 0x0F; |
| minV = 0x800; |
| } else if ((B & 0x08) == 0) { |
| // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx |
| n = 4; |
| V = B & 0x07; |
| minV = 0x10000; |
| } else if ((B & 0x04) == 0) { |
| // 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| n = 5; |
| V = B & 0x03; |
| minV = 0x200000; |
| } else if ((B & 0x02) == 0) { |
| // 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx |
| n = 6; |
| V = B & 0x01; |
| minV = 0x4000000; |
| } else { |
| return error(string, k); |
| } |
| |
| // check bound for sufficient chars |
| if (k + (3*(n-1)) >= len) { |
| return error(string, k); |
| } |
| |
| for (int j = 1; j < n; j++) { |
| k++; |
| if (string.charAt(k) != '%') { |
| return error(string, k); |
| } |
| |
| B = toHexByte(string.charAt(k + 1), string.charAt(k + 2)); |
| if (B < 0 || (B & 0xC0) != 0x80) { |
| return error(string, k + 1); |
| } |
| |
| V = (V << 6) | (B & 0x3F); |
| k += 2; |
| } |
| |
| // Check for overlongs and invalid codepoints. |
| // The high and low surrogate halves used by UTF-16 |
| // (U+D800 through U+DFFF) are not legal Unicode values. |
| if ((V < minV) || (V >= 0xD800 && V <= 0xDFFF)) { |
| V = Integer.MAX_VALUE; |
| } |
| |
| if (V < 0x10000) { |
| C = (char) V; |
| if (!component && URI_RESERVED.indexOf(C) >= 0) { |
| for (int j = start; j != k; j++) { |
| sb.append(string.charAt(j)); |
| } |
| } else { |
| sb.append(C); |
| } |
| } else { // V >= 0x10000 |
| if (V > 0x10FFFF) { |
| return error(string, k); |
| } |
| final int L = ((V - 0x10000) & 0x3FF) + 0xDC00; |
| final int H = (((V - 0x10000) >> 10) & 0x3FF) + 0xD800; |
| sb.append((char) H); |
| sb.append((char) L); |
| } |
| } |
| } |
| |
| return sb.toString(); |
| } |
| |
| private static int hexDigit(final char ch) { |
| final char chu = Character.toUpperCase(ch); |
| if (chu >= '0' && chu <= '9') { |
| return (chu - '0'); |
| } else if (chu >= 'A' && chu <= 'F') { |
| return (chu - 'A' + 10); |
| } else { |
| return -1; |
| } |
| } |
| |
| private static int toHexByte(final char ch1, final char ch2) { |
| final int i1 = hexDigit(ch1); |
| final int i2 = hexDigit(ch2); |
| if (i1 >= 0 && i2 >= 0) { |
| return (i1 << 4) | i2; |
| } |
| return -1; |
| } |
| |
| private static String toHexEscape(final int u0) { |
| int u = u0; |
| int len; |
| final byte[] b = new byte[6]; |
| |
| if (u <= 0x7f) { |
| b[0] = (byte) u; |
| len = 1; |
| } else { |
| // > 0x7ff -> length 2 |
| // > 0xffff -> length 3 |
| // and so on. each new length is an additional 5 bits from the |
| // original 11 |
| // the final mask is 8-len zeros in the low part. |
| len = 2; |
| for (int mask = u >>> 11; mask != 0; mask >>>= 5) { |
| len++; |
| } |
| for (int i = len - 1; i > 0; i--) { |
| b[i] = (byte) (0x80 | (u & 0x3f)); |
| u >>>= 6; // 64 bits per octet. |
| } |
| |
| b[0] = (byte) (~((1 << (8 - len)) - 1) | u); |
| } |
| |
| final StringBuilder sb = new StringBuilder(); |
| for (int i = 0; i < len; i++) { |
| sb.append('%'); |
| if ((b[i] & 0xff) < 0x10) { |
| sb.append('0'); |
| } |
| sb.append(Integer.toHexString(b[i] & 0xff).toUpperCase()); |
| } |
| |
| return sb.toString(); |
| } |
| |
| private static String error(final String string, final int index) { |
| throw uriError("bad.uri", string, Integer.toString(index)); |
| } |
| |
| // 'uriEscaped' except for alphanumeric chars |
| private static final String URI_UNESCAPED_NONALPHANUMERIC = "-_.!~*'()"; |
| // 'uriReserved' + '#' |
| private static final String URI_RESERVED = ";/?:@&=+$,#"; |
| |
| private static boolean isUnescaped(final char ch, final boolean component) { |
| if (('A' <= ch && ch <= 'Z') || ('a' <= ch && ch <= 'z') |
| || ('0' <= ch && ch <= '9')) { |
| return true; |
| } |
| |
| if (URI_UNESCAPED_NONALPHANUMERIC.indexOf(ch) >= 0) { |
| return true; |
| } |
| |
| if (!component) { |
| return URI_RESERVED.indexOf(ch) >= 0; |
| } |
| |
| return false; |
| } |
| } |