J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 2001 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. |
| 8 | * |
| 9 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 10 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 11 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 12 | * version 2 for more details (a copy is included in the LICENSE file that |
| 13 | * accompanied this code). |
| 14 | * |
| 15 | * You should have received a copy of the GNU General Public License version |
| 16 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 17 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 18 | * |
| 19 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 20 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 21 | * have any questions. |
| 22 | */ |
| 23 | |
| 24 | /* |
| 25 | * @test |
| 26 | * @bug 4396708 |
| 27 | * @summary Test URL encoder and decoder on a string that contains |
| 28 | * surrogate pairs. |
| 29 | * |
| 30 | */ |
| 31 | |
| 32 | import java.io.*; |
| 33 | import java.net.*; |
| 34 | |
| 35 | /* |
| 36 | * Surrogate pairs are two character Unicode sequences where the first |
| 37 | * character lies in the range [d800, dbff] and the second character lies |
| 38 | * in the range [dc00, dfff]. They are used as an escaping mechanism to add |
| 39 | * 1M more characters to Unicode. |
| 40 | */ |
/**
 * Regression test that round-trips strings containing surrogate pairs
 * through {@link URLEncoder} and {@link URLDecoder} using UTF-8.
 *
 * <p>Each input in {@link #testStrings} is encoded and compared with the
 * entry at the same index in {@link #correctEncodings}; the encoded form is
 * then decoded and compared with the original input. Any mismatch aborts
 * the run with an exception whose message carries both the expected and
 * the actual value.
 */
public class SurrogatePairs {

    /**
     * Inputs under test. The first four entries are the corner cases of the
     * surrogate ranges (lowest/highest high surrogate combined with
     * lowest/highest low surrogate); the remaining entries place a pair
     * before or after an ASCII character that is left unescaped ('1'), an
     * ASCII character that is escaped ('@'), and a non-ASCII BMP character
     * (U+0101).
     */
    static String[] testStrings = {
        "\uD800\uDC00",
        "\uD800\uDFFF",
        "\uDBFF\uDC00",
        "\uDBFF\uDFFF",
        "1\uDBFF\uDC00",
        "@\uDBFF\uDC00",
        "\uDBFF\uDC001",
        "\uDBFF\uDC00@",
        "\u0101\uDBFF\uDC00",
        "\uDBFF\uDC00\u0101"
    };

    /**
     * Expected result of URL-encoding the entry at the same index of
     * {@link #testStrings}: each surrogate pair becomes the percent-escaped
     * four-byte UTF-8 sequence of the code point it represents.
     */
    static String[] correctEncodings = {
        "%F0%90%80%80",
        "%F0%90%8F%BF",
        "%F4%8F%B0%80",
        "%F4%8F%BF%BF",
        "1%F4%8F%B0%80",
        "%40%F4%8F%B0%80",
        "%F4%8F%B0%801",
        "%F4%8F%B0%80%40",
        "%C4%81%F4%8F%B0%80",
        "%F4%8F%B0%80%C4%81"
    };

    /**
     * Runs every input/expectation pair.
     *
     * @param args ignored
     * @throws Exception if the tables are inconsistent or any encoding or
     *         decoding check fails
     */
    public static void main(String[] args) throws Exception {
        // The two tables are parallel arrays; report a mismatch explicitly
        // instead of failing later with an ArrayIndexOutOfBoundsException.
        if (testStrings.length != correctEncodings.length) {
            throw new IllegalStateException(
                "Test tables are out of sync: " + testStrings.length
                + " test strings but " + correctEncodings.length
                + " expected encodings");
        }

        for (int i = 0; i < testStrings.length; i++) {
            test(testStrings[i], correctEncodings[i]);
        }
    }

    /**
     * Encodes {@code str}, checks the result against
     * {@code correctEncoding}, then decodes it again and checks that the
     * original string comes back. Progress is logged to standard output.
     *
     * @param str the string to round-trip
     * @param correctEncoding the expected URL-encoded form of {@code str}
     * @throws UnsupportedEncodingException if UTF-8 is reported as
     *         unsupported by the encoder or decoder
     * @throws RuntimeException if the encoded or the decoded value is not
     *         the expected one
     */
    private static void test(String str, String correctEncoding)
            throws UnsupportedEncodingException {

        System.out.println("Unicode bytes of test string are: "
                           + getHexBytes(str));

        String encoded = URLEncoder.encode(str, "UTF-8");

        System.out.println("URLEncoding is: " + encoded);

        if (!encoded.equals(correctEncoding)) {
            // Include the actual value so the failure is diagnosable from
            // the exception message alone.
            throw new RuntimeException("The encoding is incorrect!"
                                       + " It should be " + correctEncoding
                                       + " but was " + encoded);
        }
        System.out.println("The encoding is correct!");

        String decoded = URLDecoder.decode(encoded, "UTF-8");

        System.out.println("Unicode bytes for URLDecoding are: "
                           + getHexBytes(decoded));

        if (!str.equals(decoded)) {
            throw new RuntimeException(
                "The decoded string is not equal to the original:"
                + " expected [" + getHexBytes(str).trim()
                + "] but got [" + getHexBytes(decoded).trim() + "]");
        }
        System.out.println("The decoding is correct");
        System.out.println("---");
    }

    /**
     * Renders every UTF-16 code unit of {@code s} as four lowercase hex
     * digits followed by a single space, e.g. {@code "d800 dc00 "}.
     *
     * @param s the string to dump
     * @return the hex dump; empty if {@code s} is empty
     */
    private static String getHexBytes(String s) {
        // Five output characters per code unit: four digits plus a space.
        StringBuilder sb = new StringBuilder(s.length() * 5);
        for (int i = 0; i < s.length(); i++) {
            char unit = s.charAt(i);
            // Emit the nibbles from most to least significant.
            for (int shift = 12; shift >= 0; shift -= 4) {
                sb.append(Character.forDigit((unit >> shift) & 0xF, 16));
            }
            sb.append(' ');
        }
        return sb.toString();
    }

}