blob: 2e8519e9f9cc2f4f1f27238a6c0c171656f84311 [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 2001 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 */
23
24/*
25 * @test
26 * @bug 4396708
27 * @summary Test URL encoder and decoder on a string that contains
28 * surrogate pairs.
29 *
30 */
31
32import java.io.*;
33import java.net.*;
34
/*
 * A surrogate pair is a sequence of two UTF-16 code units that together
 * represent one supplementary character: the first unit lies in the range
 * [d800, dbff] and the second unit lies in the range [dc00, dfff]. Surrogate
 * pairs are used as an escaping mechanism to add 1M more characters to
 * Unicode.
 */
/**
 * Regression test for bug 4396708: checks that {@link URLEncoder} and
 * {@link URLDecoder} handle strings containing surrogate pairs when UTF-8 is
 * the requested encoding.
 *
 * <p>Every entry of {@code testStrings} is encoded and compared with the entry
 * at the same index of {@code correctEncodings}; the encoded form is then
 * decoded and compared with the original string. The first mismatch aborts
 * the run with an exception.
 */
public class SurrogatePairs {

    /**
     * Input strings. Each contains one surrogate pair, either alone or next
     * to a one-unit character placed before or after it.
     */
    static String[] testStrings = {"\uD800\uDC00",
                                   "\uD800\uDFFF",
                                   "\uDBFF\uDC00",
                                   "\uDBFF\uDFFF",
                                   "1\uDBFF\uDC00",
                                   "@\uDBFF\uDC00",
                                   "\uDBFF\uDC001",
                                   "\uDBFF\uDC00@",
                                   "\u0101\uDBFF\uDC00",
                                   "\uDBFF\uDC00\u0101"
    };

    /**
     * Expected result of URL-encoding the entry with the same index in
     * {@code testStrings} using UTF-8.
     */
    static String[] correctEncodings = {"%F0%90%80%80",
                                        "%F0%90%8F%BF",
                                        "%F4%8F%B0%80",
                                        "%F4%8F%BF%BF",
                                        "1%F4%8F%B0%80",
                                        "%40%F4%8F%B0%80",
                                        "%F4%8F%B0%801",
                                        "%F4%8F%B0%80%40",
                                        "%C4%81%F4%8F%B0%80",
                                        "%F4%8F%B0%80%C4%81"
    };

    /**
     * Runs the encode/decode check for every test vector.
     *
     * @param args ignored
     * @throws IllegalStateException if the two vector tables differ in length
     * @throws Exception if an encoding or a decoding does not match
     */
    public static void main(String[] args) throws Exception {

        // The tables are matched by index, so a length mismatch would either
        // leave expectations unchecked or run off the end of the shorter
        // table. Reject it before running any vector.
        if (testStrings.length != correctEncodings.length) {
            throw new IllegalStateException(
                "testStrings has " + testStrings.length
                + " entries but correctEncodings has "
                + correctEncodings.length);
        }

        for (int i = 0; i < testStrings.length; i++) {
            test(testStrings[i], correctEncodings[i]);
        }
    }

    /**
     * Encodes {@code str}, verifies the result, decodes it again and verifies
     * that the original string is recovered. Progress is reported on
     * {@code System.out}.
     *
     * @param str the string to round-trip
     * @param correctEncoding the expected URL-encoded form of {@code str}
     * @throws Exception if the encoded or the decoded value is not as expected
     */
    private static void test(String str, String correctEncoding)
        throws Exception {

        System.out.println("Unicode bytes of test string are: "
                           + getHexBytes(str));

        String encoded = URLEncoder.encode(str, "UTF-8");

        System.out.println("URLEncoding is: " + encoded);

        if (encoded.equals(correctEncoding)) {
            System.out.println("The encoding is correct!");
        } else {
            throw new Exception("The encoding is incorrect!" +
                                " It should be " + correctEncoding);
        }

        String decoded = URLDecoder.decode(encoded, "UTF-8");

        System.out.println("Unicode bytes for URLDecoding are: "
                           + getHexBytes(decoded));

        if (str.equals(decoded)) {
            System.out.println("The decoding is correct");
        } else {
            throw new Exception("The decoded is not equal to the original");
        }
        System.out.println("---");
    }

    /**
     * Renders every {@code char} of {@code s} as four lowercase hexadecimal
     * digits followed by a single space.
     *
     * @param s the string to dump
     * @return the hex dump; empty when {@code s} is empty
     */
    private static String getHexBytes(String s) {
        // Five output characters per input char: four digits and a space.
        StringBuilder hex = new StringBuilder(s.length() * 5);
        for (int i = 0; i < s.length(); i++) {
            char unit = s.charAt(i);
            // Emit the nibbles from most to least significant.
            for (int shift = 12; shift >= 0; shift -= 4) {
                hex.append(Character.forDigit((unit >> shift) & 0x0f, 16));
            }
            hex.append(' ');
        }
        return hex.toString();
    }

}
125}