/*
2 * Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package java.net;
27
28import java.io.*;
29
30/**
31 * Utility class for HTML form decoding. This class contains static methods
32 * for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
33 * MIME format.
34 * <p>
35 * The conversion process is the reverse of that used by the URLEncoder class. It is assumed
36 * that all characters in the encoded string are one of the following:
37 * &quot;<code>a</code>&quot; through &quot;<code>z</code>&quot;,
38 * &quot;<code>A</code>&quot; through &quot;<code>Z</code>&quot;,
39 * &quot;<code>0</code>&quot; through &quot;<code>9</code>&quot;, and
40 * &quot;<code>-</code>&quot;, &quot;<code>_</code>&quot;,
41 * &quot;<code>.</code>&quot;, and &quot;<code>*</code>&quot;. The
42 * character &quot;<code>%</code>&quot; is allowed but is interpreted
43 * as the start of a special escaped sequence.
44 * <p>
45 * The following rules are applied in the conversion:
46 * <p>
47 * <ul>
48 * <li>The alphanumeric characters &quot;<code>a</code>&quot; through
49 * &quot;<code>z</code>&quot;, &quot;<code>A</code>&quot; through
50 * &quot;<code>Z</code>&quot; and &quot;<code>0</code>&quot;
51 * through &quot;<code>9</code>&quot; remain the same.
52 * <li>The special characters &quot;<code>.</code>&quot;,
53 * &quot;<code>-</code>&quot;, &quot;<code>*</code>&quot;, and
54 * &quot;<code>_</code>&quot; remain the same.
55 * <li>The plus sign &quot;<code>+</code>&quot; is converted into a
56 * space character &quot;<code>&nbsp;</code>&quot; .
57 * <li>A sequence of the form "<code>%<i>xy</i></code>" will be
58 * treated as representing a byte where <i>xy</i> is the two-digit
59 * hexadecimal representation of the 8 bits. Then, all substrings
60 * that contain one or more of these byte sequences consecutively
61 * will be replaced by the character(s) whose encoding would result
62 * in those consecutive bytes.
63 * The encoding scheme used to decode these characters may be specified,
64 * or if unspecified, the default encoding of the platform will be used.
65 * </ul>
66 * <p>
67 * There are two possible ways in which this decoder could deal with
68 * illegal strings. It could either leave illegal characters alone or
69 * it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
70 * Which approach the decoder takes is left to the
71 * implementation.
72 *
73 * @author Mark Chamness
74 * @author Michael McCloskey
75 * @since 1.2
76 */
77
78public class URLDecoder {
79
80 // The platform default encoding
81 static String dfltEncName = URLEncoder.dfltEncName;
82
83 /**
84 * Decodes a <code>x-www-form-urlencoded</code> string.
85 * The platform's default encoding is used to determine what characters
86 * are represented by any consecutive sequences of the form
87 * "<code>%<i>xy</i></code>".
88 * @param s the <code>String</code> to decode
89 * @deprecated The resulting string may vary depending on the platform's
90 * default encoding. Instead, use the decode(String,String) method
91 * to specify the encoding.
92 * @return the newly decoded <code>String</code>
93 */
94 @Deprecated
95 public static String decode(String s) {
96
97 String str = null;
98
99 try {
100 str = decode(s, dfltEncName);
101 } catch (UnsupportedEncodingException e) {
102 // The system should always have the platform default
103 }
104
105 return str;
106 }
107
108 /**
109 * Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
110 * encoding scheme.
111 * The supplied encoding is used to determine
112 * what characters are represented by any consecutive sequences of the
113 * form "<code>%<i>xy</i></code>".
114 * <p>
115 * <em><strong>Note:</strong> The <a href=
116 * "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
117 * World Wide Web Consortium Recommendation</a> states that
118 * UTF-8 should be used. Not doing so may introduce
119 * incompatibilites.</em>
120 *
121 * @param s the <code>String</code> to decode
122 * @param enc The name of a supported
123 * <a href="../lang/package-summary.html#charenc">character
124 * encoding</a>.
125 * @return the newly decoded <code>String</code>
126 * @exception UnsupportedEncodingException
127 * If character encoding needs to be consulted, but
128 * named character encoding is not supported
129 * @see URLEncoder#encode(java.lang.String, java.lang.String)
130 * @since 1.4
131 */
132 public static String decode(String s, String enc)
133 throws UnsupportedEncodingException{
134
135 boolean needToChange = false;
136 int numChars = s.length();
137 StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
138 int i = 0;
139
140 if (enc.length() == 0) {
141 throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
142 }
143
144 char c;
145 byte[] bytes = null;
146 while (i < numChars) {
147 c = s.charAt(i);
148 switch (c) {
149 case '+':
150 sb.append(' ');
151 i++;
152 needToChange = true;
153 break;
154 case '%':
155 /*
156 * Starting with this instance of %, process all
157 * consecutive substrings of the form %xy. Each
158 * substring %xy will yield a byte. Convert all
159 * consecutive bytes obtained this way to whatever
160 * character(s) they represent in the provided
161 * encoding.
162 */
163
164 try {
165
166 // (numChars-i)/3 is an upper bound for the number
167 // of remaining bytes
168 if (bytes == null)
169 bytes = new byte[(numChars-i)/3];
170 int pos = 0;
171
172 while ( ((i+2) < numChars) &&
173 (c=='%')) {
174 int v = Integer.parseInt(s.substring(i+1,i+3),16);
175 if (v < 0)
176 throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
177 bytes[pos++] = (byte) v;
178 i+= 3;
179 if (i < numChars)
180 c = s.charAt(i);
181 }
182
183 // A trailing, incomplete byte encoding such as
184 // "%x" will cause an exception to be thrown
185
186 if ((i < numChars) && (c=='%'))
187 throw new IllegalArgumentException(
188 "URLDecoder: Incomplete trailing escape (%) pattern");
189
190 sb.append(new String(bytes, 0, pos, enc));
191 } catch (NumberFormatException e) {
192 throw new IllegalArgumentException(
193 "URLDecoder: Illegal hex characters in escape (%) pattern - "
194 + e.getMessage());
195 }
196 needToChange = true;
197 break;
198 default:
199 sb.append(c);
200 i++;
201 break;
202 }
203 }
204
205 return (needToChange? sb.toString() : s);
206 }
207}