Blame - jdk/src/share/classes/java/net/URLDecoder.java - platform/libcore

blob: 3f83fbb5a5521df6dbb77eb438c33ec4bf36d749 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1998-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package java.net;
				27
				28	import java.io.*;
				29
				30	/**
				31	* Utility class for HTML form decoding. This class contains static methods
				32	* for decoding a String from the <CODE>application/x-www-form-urlencoded</CODE>
				33	* MIME format.
				34	* <p>
				35	* The conversion process is the reverse of that used by the URLEncoder class. It is assumed
				36	* that all characters in the encoded string are one of the following:
				37	* "<code>a</code>" through "<code>z</code>",
				38	* "<code>A</code>" through "<code>Z</code>",
				39	* "<code>0</code>" through "<code>9</code>", and
				40	* "<code>-</code>", "<code>_</code>",
				41	* "<code>.</code>", and "<code>*</code>". The
				42	* character "<code>%</code>" is allowed but is interpreted
				43	* as the start of a special escaped sequence.
				44	* <p>
				45	* The following rules are applied in the conversion:
				46	* <p>
				47	* <ul>
				48	* <li>The alphanumeric characters "<code>a</code>" through
				49	* "<code>z</code>", "<code>A</code>" through
				50	* "<code>Z</code>" and "<code>0</code>"
				51	* through "<code>9</code>" remain the same.
				52	* <li>The special characters "<code>.</code>",
				53	* "<code>-</code>", "<code>*</code>", and
				54	* "<code>_</code>" remain the same.
				55	* <li>The plus sign "<code>+</code>" is converted into a
				56	* space character "<code> </code>" .
				57	* <li>A sequence of the form "<code>%<i>xy</i></code>" will be
				58	* treated as representing a byte where <i>xy</i> is the two-digit
				59	* hexadecimal representation of the 8 bits. Then, all substrings
				60	* that contain one or more of these byte sequences consecutively
				61	* will be replaced by the character(s) whose encoding would result
				62	* in those consecutive bytes.
				63	* The encoding scheme used to decode these characters may be specified,
				64	* or if unspecified, the default encoding of the platform will be used.
				65	* </ul>
				66	* <p>
				67	* There are two possible ways in which this decoder could deal with
				68	* illegal strings. It could either leave illegal characters alone or
				69	* it could throw an <tt>{@link java.lang.IllegalArgumentException}</tt>.
				70	* Which approach the decoder takes is left to the
				71	* implementation.
				72	*
				73	* @author Mark Chamness
				74	* @author Michael McCloskey
				75	* @since 1.2
				76	*/
				77
				78	public class URLDecoder {
				79
				80	// The platform default encoding
				81	static String dfltEncName = URLEncoder.dfltEncName;
				82
				83	/**
				84	* Decodes a <code>x-www-form-urlencoded</code> string.
				85	* The platform's default encoding is used to determine what characters
				86	* are represented by any consecutive sequences of the form
				87	* "<code>%<i>xy</i></code>".
				88	* @param s the <code>String</code> to decode
				89	* @deprecated The resulting string may vary depending on the platform's
				90	* default encoding. Instead, use the decode(String,String) method
				91	* to specify the encoding.
				92	* @return the newly decoded <code>String</code>
				93	*/
				94	@Deprecated
				95	public static String decode(String s) {
				96
				97	String str = null;
				98
				99	try {
				100	str = decode(s, dfltEncName);
				101	} catch (UnsupportedEncodingException e) {
				102	// The system should always have the platform default
				103	}
				104
				105	return str;
				106	}
				107
				108	/**
				109	* Decodes a <code>application/x-www-form-urlencoded</code> string using a specific
				110	* encoding scheme.
				111	* The supplied encoding is used to determine
				112	* what characters are represented by any consecutive sequences of the
				113	* form "<code>%<i>xy</i></code>".
				114	* <p>
				115	* <em><strong>Note:</strong> The <a href=
				116	* "http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars">
				117	* World Wide Web Consortium Recommendation</a> states that
				118	* UTF-8 should be used. Not doing so may introduce
				119	* incompatibilites.</em>
				120	*
				121	* @param s the <code>String</code> to decode
				122	* @param enc The name of a supported
				123	* <a href="../lang/package-summary.html#charenc">character
				124	* encoding</a>.
				125	* @return the newly decoded <code>String</code>
				126	* @exception UnsupportedEncodingException
				127	* If character encoding needs to be consulted, but
				128	* named character encoding is not supported
				129	* @see URLEncoder#encode(java.lang.String, java.lang.String)
				130	* @since 1.4
				131	*/
				132	public static String decode(String s, String enc)
				133	throws UnsupportedEncodingException{
				134
				135	boolean needToChange = false;
				136	int numChars = s.length();
				137	StringBuffer sb = new StringBuffer(numChars > 500 ? numChars / 2 : numChars);
				138	int i = 0;
				139
				140	if (enc.length() == 0) {
				141	throw new UnsupportedEncodingException ("URLDecoder: empty string enc parameter");
				142	}
				143
				144	char c;
				145	byte[] bytes = null;
				146	while (i < numChars) {
				147	c = s.charAt(i);
				148	switch (c) {
				149	case '+':
				150	sb.append(' ');
				151	i++;
				152	needToChange = true;
				153	break;
				154	case '%':
				155	/*
				156	* Starting with this instance of %, process all
				157	* consecutive substrings of the form %xy. Each
				158	* substring %xy will yield a byte. Convert all
				159	* consecutive bytes obtained this way to whatever
				160	* character(s) they represent in the provided
				161	* encoding.
				162	*/
				163
				164	try {
				165
				166	// (numChars-i)/3 is an upper bound for the number
				167	// of remaining bytes
				168	if (bytes == null)
				169	bytes = new byte[(numChars-i)/3];
				170	int pos = 0;
				171
				172	while ( ((i+2) < numChars) &&
				173	(c=='%')) {
				174	int v = Integer.parseInt(s.substring(i+1,i+3),16);
				175	if (v < 0)
				176	throw new IllegalArgumentException("URLDecoder: Illegal hex characters in escape (%) pattern - negative value");
				177	bytes[pos++] = (byte) v;
				178	i+= 3;
				179	if (i < numChars)
				180	c = s.charAt(i);
				181	}
				182
				183	// A trailing, incomplete byte encoding such as
				184	// "%x" will cause an exception to be thrown
				185
				186	if ((i < numChars) && (c=='%'))
				187	throw new IllegalArgumentException(
				188	"URLDecoder: Incomplete trailing escape (%) pattern");
				189
				190	sb.append(new String(bytes, 0, pos, enc));
				191	} catch (NumberFormatException e) {
				192	throw new IllegalArgumentException(
				193	"URLDecoder: Illegal hex characters in escape (%) pattern - "
				194	+ e.getMessage());
				195	}
				196	needToChange = true;
				197	break;
				198	default:
				199	sb.append(c);
				200	i++;
				201	break;
				202	}
				203	}
				204
				205	return (needToChange? sb.toString() : s);
				206	}
				207	}