Blame - jdk/src/share/classes/java/util/StringTokenizer.java - platform/libcore

blob: 888db76179c253a32a554a2f455cd1249e68d709 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1994-2004 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	package java.util;
				27
				28	import java.lang.*;
				29
				30	/**
				31	* The string tokenizer class allows an application to break a
				32	* string into tokens. The tokenization method is much simpler than
				33	* the one used by the <code>StreamTokenizer</code> class. The
				34	* <code>StringTokenizer</code> methods do not distinguish among
				35	* identifiers, numbers, and quoted strings, nor do they recognize
				36	* and skip comments.
				37	* <p>
				38	* The set of delimiters (the characters that separate tokens) may
				39	* be specified either at creation time or on a per-token basis.
				40	* <p>
				41	* An instance of <code>StringTokenizer</code> behaves in one of two
				42	* ways, depending on whether it was created with the
				43	* <code>returnDelims</code> flag having the value <code>true</code>
				44	* or <code>false</code>:
				45	* <ul>
				46	* <li>If the flag is <code>false</code>, delimiter characters serve to
				47	* separate tokens. A token is a maximal sequence of consecutive
				48	* characters that are not delimiters.
				49	* <li>If the flag is <code>true</code>, delimiter characters are themselves
				50	* considered to be tokens. A token is thus either one delimiter
				51	* character, or a maximal sequence of consecutive characters that are
				52	* not delimiters.
				53	* </ul><p>
				54	* A <tt>StringTokenizer</tt> object internally maintains a current
				55	* position within the string to be tokenized. Some operations advance this
				56	* current position past the characters processed.<p>
				57	* A token is returned by taking a substring of the string that was used to
				58	* create the <tt>StringTokenizer</tt> object.
				59	* <p>
				60	* The following is one example of the use of the tokenizer. The code:
				61	* <blockquote><pre>
				62	* StringTokenizer st = new StringTokenizer("this is a test");
				63	* while (st.hasMoreTokens()) {
				64	* System.out.println(st.nextToken());
				65	* }
				66	* </pre></blockquote>
				67	* <p>
				68	* prints the following output:
				69	* <blockquote><pre>
				70	* this
				71	* is
				72	* a
				73	* test
				74	* </pre></blockquote>
				75	*
				76	* <p>
				77	* <tt>StringTokenizer</tt> is a legacy class that is retained for
				78	* compatibility reasons although its use is discouraged in new code. It is
				79	* recommended that anyone seeking this functionality use the <tt>split</tt>
				80	* method of <tt>String</tt> or the java.util.regex package instead.
				81	* <p>
				82	* The following example illustrates how the <tt>String.split</tt>
				83	* method can be used to break up a string into its basic tokens:
				84	* <blockquote><pre>
				85	* String[] result = "this is a test".split("\\s");
				86	* for (int x=0; x<result.length; x++)
				87	* System.out.println(result[x]);
				88	* </pre></blockquote>
				89	* <p>
				90	* prints the following output:
				91	* <blockquote><pre>
				92	* this
				93	* is
				94	* a
				95	* test
				96	* </pre></blockquote>
				97	*
				98	* @author unascribed
				99	* @see java.io.StreamTokenizer
				100	* @since JDK1.0
				101	*/
				102	public
				103	class StringTokenizer implements Enumeration<Object> {
				104	private int currentPosition;
				105	private int newPosition;
				106	private int maxPosition;
				107	private String str;
				108	private String delimiters;
				109	private boolean retDelims;
				110	private boolean delimsChanged;
				111
				112	/**
				113	* maxDelimCodePoint stores the value of the delimiter character with the
				114	* highest value. It is used to optimize the detection of delimiter
				115	* characters.
				116	*
				117	* It is unlikely to provide any optimization benefit in the
				118	* hasSurrogates case because most string characters will be
				119	* smaller than the limit, but we keep it so that the two code
				120	* paths remain similar.
				121	*/
				122	private int maxDelimCodePoint;
				123
				124	/**
				125	* If delimiters include any surrogates (including surrogate
				126	* pairs), hasSurrogates is true and the tokenizer uses the
				127	* different code path. This is because String.indexOf(int)
				128	* doesn't handle unpaired surrogates as a single character.
				129	*/
				130	private boolean hasSurrogates = false;
				131
				132	/**
				133	* When hasSurrogates is true, delimiters are converted to code
				134	* points and isDelimiter(int) is used to determine if the given
				135	* codepoint is a delimiter.
				136	*/
				137	private int[] delimiterCodePoints;
				138
				139	/**
				140	* Set maxDelimCodePoint to the highest char in the delimiter set.
				141	*/
				142	private void setMaxDelimCodePoint() {
				143	if (delimiters == null) {
				144	maxDelimCodePoint = 0;
				145	return;
				146	}
				147
				148	int m = 0;
				149	int c;
				150	int count = 0;
				151	for (int i = 0; i < delimiters.length(); i += Character.charCount(c)) {
				152	c = delimiters.charAt(i);
				153	if (c >= Character.MIN_HIGH_SURROGATE && c <= Character.MAX_LOW_SURROGATE) {
				154	c = delimiters.codePointAt(i);
				155	hasSurrogates = true;
				156	}
				157	if (m < c)
				158	m = c;
				159	count++;
				160	}
				161	maxDelimCodePoint = m;
				162
				163	if (hasSurrogates) {
				164	delimiterCodePoints = new int[count];
				165	for (int i = 0, j = 0; i < count; i++, j += Character.charCount(c)) {
				166	c = delimiters.codePointAt(j);
				167	delimiterCodePoints[i] = c;
				168	}
				169	}
				170	}
				171
				172	/**
				173	* Constructs a string tokenizer for the specified string. All
				174	* characters in the <code>delim</code> argument are the delimiters
				175	* for separating tokens.
				176	* <p>
				177	* If the <code>returnDelims</code> flag is <code>true</code>, then
				178	* the delimiter characters are also returned as tokens. Each
				179	* delimiter is returned as a string of length one. If the flag is
				180	* <code>false</code>, the delimiter characters are skipped and only
				181	* serve as separators between tokens.
				182	* <p>
				183	* Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
				184	* not throw an exception. However, trying to invoke other methods on the
				185	* resulting <tt>StringTokenizer</tt> may result in a
				186	* <tt>NullPointerException</tt>.
				187	*
				188	* @param str a string to be parsed.
				189	* @param delim the delimiters.
				190	* @param returnDelims flag indicating whether to return the delimiters
				191	* as tokens.
				192	* @exception NullPointerException if str is <CODE>null</CODE>
				193	*/
				194	public StringTokenizer(String str, String delim, boolean returnDelims) {
				195	currentPosition = 0;
				196	newPosition = -1;
				197	delimsChanged = false;
				198	this.str = str;
				199	maxPosition = str.length();
				200	delimiters = delim;
				201	retDelims = returnDelims;
				202	setMaxDelimCodePoint();
				203	}
				204
				205	/**
				206	* Constructs a string tokenizer for the specified string. The
				207	* characters in the <code>delim</code> argument are the delimiters
				208	* for separating tokens. Delimiter characters themselves will not
				209	* be treated as tokens.
				210	* <p>
				211	* Note that if <tt>delim</tt> is <tt>null</tt>, this constructor does
				212	* not throw an exception. However, trying to invoke other methods on the
				213	* resulting <tt>StringTokenizer</tt> may result in a
				214	* <tt>NullPointerException</tt>.
				215	*
				216	* @param str a string to be parsed.
				217	* @param delim the delimiters.
				218	* @exception NullPointerException if str is <CODE>null</CODE>
				219	*/
				220	public StringTokenizer(String str, String delim) {
				221	this(str, delim, false);
				222	}
				223
				224	/**
				225	* Constructs a string tokenizer for the specified string. The
				226	* tokenizer uses the default delimiter set, which is
				227	* <code>" \t\n\r\f"</code>: the space character,
				228	* the tab character, the newline character, the carriage-return character,
				229	* and the form-feed character. Delimiter characters themselves will
				230	* not be treated as tokens.
				231	*
				232	* @param str a string to be parsed.
				233	* @exception NullPointerException if str is <CODE>null</CODE>
				234	*/
				235	public StringTokenizer(String str) {
				236	this(str, " \t\n\r\f", false);
				237	}
				238
				239	/**
				240	* Skips delimiters starting from the specified position. If retDelims
				241	* is false, returns the index of the first non-delimiter character at or
				242	* after startPos. If retDelims is true, startPos is returned.
				243	*/
				244	private int skipDelimiters(int startPos) {
				245	if (delimiters == null)
				246	throw new NullPointerException();
				247
				248	int position = startPos;
				249	while (!retDelims && position < maxPosition) {
				250	if (!hasSurrogates) {
				251	char c = str.charAt(position);
				252	if ((c > maxDelimCodePoint) \|\| (delimiters.indexOf(c) < 0))
				253	break;
				254	position++;
				255	} else {
				256	int c = str.codePointAt(position);
				257	if ((c > maxDelimCodePoint) \|\| !isDelimiter(c)) {
				258	break;
				259	}
				260	position += Character.charCount(c);
				261	}
				262	}
				263	return position;
				264	}
				265
				266	/**
				267	* Skips ahead from startPos and returns the index of the next delimiter
				268	* character encountered, or maxPosition if no such delimiter is found.
				269	*/
				270	private int scanToken(int startPos) {
				271	int position = startPos;
				272	while (position < maxPosition) {
				273	if (!hasSurrogates) {
				274	char c = str.charAt(position);
				275	if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))
				276	break;
				277	position++;
				278	} else {
				279	int c = str.codePointAt(position);
				280	if ((c <= maxDelimCodePoint) && isDelimiter(c))
				281	break;
				282	position += Character.charCount(c);
				283	}
				284	}
				285	if (retDelims && (startPos == position)) {
				286	if (!hasSurrogates) {
				287	char c = str.charAt(position);
				288	if ((c <= maxDelimCodePoint) && (delimiters.indexOf(c) >= 0))
				289	position++;
				290	} else {
				291	int c = str.codePointAt(position);
				292	if ((c <= maxDelimCodePoint) && isDelimiter(c))
				293	position += Character.charCount(c);
				294	}
				295	}
				296	return position;
				297	}
				298
				299	private boolean isDelimiter(int codePoint) {
				300	for (int i = 0; i < delimiterCodePoints.length; i++) {
				301	if (delimiterCodePoints[i] == codePoint) {
				302	return true;
				303	}
				304	}
				305	return false;
				306	}
				307
				308	/**
				309	* Tests if there are more tokens available from this tokenizer's string.
				310	* If this method returns <tt>true</tt>, then a subsequent call to
				311	* <tt>nextToken</tt> with no argument will successfully return a token.
				312	*
				313	* @return <code>true</code> if and only if there is at least one token
				314	* in the string after the current position; <code>false</code>
				315	* otherwise.
				316	*/
				317	public boolean hasMoreTokens() {
				318	/*
				319	* Temporarily store this position and use it in the following
				320	* nextToken() method only if the delimiters haven't been changed in
				321	* that nextToken() invocation.
				322	*/
				323	newPosition = skipDelimiters(currentPosition);
				324	return (newPosition < maxPosition);
				325	}
				326
				327	/**
				328	* Returns the next token from this string tokenizer.
				329	*
				330	* @return the next token from this string tokenizer.
				331	* @exception NoSuchElementException if there are no more tokens in this
				332	* tokenizer's string.
				333	*/
				334	public String nextToken() {
				335	/*
				336	* If next position already computed in hasMoreElements() and
				337	* delimiters have changed between the computation and this invocation,
				338	* then use the computed value.
				339	*/
				340
				341	currentPosition = (newPosition >= 0 && !delimsChanged) ?
				342	newPosition : skipDelimiters(currentPosition);
				343
				344	/* Reset these anyway */
				345	delimsChanged = false;
				346	newPosition = -1;
				347
				348	if (currentPosition >= maxPosition)
				349	throw new NoSuchElementException();
				350	int start = currentPosition;
				351	currentPosition = scanToken(currentPosition);
				352	return str.substring(start, currentPosition);
				353	}
				354
				355	/**
				356	* Returns the next token in this string tokenizer's string. First,
				357	* the set of characters considered to be delimiters by this
				358	* <tt>StringTokenizer</tt> object is changed to be the characters in
				359	* the string <tt>delim</tt>. Then the next token in the string
				360	* after the current position is returned. The current position is
				361	* advanced beyond the recognized token. The new delimiter set
				362	* remains the default after this call.
				363	*
				364	* @param delim the new delimiters.
				365	* @return the next token, after switching to the new delimiter set.
				366	* @exception NoSuchElementException if there are no more tokens in this
				367	* tokenizer's string.
				368	* @exception NullPointerException if delim is <CODE>null</CODE>
				369	*/
				370	public String nextToken(String delim) {
				371	delimiters = delim;
				372
				373	/* delimiter string specified, so set the appropriate flag. */
				374	delimsChanged = true;
				375
				376	setMaxDelimCodePoint();
				377	return nextToken();
				378	}
				379
				380	/**
				381	* Returns the same value as the <code>hasMoreTokens</code>
				382	* method. It exists so that this class can implement the
				383	* <code>Enumeration</code> interface.
				384	*
				385	* @return <code>true</code> if there are more tokens;
				386	* <code>false</code> otherwise.
				387	* @see java.util.Enumeration
				388	* @see java.util.StringTokenizer#hasMoreTokens()
				389	*/
				390	public boolean hasMoreElements() {
				391	return hasMoreTokens();
				392	}
				393
				394	/**
				395	* Returns the same value as the <code>nextToken</code> method,
				396	* except that its declared return value is <code>Object</code> rather than
				397	* <code>String</code>. It exists so that this class can implement the
				398	* <code>Enumeration</code> interface.
				399	*
				400	* @return the next token in the string.
				401	* @exception NoSuchElementException if there are no more tokens in this
				402	* tokenizer's string.
				403	* @see java.util.Enumeration
				404	* @see java.util.StringTokenizer#nextToken()
				405	*/
				406	public Object nextElement() {
				407	return nextToken();
				408	}
				409
				410	/**
				411	* Calculates the number of times that this tokenizer's
				412	* <code>nextToken</code> method can be called before it generates an
				413	* exception. The current position is not advanced.
				414	*
				415	* @return the number of tokens remaining in the string using the current
				416	* delimiter set.
				417	* @see java.util.StringTokenizer#nextToken()
				418	*/
				419	public int countTokens() {
				420	int count = 0;
				421	int currpos = currentPosition;
				422	while (currpos < maxPosition) {
				423	currpos = skipDelimiters(currpos);
				424	if (currpos >= maxPosition)
				425	break;
				426	currpos = scanToken(currpos);
				427	count++;
				428	}
				429	return count;
				430	}
				431	}