Blame - jdk/src/share/classes/java/text/BreakIterator.java - platform/libcore

blob: 220979f5517e96141963e549238efed1d4a95c52 [file] [log] [blame]

J. Duke	319a3b9	2007-12-01 00:00:00 +0000	[diff] [blame^]	1	/*
				2	* Copyright 1996-2006 Sun Microsystems, Inc. All Rights Reserved.
				3	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
				4	*
				5	* This code is free software; you can redistribute it and/or modify it
				6	* under the terms of the GNU General Public License version 2 only, as
				7	* published by the Free Software Foundation. Sun designates this
				8	* particular file as subject to the "Classpath" exception as provided
				9	* by Sun in the LICENSE file that accompanied this code.
				10	*
				11	* This code is distributed in the hope that it will be useful, but WITHOUT
				12	* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
				13	* FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
				14	* version 2 for more details (a copy is included in the LICENSE file that
				15	* accompanied this code).
				16	*
				17	* You should have received a copy of the GNU General Public License version
				18	* 2 along with this work; if not, write to the Free Software Foundation,
				19	* Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
				20	*
				21	* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
				22	* CA 95054 USA or visit www.sun.com if you need additional information or
				23	* have any questions.
				24	*/
				25
				26	/*
				27	* (C) Copyright Taligent, Inc. 1996, 1997 - All Rights Reserved
				28	* (C) Copyright IBM Corp. 1996 - 1998 - All Rights Reserved
				29	*
				30	* The original version of this source code and documentation
				31	* is copyrighted and owned by Taligent, Inc., a wholly-owned
				32	* subsidiary of IBM. These materials are provided under terms
				33	* of a License Agreement between Taligent and Sun. This technology
				34	* is protected by multiple US and International patents.
				35	*
				36	* This notice and attribution to Taligent may not be removed.
				37	* Taligent is a registered trademark of Taligent, Inc.
				38	*
				39	*/
				40
				41	package java.text;
				42
				43	import java.lang.ref.SoftReference;
				44	import java.net.URL;
				45	import java.io.InputStream;
				46	import java.io.IOException;
				47	import java.security.AccessController;
				48	import java.security.PrivilegedAction;
				49	import java.text.CharacterIterator;
				50	import java.text.StringCharacterIterator;
				51	import java.text.spi.BreakIteratorProvider;
				52	import java.util.Locale;
				53	import java.util.MissingResourceException;
				54	import java.util.ResourceBundle;
				55	import java.util.spi.LocaleServiceProvider;
				56	import sun.util.LocaleServiceProviderPool;
				57	import sun.util.resources.LocaleData;
				58
				59
				60	/**
				61	* The <code>BreakIterator</code> class implements methods for finding
				62	* the location of boundaries in text. Instances of <code>BreakIterator</code>
				63	* maintain a current position and scan over text
				64	* returning the index of characters where boundaries occur.
				65	* Internally, <code>BreakIterator</code> scans text using a
				66	* <code>CharacterIterator</code>, and is thus able to scan text held
				67	* by any object implementing that protocol. A <code>StringCharacterIterator</code>
				68	* is used to scan <code>String</code> objects passed to <code>setText</code>.
				69	*
				70	* <p>
				71	* You use the factory methods provided by this class to create
				72	* instances of various types of break iterators. In particular,
				73	* use <code>getWordInstance</code>, <code>getLineInstance</code>,
				74	* <code>getSentenceInstance</code>, and <code>getCharacterInstance</code>
				75	* to create <code>BreakIterator</code>s that perform
				76	* word, line, sentence, and character boundary analysis respectively.
				77	* A single <code>BreakIterator</code> can work only on one unit
				78	* (word, line, sentence, and so on). You must use a different iterator
				79	* for each unit boundary analysis you wish to perform.
				80	*
				81	* <p><a name="line"></a>
				82	* Line boundary analysis determines where a text string can be
				83	* broken when line-wrapping. The mechanism correctly handles
				84	* punctuation and hyphenated words. Actual line breaking needs
				85	* to also consider the available line width and is handled by
				86	* higher-level software.
				87	*
				88	* <p><a name="sentence"></a>
				89	* Sentence boundary analysis allows selection with correct interpretation
				90	* of periods within numbers and abbreviations, and trailing punctuation
				91	* marks such as quotation marks and parentheses.
				92	*
				93	* <p><a name="word"></a>
				94	* Word boundary analysis is used by search and replace functions, as
				95	* well as within text editing applications that allow the user to
				96	* select words with a double click. Word selection provides correct
				97	* interpretation of punctuation marks within and following
				98	* words. Characters that are not part of a word, such as symbols
				99	* or punctuation marks, have word-breaks on both sides.
				100	*
				101	* <p><a name="character"></a>
				102	* Character boundary analysis allows users to interact with characters
				103	* as they expect to, for example, when moving the cursor through a text
				104	* string. Character boundary analysis provides correct navigation
				105	* through character strings, regardless of how the character is stored.
				106	* The boundaries returned may be those of supplementary characters,
				107	* combining character sequences, or ligature clusters.
				108	* For example, an accented character might be stored as a base character
				109	* and a diacritical mark. What users consider to be a character can
				110	* differ between languages.
				111	*
				112	* <p>
				113	* The <code>BreakIterator</code> instances returned by the factory methods
				114	* of this class are intended for use with natural languages only, not for
				115	* programming language text. It is however possible to define subclasses
				116	* that tokenize a programming language.
				117	*
				118	* <P>
				119	* <strong>Examples</strong>:<P>
				120	* Creating and using text boundaries:
				121	* <blockquote>
				122	* <pre>
				123	* public static void main(String args[]) {
				124	* if (args.length == 1) {
				125	* String stringToExamine = args[0];
				126	* //print each word in order
				127	* BreakIterator boundary = BreakIterator.getWordInstance();
				128	* boundary.setText(stringToExamine);
				129	* printEachForward(boundary, stringToExamine);
				130	* //print each sentence in reverse order
				131	* boundary = BreakIterator.getSentenceInstance(Locale.US);
				132	* boundary.setText(stringToExamine);
				133	* printEachBackward(boundary, stringToExamine);
				134	* printFirst(boundary, stringToExamine);
				135	* printLast(boundary, stringToExamine);
				136	* }
				137	* }
				138	* </pre>
				139	* </blockquote>
				140	*
				141	* Print each element in order:
				142	* <blockquote>
				143	* <pre>
				144	* public static void printEachForward(BreakIterator boundary, String source) {
				145	* int start = boundary.first();
				146	* for (int end = boundary.next();
				147	* end != BreakIterator.DONE;
				148	* start = end, end = boundary.next()) {
				149	* System.out.println(source.substring(start,end));
				150	* }
				151	* }
				152	* </pre>
				153	* </blockquote>
				154	*
				155	* Print each element in reverse order:
				156	* <blockquote>
				157	* <pre>
				158	* public static void printEachBackward(BreakIterator boundary, String source) {
				159	* int end = boundary.last();
				160	* for (int start = boundary.previous();
				161	* start != BreakIterator.DONE;
				162	* end = start, start = boundary.previous()) {
				163	* System.out.println(source.substring(start,end));
				164	* }
				165	* }
				166	* </pre>
				167	* </blockquote>
				168	*
				169	* Print first element:
				170	* <blockquote>
				171	* <pre>
				172	* public static void printFirst(BreakIterator boundary, String source) {
				173	* int start = boundary.first();
				174	* int end = boundary.next();
				175	* System.out.println(source.substring(start,end));
				176	* }
				177	* </pre>
				178	* </blockquote>
				179	*
				180	* Print last element:
				181	* <blockquote>
				182	* <pre>
				183	* public static void printLast(BreakIterator boundary, String source) {
				184	* int end = boundary.last();
				185	* int start = boundary.previous();
				186	* System.out.println(source.substring(start,end));
				187	* }
				188	* </pre>
				189	* </blockquote>
				190	*
				191	* Print the element at a specified position:
				192	* <blockquote>
				193	* <pre>
				194	* public static void printAt(BreakIterator boundary, int pos, String source) {
				195	* int end = boundary.following(pos);
				196	* int start = boundary.previous();
				197	* System.out.println(source.substring(start,end));
				198	* }
				199	* </pre>
				200	* </blockquote>
				201	*
				202	* Find the next word:
				203	* <blockquote>
				204	* <pre>
				205	* public static int nextWordStartAfter(int pos, String text) {
				206	* BreakIterator wb = BreakIterator.getWordInstance();
				207	* wb.setText(text);
				208	* int last = wb.following(pos);
				209	* int current = wb.next();
				210	* while (current != BreakIterator.DONE) {
				211	* for (int p = last; p < current; p++) {
				212	* if (Character.isLetter(text.codePointAt(p)))
				213	* return last;
				214	* }
				215	* last = current;
				216	* current = wb.next();
				217	* }
				218	* return BreakIterator.DONE;
				219	* }
				220	* </pre>
				221	* (The iterator returned by BreakIterator.getWordInstance() is unique in that
				222	* the break positions it returns don't represent both the start and end of the
				223	* thing being iterated over. That is, a sentence-break iterator returns breaks
				224	* that each represent the end of one sentence and the beginning of the next.
				225	* With the word-break iterator, the characters between two boundaries might be a
				226	* word, or they might be the punctuation or whitespace between two words. The
				227	* above code uses a simple heuristic to determine which boundary is the beginning
				228	* of a word: If the characters between this boundary and the next boundary
				229	* include at least one letter (this can be an alphabetical letter, a CJK ideograph,
				230	* a Hangul syllable, a Kana character, etc.), then the text between this boundary
				231	* and the next is a word; otherwise, it's the material between words.)
				232	* </blockquote>
				233	*
				234	* @see CharacterIterator
				235	*
				236	*/
				237
				238	public abstract class BreakIterator implements Cloneable
				239	{
				240	/**
				241	* Constructor. BreakIterator is stateless and has no default behavior.
				242	*/
				243	protected BreakIterator()
				244	{
				245	}
				246
				247	/**
				248	* Create a copy of this iterator
				249	* @return A copy of this
				250	*/
				251	public Object clone()
				252	{
				253	try {
				254	return super.clone();
				255	}
				256	catch (CloneNotSupportedException e) {
				257	throw new InternalError();
				258	}
				259	}
				260
				261	/**
				262	* DONE is returned by previous(), next(), next(int), preceding(int)
				263	* and following(int) when either the first or last text boundary has been
				264	* reached.
				265	*/
				266	public static final int DONE = -1;
				267
				268	/**
				269	* Returns the first boundary. The iterator's current position is set
				270	* to the first text boundary.
				271	* @return The character index of the first text boundary.
				272	*/
				273	public abstract int first();
				274
				275	/**
				276	* Returns the last boundary. The iterator's current position is set
				277	* to the last text boundary.
				278	* @return The character index of the last text boundary.
				279	*/
				280	public abstract int last();
				281
				282	/**
				283	* Returns the nth boundary from the current boundary. If either
				284	* the first or last text boundary has been reached, it returns
				285	* <code>BreakIterator.DONE</code> and the current position is set to either
				286	* the first or last text boundary depending on which one is reached. Otherwise,
				287	* the iterator's current position is set to the new boundary.
				288	* For example, if the iterator's current position is the mth text boundary
				289	* and three more boundaries exist from the current boundary to the last text
				290	* boundary, the next(2) call will return m + 2. The new text position is set
				291	* to the (m + 2)th text boundary. A next(4) call would return
				292	* <code>BreakIterator.DONE</code> and the last text boundary would become the
				293	* new text position.
				294	* @param n which boundary to return. A value of 0
				295	* does nothing. Negative values move to previous boundaries
				296	* and positive values move to later boundaries.
				297	* @return The character index of the nth boundary from the current position
				298	* or <code>BreakIterator.DONE</code> if either first or last text boundary
				299	* has been reached.
				300	*/
				301	public abstract int next(int n);
				302
				303	/**
				304	* Returns the boundary following the current boundary. If the current boundary
				305	* is the last text boundary, it returns <code>BreakIterator.DONE</code> and
				306	* the iterator's current position is unchanged. Otherwise, the iterator's
				307	* current position is set to the boundary following the current boundary.
				308	* @return The character index of the next text boundary or
				309	* <code>BreakIterator.DONE</code> if the current boundary is the last text
				310	* boundary.
				311	* Equivalent to next(1).
				312	* @see #next(int)
				313	*/
				314	public abstract int next();
				315
				316	/**
				317	* Returns the boundary preceding the current boundary. If the current boundary
				318	* is the first text boundary, it returns <code>BreakIterator.DONE</code> and
				319	* the iterator's current position is unchanged. Otherwise, the iterator's
				320	* current position is set to the boundary preceding the current boundary.
				321	* @return The character index of the previous text boundary or
				322	* <code>BreakIterator.DONE</code> if the current boundary is the first text
				323	* boundary.
				324	*/
				325	public abstract int previous();
				326
				327	/**
				328	* Returns the first boundary following the specified character offset. If the
				329	* specified offset equals to the last text boundary, it returns
				330	* <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
				331	* Otherwise, the iterator's current position is set to the returned boundary.
				332	* The value returned is always greater than the offset or the value
				333	* <code>BreakIterator.DONE</code>.
				334	* @param offset the character offset to begin scanning.
				335	* @return The first boundary after the specified offset or
				336	* <code>BreakIterator.DONE</code> if the last text boundary is passed in
				337	* as the offset.
				338	* @exception IllegalArgumentException if the specified offset is less than
				339	* the first text boundary or greater than the last text boundary.
				340	*/
				341	public abstract int following(int offset);
				342
				343	/**
				344	* Returns the last boundary preceding the specified character offset. If the
				345	* specified offset equals to the first text boundary, it returns
				346	* <code>BreakIterator.DONE</code> and the iterator's current position is unchanged.
				347	* Otherwise, the iterator's current position is set to the returned boundary.
				348	* The value returned is always less than the offset or the value
				349	* <code>BreakIterator.DONE</code>.
				350	* @param offset the characater offset to begin scanning.
				351	* @return The last boundary before the specified offset or
				352	* <code>BreakIterator.DONE</code> if the first text boundary is passed in
				353	* as the offset.
				354	* @exception IllegalArgumentException if the specified offset is less than
				355	* the first text boundary or greater than the last text boundary.
				356	* @since 1.2
				357	*/
				358	public int preceding(int offset) {
				359	// NOTE: This implementation is here solely because we can't add new
				360	// abstract methods to an existing class. There is almost ALWAYS a
				361	// better, faster way to do this.
				362	int pos = following(offset);
				363	while (pos >= offset && pos != DONE)
				364	pos = previous();
				365	return pos;
				366	}
				367
				368	/**
				369	* Returns true if the specified character offset is a text boundary.
				370	* @param offset the character offset to check.
				371	* @return <code>true</code> if "offset" is a boundary position,
				372	* <code>false</code> otherwise.
				373	* @exception IllegalArgumentException if the specified offset is less than
				374	* the first text boundary or greater than the last text boundary.
				375	* @since 1.2
				376	*/
				377	public boolean isBoundary(int offset) {
				378	// NOTE: This implementation probably is wrong for most situations
				379	// because it fails to take into account the possibility that a
				380	// CharacterIterator passed to setText() may not have a begin offset
				381	// of 0. But since the abstract BreakIterator doesn't have that
				382	// knowledge, it assumes the begin offset is 0. If you subclass
				383	// BreakIterator, copy the SimpleTextBoundary implementation of this
				384	// function into your subclass. [This should have been abstract at
				385	// this level, but it's too late to fix that now.]
				386	if (offset == 0) {
				387	return true;
				388	}
				389	int boundary = following(offset - 1);
				390	if (boundary == DONE) {
				391	throw new IllegalArgumentException();
				392	}
				393	return boundary == offset;
				394	}
				395
				396	/**
				397	* Returns character index of the text boundary that was most
				398	* recently returned by next(), next(int), previous(), first(), last(),
				399	* following(int) or preceding(int). If any of these methods returns
				400	* <code>BreakIterator.DONE</code> because either first or last text boundary
				401	* has been reached, it returns the first or last text boundary depending on
				402	* which one is reached.
				403	* @return The text boundary returned from the above methods, first or last
				404	* text boundary.
				405	* @see #next()
				406	* @see #next(int)
				407	* @see #previous()
				408	* @see #first()
				409	* @see #last()
				410	* @see #following(int)
				411	* @see #preceding(int)
				412	*/
				413	public abstract int current();
				414
				415	/**
				416	* Get the text being scanned
				417	* @return the text being scanned
				418	*/
				419	public abstract CharacterIterator getText();
				420
				421	/**
				422	* Set a new text string to be scanned. The current scan
				423	* position is reset to first().
				424	* @param newText new text to scan.
				425	*/
				426	public void setText(String newText)
				427	{
				428	setText(new StringCharacterIterator(newText));
				429	}
				430
				431	/**
				432	* Set a new text for scanning. The current scan
				433	* position is reset to first().
				434	* @param newText new text to scan.
				435	*/
				436	public abstract void setText(CharacterIterator newText);
				437
				438	private static final int CHARACTER_INDEX = 0;
				439	private static final int WORD_INDEX = 1;
				440	private static final int LINE_INDEX = 2;
				441	private static final int SENTENCE_INDEX = 3;
				442	private static final SoftReference[] iterCache = new SoftReference[4];
				443
				444	/**
				445	* Returns a new <code>BreakIterator</code> instance
				446	* for <a href="#word">word breaks</a>
				447	* for the {@linkplain Locale#getDefault() default locale}.
				448	* @return A break iterator for word breaks
				449	*/
				450	public static BreakIterator getWordInstance()
				451	{
				452	return getWordInstance(Locale.getDefault());
				453	}
				454
				455	/**
				456	* Returns a new <code>BreakIterator</code> instance
				457	* for <a href="#word">word breaks</a>
				458	* for the given locale.
				459	* @param locale the desired locale
				460	* @return A break iterator for word breaks
				461	* @exception NullPointerException if <code>locale</code> is null
				462	*/
				463	public static BreakIterator getWordInstance(Locale locale)
				464	{
				465	return getBreakInstance(locale,
				466	WORD_INDEX,
				467	"WordData",
				468	"WordDictionary");
				469	}
				470
				471	/**
				472	* Returns a new <code>BreakIterator</code> instance
				473	* for <a href="#line">line breaks</a>
				474	* for the {@linkplain Locale#getDefault() default locale}.
				475	* @return A break iterator for line breaks
				476	*/
				477	public static BreakIterator getLineInstance()
				478	{
				479	return getLineInstance(Locale.getDefault());
				480	}
				481
				482	/**
				483	* Returns a new <code>BreakIterator</code> instance
				484	* for <a href="#line">line breaks</a>
				485	* for the given locale.
				486	* @param locale the desired locale
				487	* @return A break iterator for line breaks
				488	* @exception NullPointerException if <code>locale</code> is null
				489	*/
				490	public static BreakIterator getLineInstance(Locale locale)
				491	{
				492	return getBreakInstance(locale,
				493	LINE_INDEX,
				494	"LineData",
				495	"LineDictionary");
				496	}
				497
				498	/**
				499	* Returns a new <code>BreakIterator</code> instance
				500	* for <a href="#character">character breaks</a>
				501	* for the {@linkplain Locale#getDefault() default locale}.
				502	* @return A break iterator for character breaks
				503	*/
				504	public static BreakIterator getCharacterInstance()
				505	{
				506	return getCharacterInstance(Locale.getDefault());
				507	}
				508
				509	/**
				510	* Returns a new <code>BreakIterator</code> instance
				511	* for <a href="#character">character breaks</a>
				512	* for the given locale.
				513	* @param locale the desired locale
				514	* @return A break iterator for character breaks
				515	* @exception NullPointerException if <code>locale</code> is null
				516	*/
				517	public static BreakIterator getCharacterInstance(Locale locale)
				518	{
				519	return getBreakInstance(locale,
				520	CHARACTER_INDEX,
				521	"CharacterData",
				522	"CharacterDictionary");
				523	}
				524
				525	/**
				526	* Returns a new <code>BreakIterator</code> instance
				527	* for <a href="#sentence">sentence breaks</a>
				528	* for the {@linkplain Locale#getDefault() default locale}.
				529	* @return A break iterator for sentence breaks
				530	*/
				531	public static BreakIterator getSentenceInstance()
				532	{
				533	return getSentenceInstance(Locale.getDefault());
				534	}
				535
				536	/**
				537	* Returns a new <code>BreakIterator</code> instance
				538	* for <a href="#sentence">sentence breaks</a>
				539	* for the given locale.
				540	* @param locale the desired locale
				541	* @return A break iterator for sentence breaks
				542	* @exception NullPointerException if <code>locale</code> is null
				543	*/
				544	public static BreakIterator getSentenceInstance(Locale locale)
				545	{
				546	return getBreakInstance(locale,
				547	SENTENCE_INDEX,
				548	"SentenceData",
				549	"SentenceDictionary");
				550	}
				551
				552	private static BreakIterator getBreakInstance(Locale locale,
				553	int type,
				554	String dataName,
				555	String dictionaryName) {
				556	if (iterCache[type] != null) {
				557	BreakIteratorCache cache = (BreakIteratorCache) iterCache[type].get();
				558	if (cache != null) {
				559	if (cache.getLocale().equals(locale)) {
				560	return cache.createBreakInstance();
				561	}
				562	}
				563	}
				564
				565	BreakIterator result = createBreakInstance(locale,
				566	type,
				567	dataName,
				568	dictionaryName);
				569	BreakIteratorCache cache = new BreakIteratorCache(locale, result);
				570	iterCache[type] = new SoftReference(cache);
				571	return result;
				572	}
				573
				574	private static ResourceBundle getBundle(final String baseName, final Locale locale) {
				575	return (ResourceBundle) AccessController.doPrivileged(new PrivilegedAction() {
				576	public Object run() {
				577	return ResourceBundle.getBundle(baseName, locale);
				578	}
				579	});
				580	}
				581
				582	private static BreakIterator createBreakInstance(Locale locale,
				583	int type,
				584	String dataName,
				585	String dictionaryName) {
				586
				587	// Check whether a provider can provide an implementation that's closer
				588	// to the requested locale than what the Java runtime itself can provide.
				589	LocaleServiceProviderPool pool =
				590	LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
				591	if (pool.hasProviders()) {
				592	BreakIterator providersInstance = pool.getLocalizedObject(
				593	BreakIteratorGetter.INSTANCE,
				594	locale, type);
				595	if (providersInstance != null) {
				596	return providersInstance;
				597	}
				598	}
				599
				600	ResourceBundle bundle = getBundle(
				601	"sun.text.resources.BreakIteratorInfo", locale);
				602	String[] classNames = bundle.getStringArray("BreakIteratorClasses");
				603
				604	String dataFile = bundle.getString(dataName);
				605
				606	try {
				607	if (classNames[type].equals("RuleBasedBreakIterator")) {
				608	return new RuleBasedBreakIterator(dataFile);
				609	}
				610	else if (classNames[type].equals("DictionaryBasedBreakIterator")) {
				611	String dictionaryFile = bundle.getString(dictionaryName);
				612	return new DictionaryBasedBreakIterator(dataFile, dictionaryFile);
				613	}
				614	else {
				615	throw new IllegalArgumentException("Invalid break iterator class \"" +
				616	classNames[type] + "\"");
				617	}
				618	}
				619	catch (Exception e) {
				620	throw new InternalError(e.toString());
				621	}
				622	}
				623
				624	/**
				625	* Returns an array of all locales for which the
				626	* <code>get*Instance</code> methods of this class can return
				627	* localized instances.
				628	* The returned array represents the union of locales supported by the Java
				629	* runtime and by installed
				630	* {@link java.text.spi.BreakIteratorProvider BreakIteratorProvider} implementations.
				631	* It must contain at least a <code>Locale</code>
				632	* instance equal to {@link java.util.Locale#US Locale.US}.
				633	*
				634	* @return An array of locales for which localized
				635	* <code>BreakIterator</code> instances are available.
				636	*/
				637	public static synchronized Locale[] getAvailableLocales()
				638	{
				639	LocaleServiceProviderPool pool =
				640	LocaleServiceProviderPool.getPool(BreakIteratorProvider.class);
				641	return pool.getAvailableLocales();
				642	}
				643
				644	private static final class BreakIteratorCache {
				645
				646	private BreakIterator iter;
				647	private Locale locale;
				648
				649	BreakIteratorCache(Locale locale, BreakIterator iter) {
				650	this.locale = locale;
				651	this.iter = (BreakIterator) iter.clone();
				652	}
				653
				654	Locale getLocale() {
				655	return locale;
				656	}
				657
				658	BreakIterator createBreakInstance() {
				659	return (BreakIterator) iter.clone();
				660	}
				661	}
				662
				663	static long getLong(byte[] buf, int offset) {
				664	long num = buf[offset]&0xFF;
				665	for (int i = 1; i < 8; i++) {
				666	num = num<<8 \| (buf[offset+i]&0xFF);
				667	}
				668	return num;
				669	}
				670
				671	static int getInt(byte[] buf, int offset) {
				672	int num = buf[offset]&0xFF;
				673	for (int i = 1; i < 4; i++) {
				674	num = num<<8 \| (buf[offset+i]&0xFF);
				675	}
				676	return num;
				677	}
				678
				679	static short getShort(byte[] buf, int offset) {
				680	short num = (short)(buf[offset]&0xFF);
				681	num = (short)(num<<8 \| (buf[offset+1]&0xFF));
				682	return num;
				683	}
				684
				685	/**
				686	* Obtains a BreakIterator instance from a BreakIteratorProvider
				687	* implementation.
				688	*/
				689	private static class BreakIteratorGetter
				690	implements LocaleServiceProviderPool.LocalizedObjectGetter<BreakIteratorProvider, BreakIterator> {
				691	private static final BreakIteratorGetter INSTANCE =
				692	new BreakIteratorGetter();
				693
				694	public BreakIterator getObject(BreakIteratorProvider breakIteratorProvider,
				695	Locale locale,
				696	String key,
				697	Object... params) {
				698	assert params.length == 1;
				699
				700	switch ((Integer)params[0]) {
				701	case CHARACTER_INDEX:
				702	return breakIteratorProvider.getCharacterInstance(locale);
				703	case WORD_INDEX:
				704	return breakIteratorProvider.getWordInstance(locale);
				705	case LINE_INDEX:
				706	return breakIteratorProvider.getLineInstance(locale);
				707	case SENTENCE_INDEX:
				708	return breakIteratorProvider.getSentenceInstance(locale);
				709	default:
				710	assert false : "should not happen";
				711	}
				712	return null;
				713	}
				714	}
				715	}