core/java/android/text/method/WordIterator.java - platform/frameworks/base - Gitiles


 /*
  * Copyright (C) 2011 The Android Open Source Project
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
  * You may obtain a copy of the License at
  *
  *      http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */

 package android.text.method;

 import android.text.Selection;
 import android.text.SpannableStringBuilder;

 import android.icu.text.BreakIterator;
 import java.util.Locale;

 /**
  * Walks through cursor positions at word boundaries. Internally uses
  * {@link BreakIterator#getWordInstance(Locale)}, and caches a window of the
  * {@link CharSequence} as a {@link String} for performance reasons.
  *
  * All offsets accepted and returned by the public methods are expressed in the coordinates of
  * the char sequence given to {@link #setCharSequence(CharSequence, int, int)}. That method
  * assigns the cached text, so it has to be called before any of the query methods.
  *
  * Also provides methods to determine word boundaries.
  * {@hide}
  */
 public class WordIterator implements Selection.PositionIterator {
     // Size of the window for the word iterator, should be greater than the longest word's length
     private static final int WINDOW_WIDTH = 50;

     // Copy of the window of text currently analysed; assigned by setCharSequence().
     private String mString;
     // Offset, in the original char sequence, of the first character of mString.
     private int mOffsetShift;

     private final BreakIterator mIterator;

     /**
      * Constructs a WordIterator using the default locale.
      */
     public WordIterator() {
         this(Locale.getDefault());
     }

     /**
      * Constructs a new WordIterator for the specified locale.
      * @param locale The locale to be used when analysing the text.
      */
     public WordIterator(Locale locale) {
         mIterator = BreakIterator.getWordInstance(locale);
     }

     /**
      * Sets the text to analyse. Only the part of <code>charSequence</code> between
      * <code>start - WINDOW_WIDTH</code> and <code>end + WINDOW_WIDTH</code> (clamped to the
      * bounds of the sequence) is copied and handed to the underlying {@link BreakIterator}.
      *
      * @param charSequence the text to analyse.
      * @param start beginning of the range of interest in <code>charSequence</code>.
      * @param end end of the range of interest in <code>charSequence</code>.
      */
     public void setCharSequence(CharSequence charSequence, int start, int end) {
         mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
         final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);

         if (charSequence instanceof SpannableStringBuilder) {
             // NOTE(review): presumably substring() is cheaper than subSequence().toString()
             // for this type - confirm against SpannableStringBuilder.
             mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
         } else {
             mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
         }
         mIterator.setText(mString);
     }

     /**
      * Steps backwards through the boundaries reported by the underlying {@link BreakIterator},
      * starting from <code>offset</code>, until it reaches one that is located on a letter or
      * digit.
      *
      * @param offset the position to search from.
      * @return the boundary found, or {@code DONE} if the iterator runs out of boundaries.
      */
     public int preceding(int offset) {
         int shiftedOffset = offset - mOffsetShift;
         while (true) {
             shiftedOffset = mIterator.preceding(shiftedOffset);
             if (shiftedOffset == BreakIterator.DONE || isOnLetterOrDigit(shiftedOffset)) {
                 return unshift(shiftedOffset);
             }
         }
     }

     /**
      * Steps forwards through the boundaries reported by the underlying {@link BreakIterator},
      * starting from <code>offset</code>, until it reaches one that is located right after a
      * letter or digit.
      *
      * @param offset the position to search from.
      * @return the boundary found, or {@code DONE} if the iterator runs out of boundaries.
      */
     public int following(int offset) {
         int shiftedOffset = offset - mOffsetShift;
         while (true) {
             shiftedOffset = mIterator.following(shiftedOffset);
             if (shiftedOffset == BreakIterator.DONE || isAfterLetterOrDigit(shiftedOffset)) {
                 return unshift(shiftedOffset);
             }
         }
     }

     /**
      * Indicates whether the underlying {@link BreakIterator} reports a boundary at the given
      * offset.
      *
      * @param offset the position to check.
      * @return whether <code>offset</code> is a boundary.
      * @throws IllegalArgumentException if offset is outside of the cached window of text.
      */
     public boolean isBoundary(int offset) {
         final int shiftedOffset = offset - mOffsetShift;
         checkOffsetIsValid(shiftedOffset);
         return mIterator.isBoundary(shiftedOffset);
     }

     /**
      * Returns the position of next boundary after the given offset. Returns
      * {@code DONE} if there is no boundary after the given offset.
      *
      * @param offset the given start position to search from.
      * @return the position of the first boundary following the given offset.
      */
     public int nextBoundary(int offset) {
         return unshift(mIterator.following(offset - mOffsetShift));
     }

     /**
      * Returns the position of boundary preceding the given offset or
      * {@code DONE} if the given offset specifies the starting position.
      *
      * @param offset the given start position to search from.
      * @return the position of the last boundary preceding the given offset.
      */
     public int prevBoundary(int offset) {
         return unshift(mIterator.preceding(offset - mOffsetShift));
     }

     /** If <code>offset</code> is within a word, returns the index of the first character of that
      * word, otherwise returns BreakIterator.DONE.
      *
      * The offsets that are considered to be part of a word are the indexes of its characters,
      * <i>as well as</i> the index of its last character plus one.
      * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
      *
      * Valid range for offset is the window of text cached by
      * {@link #setCharSequence(CharSequence, int, int)} (note the inclusive upper bound).
      * The returned value is not greater than offset, or is BreakIterator.DONE.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     public int getBeginning(int offset) {
         // TODO: Check if usage of this can be updated to getBeginning(offset, true) if
         // so this method can be removed.
         return getBeginning(offset, false);
     }

     /**
      * If <code>offset</code> is within a word, returns the index of the last character of that
      * word plus one, otherwise returns BreakIterator.DONE.
      *
      * The offsets that are considered to be part of a word are the indexes of its characters,
      * <i>as well as</i> the index of its last character plus one.
      * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
      *
      * Valid range for offset is the window of text cached by
      * {@link #setCharSequence(CharSequence, int, int)} (note the inclusive upper bound).
      * The returned value is not smaller than offset, or is BreakIterator.DONE.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     public int getEnd(int offset) {
         // TODO: Check if usage of this can be updated to getEnd(offset, true), if
         // so this method can be removed.
         return getEnd(offset, false);
     }

     /**
      * If the <code>offset</code> is within a word or on a word boundary that can only be
      * considered the start of a word (e.g. _word where "_" is any character that would not
      * be considered part of the word) then this returns the index of the first character of
      * that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, this would return the start of the previous word, AA.
      *
      * Returns BreakIterator.DONE if there is no previous boundary.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     public int getPrevWordBeginningOnTwoWordsBoundary(int offset) {
         return getBeginning(offset, true);
     }

     /**
      * If the <code>offset</code> is within a word or on a word boundary that can only be
      * considered the end of a word (e.g. word_ where "_" is any character that would not
      * be considered part of the word) then this returns the index of the last character
      * plus one of that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, this would return the end of the next word, BB.
      *
      * Returns BreakIterator.DONE if there is no next boundary.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     public int getNextWordEndOnTwoWordBoundary(int offset) {
         return getEnd(offset, true);
     }

     /**
      * If the <code>offset</code> is within a word or on a word boundary that can only be
      * considered the start of a word (e.g. _word where "_" is any character that would not
      * be considered part of the word) then this returns the index of the first character of
      * that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, and getPrevWordBeginningOnTwoWordsBoundary is true then this would
      * return the start of the previous word, AA. Otherwise it would return the current offset,
      * the start of BB.
      *
      * Returns BreakIterator.DONE if the offset is neither on nor right after a letter or digit,
      * or if there is no previous boundary.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) {
         final int shiftedOffset = offset - mOffsetShift;
         checkOffsetIsValid(shiftedOffset);

         final boolean onWordChar = isOnLetterOrDigit(shiftedOffset);
         final boolean afterWordChar = isAfterLetterOrDigit(shiftedOffset);
         if (!onWordChar && !afterWordChar) {
             // Not touching any word.
             return BreakIterator.DONE;
         }

         // The offset itself is the answer when it already starts a word, unless it sits between
         // two words and the caller asked for the beginning of the previous one.
         if (onWordChar
                 && mIterator.isBoundary(shiftedOffset)
                 && !(afterWordChar && getPrevWordBeginningOnTwoWordsBoundary)) {
             return offset;
         }
         return unshift(mIterator.preceding(shiftedOffset));
     }

     /**
      * If the <code>offset</code> is within a word or on a word boundary that can only be
      * considered the end of a word (e.g. word_ where "_" is any character that would not be
      * considered part of the word) then this returns the index of the last character plus one
      * of that word.
      *
      * If the offset is on a word boundary that can be considered the start and end of a
      * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
      * between AA and BB, and getNextWordEndOnTwoWordBoundary is true then this would return
      * the end of the next word, BB. Otherwise it would return the current offset, the end
      * of AA.
      *
      * Returns BreakIterator.DONE if the offset is neither on nor right after a letter or digit,
      * or if there is no next boundary.
      *
      * @throws IllegalArgumentException if offset is not valid.
      */
     private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) {
         final int shiftedOffset = offset - mOffsetShift;
         checkOffsetIsValid(shiftedOffset);

         final boolean onWordChar = isOnLetterOrDigit(shiftedOffset);
         final boolean afterWordChar = isAfterLetterOrDigit(shiftedOffset);
         if (!onWordChar && !afterWordChar) {
             // Not touching any word.
             return BreakIterator.DONE;
         }

         // The offset itself is the answer when it already ends a word, unless it sits between
         // two words and the caller asked for the end of the next one.
         if (afterWordChar
                 && mIterator.isBoundary(shiftedOffset)
                 && !(onWordChar && getNextWordEndOnTwoWordBoundary)) {
             return offset;
         }
         return unshift(mIterator.following(shiftedOffset));
     }

     /**
      * Searches backwards for the beginning of a group of punctuation as defined by
      * {@link #isPunctuation(int)}. The search starts at <code>offset</code> itself and then
      * moves from boundary to boundary using {@link #prevBoundary(int)} until it finds a
      * position that is on a punctuation character but not after one.
      *
      * @param offset the offset to search from.
      * @return the index of the first character of the punctuation group found, or
      * BreakIterator.DONE if the boundaries are exhausted first.
      */
     public int getPunctuationBeginning(int offset) {
         while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) {
             offset = prevBoundary(offset);
         }
         // No need to shift offset, prevBoundary handles that.
         return offset;
     }

     /**
      * Searches forwards for the end of a group of punctuation as defined by
      * {@link #isPunctuation(int)}. The search starts at <code>offset</code> itself and then
      * moves from boundary to boundary using {@link #nextBoundary(int)} until it finds a
      * position that is after a punctuation character but not on one.
      *
      * @param offset the offset to search from.
      * @return the index of the last character of the punctuation group found plus one, or
      * BreakIterator.DONE if the boundaries are exhausted first.
      */
     public int getPunctuationEnd(int offset) {
         while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) {
             offset = nextBoundary(offset);
         }
         // No need to shift offset, nextBoundary handles that.
         return offset;
     }

     /**
      * Indicates if the provided offset is after a punctuation character
      * as defined by {@link #isPunctuation(int)}.
      *
      * @param offset the offset to check from.
      * @return Whether the offset is after a punctuation character. Always false when the
      * character before the offset is outside of the cached window of text.
      */
     public boolean isAfterPunctuation(int offset) {
         final int shiftedOffset = offset - mOffsetShift;
         return shiftedOffset >= 1 && shiftedOffset <= mString.length()
                 && isPunctuation(mString.codePointBefore(shiftedOffset));
     }

     /**
      * Indicates if the provided offset is at a punctuation character
      * as defined by {@link #isPunctuation(int)}.
      *
      * @param offset the offset to check from.
      * @return Whether the offset is at a punctuation character. Always false when the offset
      * is outside of the cached window of text.
      */
     public boolean isOnPunctuation(int offset) {
         final int shiftedOffset = offset - mOffsetShift;
         return shiftedOffset >= 0 && shiftedOffset < mString.length()
                 && isPunctuation(mString.codePointAt(shiftedOffset));
     }

     /** Whether a punctuation group starts at offset: punctuation at it, none right before. */
     private boolean isPunctuationStartBoundary(int offset) {
         return isOnPunctuation(offset) && !isAfterPunctuation(offset);
     }

     /** Whether a punctuation group ends at offset: punctuation right before it, none at it. */
     private boolean isPunctuationEndBoundary(int offset) {
         return !isOnPunctuation(offset) && isAfterPunctuation(offset);
     }

     /** Whether the code point belongs to one of the Unicode punctuation general categories. */
     private static boolean isPunctuation(int cp) {
         final int type = Character.getType(cp);
         return (type == Character.CONNECTOR_PUNCTUATION ||
                 type == Character.DASH_PUNCTUATION ||
                 type == Character.END_PUNCTUATION ||
                 type == Character.FINAL_QUOTE_PUNCTUATION ||
                 type == Character.INITIAL_QUOTE_PUNCTUATION ||
                 type == Character.OTHER_PUNCTUATION ||
                 type == Character.START_PUNCTUATION);
     }

     /** Whether the code point ending at the window-relative offset is a letter or digit. */
     private boolean isAfterLetterOrDigit(int shiftedOffset) {
         return shiftedOffset >= 1 && shiftedOffset <= mString.length()
                 && Character.isLetterOrDigit(mString.codePointBefore(shiftedOffset));
     }

     /** Whether the code point starting at the window-relative offset is a letter or digit. */
     private boolean isOnLetterOrDigit(int shiftedOffset) {
         return shiftedOffset >= 0 && shiftedOffset < mString.length()
                 && Character.isLetterOrDigit(mString.codePointAt(shiftedOffset));
     }

     /**
      * Converts a window-relative result of the underlying iterator back to the coordinates of
      * the original text, leaving the {@code DONE} sentinel untouched so that it is never offset
      * by the window shift.
      */
     private int unshift(int shiftedOffset) {
         if (shiftedOffset == BreakIterator.DONE) {
             return BreakIterator.DONE;
         }
         return shiftedOffset + mOffsetShift;
     }

     /**
      * Verifies that the window-relative offset lies within the cached text, end included.
      *
      * @throws IllegalArgumentException if it does not; the message reports original-text offsets.
      */
     private void checkOffsetIsValid(int shiftedOffset) {
         if (shiftedOffset < 0 || shiftedOffset > mString.length()) {
             throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
                     ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
                     "]");
         }
     }
 }

	/*
	* Copyright (C) 2011 The Android Open Source Project
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	* http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/

	package android.text.method;

	import android.text.Selection;
	import android.text.SpannableStringBuilder;

	import android.icu.text.BreakIterator;
	import java.util.Locale;

	/**
	* Walks through cursor positions at word boundaries. Internally uses
	* {@link BreakIterator#getWordInstance()}, and caches {@link CharSequence}
	* for performance reasons.
	*
	* Also provides methods to determine word boundaries.
	* {@hide}
	*/
	public class WordIterator implements Selection.PositionIterator {
	// Size of the window for the word iterator, should be greater than the longest word's length
	private static final int WINDOW_WIDTH = 50;

	private String mString;
	private int mOffsetShift;

	private BreakIterator mIterator;

	/**
	* Constructs a WordIterator using the default locale.
	*/
	public WordIterator() {
	this(Locale.getDefault());
	}

	/**
	* Constructs a new WordIterator for the specified locale.
	* @param locale The locale to be used when analysing the text.
	*/
	public WordIterator(Locale locale) {
	mIterator = BreakIterator.getWordInstance(locale);
	}

	public void setCharSequence(CharSequence charSequence, int start, int end) {
	mOffsetShift = Math.max(0, start - WINDOW_WIDTH);
	final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH);

	if (charSequence instanceof SpannableStringBuilder) {
	mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd);
	} else {
	mString = charSequence.subSequence(mOffsetShift, windowEnd).toString();
	}
	mIterator.setText(mString);
	}

	/** {@inheritDoc} */
	public int preceding(int offset) {
	int shiftedOffset = offset - mOffsetShift;
	do {
	shiftedOffset = mIterator.preceding(shiftedOffset);
	if (shiftedOffset == BreakIterator.DONE) {
	return BreakIterator.DONE;
	}
	if (isOnLetterOrDigit(shiftedOffset)) {
	return shiftedOffset + mOffsetShift;
	}
	} while (true);
	}

	/** {@inheritDoc} */
	public int following(int offset) {
	int shiftedOffset = offset - mOffsetShift;
	do {
	shiftedOffset = mIterator.following(shiftedOffset);
	if (shiftedOffset == BreakIterator.DONE) {
	return BreakIterator.DONE;
	}
	if (isAfterLetterOrDigit(shiftedOffset)) {
	return shiftedOffset + mOffsetShift;
	}
	} while (true);
	}

	/** {@inheritDoc} */
	public boolean isBoundary(int offset) {
	int shiftedOffset = offset - mOffsetShift;
	checkOffsetIsValid(shiftedOffset);
	return mIterator.isBoundary(shiftedOffset);
	}

	/**
	* Returns the position of next boundary after the given offset. Returns
	* {@code DONE} if there is no boundary after the given offset.
	*
	* @param offset the given start position to search from.
	* @return the position of the last boundary preceding the given offset.
	*/
	public int nextBoundary(int offset) {
	int shiftedOffset = offset - mOffsetShift;
	shiftedOffset = mIterator.following(shiftedOffset);
	if (shiftedOffset == BreakIterator.DONE) {
	return BreakIterator.DONE;
	}
	return shiftedOffset + mOffsetShift;
	}

	/**
	* Returns the position of boundary preceding the given offset or
	* {@code DONE} if the given offset specifies the starting position.
	*
	* @param offset the given start position to search from.
	* @return the position of the last boundary preceding the given offset.
	*/
	public int prevBoundary(int offset) {
	int shiftedOffset = offset - mOffsetShift;
	shiftedOffset = mIterator.preceding(shiftedOffset);
	if (shiftedOffset == BreakIterator.DONE) {
	return BreakIterator.DONE;
	}
	return shiftedOffset + mOffsetShift;
	}

	/** If <code>offset</code> is within a word, returns the index of the first character of that
	* word, otherwise returns BreakIterator.DONE.
	*
	* The offsets that are considered to be part of a word are the indexes of its characters,
	* <i>as well as</i> the index of its last character plus one.
	* If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
	*
	* Valid range for offset is [0..textLength] (note the inclusive upper bound).
	* The returned value is within [0..offset] or BreakIterator.DONE.
	*
	* @throws IllegalArgumentException is offset is not valid.
	*/
	public int getBeginning(int offset) {
	// TODO: Check if usage of this can be updated to getBeginning(offset, true) if
	// so this method can be removed.
	return getBeginning(offset, false);
	}

	/**
	* If <code>offset</code> is within a word, returns the index of the last character of that
	* word plus one, otherwise returns BreakIterator.DONE.
	*
	* The offsets that are considered to be part of a word are the indexes of its characters,
	* <i>as well as</i> the index of its last character plus one.
	* If offset is the index of a low surrogate character, BreakIterator.DONE will be returned.
	*
	* Valid range for offset is [0..textLength] (note the inclusive upper bound).
	* The returned value is within [offset..textLength] or BreakIterator.DONE.
	*
	* @throws IllegalArgumentException is offset is not valid.
	*/
	public int getEnd(int offset) {
	// TODO: Check if usage of this can be updated to getEnd(offset, true), if
	// so this method can be removed.
	return getEnd(offset, false);
	}

	/**
	* If the <code>offset</code> is within a word or on a word boundary that can only be
	* considered the start of a word (e.g. _word where "_" is any character that would not
	* be considered part of the word) then this returns the index of the first character of
	* that word.
	*
	* If the offset is on a word boundary that can be considered the start and end of a
	* word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	* between AA and BB, this would return the start of the previous word, AA.
	*
	* Returns BreakIterator.DONE if there is no previous boundary.
	*
	* @throws IllegalArgumentException is offset is not valid.
	*/
	public int getPrevWordBeginningOnTwoWordsBoundary(int offset) {
	return getBeginning(offset, true);
	}

	/**
	* If the <code>offset</code> is within a word or on a word boundary that can only be
	* considered the end of a word (e.g. word_ where "_" is any character that would not
	* be considered part of the word) then this returns the index of the last character
	* plus one of that word.
	*
	* If the offset is on a word boundary that can be considered the start and end of a
	* word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	* between AA and BB, this would return the end of the next word, BB.
	*
	* Returns BreakIterator.DONE if there is no next boundary.
	*
	* @throws IllegalArgumentException is offset is not valid.
	*/
	public int getNextWordEndOnTwoWordBoundary(int offset) {
	return getEnd(offset, true);
	}

	/**
	* If the <code>offset</code> is within a word or on a word boundary that can only be
	* considered the start of a word (e.g. _word where "_" is any character that would not
	* be considered part of the word) then this returns the index of the first character of
	* that word.
	*
	* If the offset is on a word boundary that can be considered the start and end of a
	* word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	* between AA and BB, and getPrevWordBeginningOnTwoWordsBoundary is true then this would
	* return the start of the previous word, AA. Otherwise it would return the current offset,
	* the start of BB.
	*
	* Returns BreakIterator.DONE if there is no previous boundary.
	*
	* @throws IllegalArgumentException is offset is not valid.
	*/
	private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) {
	final int shiftedOffset = offset - mOffsetShift;
	checkOffsetIsValid(shiftedOffset);

	if (isOnLetterOrDigit(shiftedOffset)) {
	if (mIterator.isBoundary(shiftedOffset)
	&& (!isAfterLetterOrDigit(shiftedOffset)
	\|\| !getPrevWordBeginningOnTwoWordsBoundary)) {
	return shiftedOffset + mOffsetShift;
	} else {
	return mIterator.preceding(shiftedOffset) + mOffsetShift;
	}
	} else {
	if (isAfterLetterOrDigit(shiftedOffset)) {
	return mIterator.preceding(shiftedOffset) + mOffsetShift;
	}
	}
	return BreakIterator.DONE;
	}

	/**
	* If the <code>offset</code> is within a word or on a word boundary that can only be
	* considered the end of a word (e.g. word_ where "_" is any character that would not be
	* considered part of the word) then this returns the index of the last character plus one
	* of that word.
	*
	* If the offset is on a word boundary that can be considered the start and end of a
	* word, e.g. AABB (where AA and BB are both words) and the offset is the boundary
	* between AA and BB, and getNextWordEndOnTwoWordBoundary is true then this would return
	* the end of the next word, BB. Otherwise it would return the current offset, the end
	* of AA.
	*
	* Returns BreakIterator.DONE if there is no next boundary.
	*
	* @throws IllegalArgumentException is offset is not valid.
	*/
	private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) {
	final int shiftedOffset = offset - mOffsetShift;
	checkOffsetIsValid(shiftedOffset);

	if (isAfterLetterOrDigit(shiftedOffset)) {
	if (mIterator.isBoundary(shiftedOffset)
	&& (!isOnLetterOrDigit(shiftedOffset) \|\| !getNextWordEndOnTwoWordBoundary)) {
	return shiftedOffset + mOffsetShift;
	} else {
	return mIterator.following(shiftedOffset) + mOffsetShift;
	}
	} else {
	if (isOnLetterOrDigit(shiftedOffset)) {
	return mIterator.following(shiftedOffset) + mOffsetShift;
	}
	}
	return BreakIterator.DONE;
	}

	/**
	* If <code>offset</code> is within a group of punctuation as defined
	* by {@link #isPunctuation(int)}, returns the index of the first character
	* of that group, otherwise returns BreakIterator.DONE.
	*
	* @param offset the offset to search from.
	*/
	public int getPunctuationBeginning(int offset) {
	while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) {
	offset = prevBoundary(offset);
	}
	// No need to shift offset, prevBoundary handles that.
	return offset;
	}

	/**
	* If <code>offset</code> is within a group of punctuation as defined
	* by {@link #isPunctuation(int)}, returns the index of the last character
	* of that group plus one, otherwise returns BreakIterator.DONE.
	*
	* @param offset the offset to search from.
	*/
	public int getPunctuationEnd(int offset) {
	while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) {
	offset = nextBoundary(offset);
	}
	// No need to shift offset, nextBoundary handles that.
	return offset;
	}

	/**
	* Indicates if the provided offset is after a punctuation character
	* as defined by {@link #isPunctuation(int)}.
	*
	* @param offset the offset to check from.
	* @return Whether the offset is after a punctuation character.
	*/
	public boolean isAfterPunctuation(int offset) {
	final int shiftedOffset = offset - mOffsetShift;
	if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
	final int codePoint = mString.codePointBefore(shiftedOffset);
	return isPunctuation(codePoint);
	}
	return false;
	}

	/**
	* Indicates if the provided offset is at a punctuation character
	* as defined by {@link #isPunctuation(int)}.
	*
	* @param offset the offset to check from.
	* @return Whether the offset is at a punctuation character.
	*/
	public boolean isOnPunctuation(int offset) {
	final int shiftedOffset = offset - mOffsetShift;
	if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
	final int codePoint = mString.codePointAt(shiftedOffset);
	return isPunctuation(codePoint);
	}
	return false;
	}

	private boolean isPunctuationStartBoundary(int offset) {
	return isOnPunctuation(offset) && !isAfterPunctuation(offset);
	}

	private boolean isPunctuationEndBoundary(int offset) {
	return !isOnPunctuation(offset) && isAfterPunctuation(offset);
	}

	private boolean isPunctuation(int cp) {
	int type = Character.getType(cp);
	return (type == Character.CONNECTOR_PUNCTUATION \|\|
	type == Character.DASH_PUNCTUATION \|\|
	type == Character.END_PUNCTUATION \|\|
	type == Character.FINAL_QUOTE_PUNCTUATION \|\|
	type == Character.INITIAL_QUOTE_PUNCTUATION \|\|
	type == Character.OTHER_PUNCTUATION \|\|
	type == Character.START_PUNCTUATION);
	}

	private boolean isAfterLetterOrDigit(int shiftedOffset) {
	if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) {
	final int codePoint = mString.codePointBefore(shiftedOffset);
	if (Character.isLetterOrDigit(codePoint)) return true;
	}
	return false;
	}

	private boolean isOnLetterOrDigit(int shiftedOffset) {
	if (shiftedOffset >= 0 && shiftedOffset < mString.length()) {
	final int codePoint = mString.codePointAt(shiftedOffset);
	if (Character.isLetterOrDigit(codePoint)) return true;
	}
	return false;
	}

	private void checkOffsetIsValid(int shiftedOffset) {
	if (shiftedOffset < 0 \|\| shiftedOffset > mString.length()) {
	throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) +
	". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) +
	"]");
	}
	}
	}