| |
| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package android.text.method; |
| |
| import android.text.Selection; |
| import android.text.SpannableStringBuilder; |
| |
| import android.icu.text.BreakIterator; |
| import java.util.Locale; |
| |
| /** |
| * Walks through cursor positions at word boundaries. Internally uses |
| * {@link BreakIterator#getWordInstance()}, and caches {@link CharSequence} |
| * for performance reasons. |
| * |
| * Also provides methods to determine word boundaries. |
| * {@hide} |
| */ |
| public class WordIterator implements Selection.PositionIterator { |
| // Size of the window for the word iterator, should be greater than the longest word's length |
| private static final int WINDOW_WIDTH = 50; |
| |
| private String mString; |
| private int mOffsetShift; |
| |
| private BreakIterator mIterator; |
| |
| /** |
| * Constructs a WordIterator using the default locale. |
| */ |
| public WordIterator() { |
| this(Locale.getDefault()); |
| } |
| |
| /** |
| * Constructs a new WordIterator for the specified locale. |
| * @param locale The locale to be used when analysing the text. |
| */ |
| public WordIterator(Locale locale) { |
| mIterator = BreakIterator.getWordInstance(locale); |
| } |
| |
| public void setCharSequence(CharSequence charSequence, int start, int end) { |
| mOffsetShift = Math.max(0, start - WINDOW_WIDTH); |
| final int windowEnd = Math.min(charSequence.length(), end + WINDOW_WIDTH); |
| |
| if (charSequence instanceof SpannableStringBuilder) { |
| mString = ((SpannableStringBuilder) charSequence).substring(mOffsetShift, windowEnd); |
| } else { |
| mString = charSequence.subSequence(mOffsetShift, windowEnd).toString(); |
| } |
| mIterator.setText(mString); |
| } |
| |
| /** {@inheritDoc} */ |
| public int preceding(int offset) { |
| int shiftedOffset = offset - mOffsetShift; |
| do { |
| shiftedOffset = mIterator.preceding(shiftedOffset); |
| if (shiftedOffset == BreakIterator.DONE) { |
| return BreakIterator.DONE; |
| } |
| if (isOnLetterOrDigit(shiftedOffset)) { |
| return shiftedOffset + mOffsetShift; |
| } |
| } while (true); |
| } |
| |
| /** {@inheritDoc} */ |
| public int following(int offset) { |
| int shiftedOffset = offset - mOffsetShift; |
| do { |
| shiftedOffset = mIterator.following(shiftedOffset); |
| if (shiftedOffset == BreakIterator.DONE) { |
| return BreakIterator.DONE; |
| } |
| if (isAfterLetterOrDigit(shiftedOffset)) { |
| return shiftedOffset + mOffsetShift; |
| } |
| } while (true); |
| } |
| |
| /** {@inheritDoc} */ |
| public boolean isBoundary(int offset) { |
| int shiftedOffset = offset - mOffsetShift; |
| checkOffsetIsValid(shiftedOffset); |
| return mIterator.isBoundary(shiftedOffset); |
| } |
| |
| /** |
| * Returns the position of next boundary after the given offset. Returns |
| * {@code DONE} if there is no boundary after the given offset. |
| * |
| * @param offset the given start position to search from. |
| * @return the position of the last boundary preceding the given offset. |
| */ |
| public int nextBoundary(int offset) { |
| int shiftedOffset = offset - mOffsetShift; |
| shiftedOffset = mIterator.following(shiftedOffset); |
| if (shiftedOffset == BreakIterator.DONE) { |
| return BreakIterator.DONE; |
| } |
| return shiftedOffset + mOffsetShift; |
| } |
| |
| /** |
| * Returns the position of boundary preceding the given offset or |
| * {@code DONE} if the given offset specifies the starting position. |
| * |
| * @param offset the given start position to search from. |
| * @return the position of the last boundary preceding the given offset. |
| */ |
| public int prevBoundary(int offset) { |
| int shiftedOffset = offset - mOffsetShift; |
| shiftedOffset = mIterator.preceding(shiftedOffset); |
| if (shiftedOffset == BreakIterator.DONE) { |
| return BreakIterator.DONE; |
| } |
| return shiftedOffset + mOffsetShift; |
| } |
| |
| /** If <code>offset</code> is within a word, returns the index of the first character of that |
| * word, otherwise returns BreakIterator.DONE. |
| * |
| * The offsets that are considered to be part of a word are the indexes of its characters, |
| * <i>as well as</i> the index of its last character plus one. |
| * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned. |
| * |
| * Valid range for offset is [0..textLength] (note the inclusive upper bound). |
| * The returned value is within [0..offset] or BreakIterator.DONE. |
| * |
| * @throws IllegalArgumentException is offset is not valid. |
| */ |
| public int getBeginning(int offset) { |
| // TODO: Check if usage of this can be updated to getBeginning(offset, true) if |
| // so this method can be removed. |
| return getBeginning(offset, false); |
| } |
| |
| /** |
| * If <code>offset</code> is within a word, returns the index of the last character of that |
| * word plus one, otherwise returns BreakIterator.DONE. |
| * |
| * The offsets that are considered to be part of a word are the indexes of its characters, |
| * <i>as well as</i> the index of its last character plus one. |
| * If offset is the index of a low surrogate character, BreakIterator.DONE will be returned. |
| * |
| * Valid range for offset is [0..textLength] (note the inclusive upper bound). |
| * The returned value is within [offset..textLength] or BreakIterator.DONE. |
| * |
| * @throws IllegalArgumentException is offset is not valid. |
| */ |
| public int getEnd(int offset) { |
| // TODO: Check if usage of this can be updated to getEnd(offset, true), if |
| // so this method can be removed. |
| return getEnd(offset, false); |
| } |
| |
| /** |
| * If the <code>offset</code> is within a word or on a word boundary that can only be |
| * considered the start of a word (e.g. _word where "_" is any character that would not |
| * be considered part of the word) then this returns the index of the first character of |
| * that word. |
| * |
| * If the offset is on a word boundary that can be considered the start and end of a |
| * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary |
| * between AA and BB, this would return the start of the previous word, AA. |
| * |
| * Returns BreakIterator.DONE if there is no previous boundary. |
| * |
| * @throws IllegalArgumentException is offset is not valid. |
| */ |
| public int getPrevWordBeginningOnTwoWordsBoundary(int offset) { |
| return getBeginning(offset, true); |
| } |
| |
| /** |
| * If the <code>offset</code> is within a word or on a word boundary that can only be |
| * considered the end of a word (e.g. word_ where "_" is any character that would not |
| * be considered part of the word) then this returns the index of the last character |
| * plus one of that word. |
| * |
| * If the offset is on a word boundary that can be considered the start and end of a |
| * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary |
| * between AA and BB, this would return the end of the next word, BB. |
| * |
| * Returns BreakIterator.DONE if there is no next boundary. |
| * |
| * @throws IllegalArgumentException is offset is not valid. |
| */ |
| public int getNextWordEndOnTwoWordBoundary(int offset) { |
| return getEnd(offset, true); |
| } |
| |
| /** |
| * If the <code>offset</code> is within a word or on a word boundary that can only be |
| * considered the start of a word (e.g. _word where "_" is any character that would not |
| * be considered part of the word) then this returns the index of the first character of |
| * that word. |
| * |
| * If the offset is on a word boundary that can be considered the start and end of a |
| * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary |
| * between AA and BB, and getPrevWordBeginningOnTwoWordsBoundary is true then this would |
| * return the start of the previous word, AA. Otherwise it would return the current offset, |
| * the start of BB. |
| * |
| * Returns BreakIterator.DONE if there is no previous boundary. |
| * |
| * @throws IllegalArgumentException is offset is not valid. |
| */ |
| private int getBeginning(int offset, boolean getPrevWordBeginningOnTwoWordsBoundary) { |
| final int shiftedOffset = offset - mOffsetShift; |
| checkOffsetIsValid(shiftedOffset); |
| |
| if (isOnLetterOrDigit(shiftedOffset)) { |
| if (mIterator.isBoundary(shiftedOffset) |
| && (!isAfterLetterOrDigit(shiftedOffset) |
| || !getPrevWordBeginningOnTwoWordsBoundary)) { |
| return shiftedOffset + mOffsetShift; |
| } else { |
| return mIterator.preceding(shiftedOffset) + mOffsetShift; |
| } |
| } else { |
| if (isAfterLetterOrDigit(shiftedOffset)) { |
| return mIterator.preceding(shiftedOffset) + mOffsetShift; |
| } |
| } |
| return BreakIterator.DONE; |
| } |
| |
| /** |
| * If the <code>offset</code> is within a word or on a word boundary that can only be |
| * considered the end of a word (e.g. word_ where "_" is any character that would not be |
| * considered part of the word) then this returns the index of the last character plus one |
| * of that word. |
| * |
| * If the offset is on a word boundary that can be considered the start and end of a |
| * word, e.g. AABB (where AA and BB are both words) and the offset is the boundary |
| * between AA and BB, and getNextWordEndOnTwoWordBoundary is true then this would return |
| * the end of the next word, BB. Otherwise it would return the current offset, the end |
| * of AA. |
| * |
| * Returns BreakIterator.DONE if there is no next boundary. |
| * |
| * @throws IllegalArgumentException is offset is not valid. |
| */ |
| private int getEnd(int offset, boolean getNextWordEndOnTwoWordBoundary) { |
| final int shiftedOffset = offset - mOffsetShift; |
| checkOffsetIsValid(shiftedOffset); |
| |
| if (isAfterLetterOrDigit(shiftedOffset)) { |
| if (mIterator.isBoundary(shiftedOffset) |
| && (!isOnLetterOrDigit(shiftedOffset) || !getNextWordEndOnTwoWordBoundary)) { |
| return shiftedOffset + mOffsetShift; |
| } else { |
| return mIterator.following(shiftedOffset) + mOffsetShift; |
| } |
| } else { |
| if (isOnLetterOrDigit(shiftedOffset)) { |
| return mIterator.following(shiftedOffset) + mOffsetShift; |
| } |
| } |
| return BreakIterator.DONE; |
| } |
| |
| /** |
| * If <code>offset</code> is within a group of punctuation as defined |
| * by {@link #isPunctuation(int)}, returns the index of the first character |
| * of that group, otherwise returns BreakIterator.DONE. |
| * |
| * @param offset the offset to search from. |
| */ |
| public int getPunctuationBeginning(int offset) { |
| while (offset != BreakIterator.DONE && !isPunctuationStartBoundary(offset)) { |
| offset = prevBoundary(offset); |
| } |
| // No need to shift offset, prevBoundary handles that. |
| return offset; |
| } |
| |
| /** |
| * If <code>offset</code> is within a group of punctuation as defined |
| * by {@link #isPunctuation(int)}, returns the index of the last character |
| * of that group plus one, otherwise returns BreakIterator.DONE. |
| * |
| * @param offset the offset to search from. |
| */ |
| public int getPunctuationEnd(int offset) { |
| while (offset != BreakIterator.DONE && !isPunctuationEndBoundary(offset)) { |
| offset = nextBoundary(offset); |
| } |
| // No need to shift offset, nextBoundary handles that. |
| return offset; |
| } |
| |
| /** |
| * Indicates if the provided offset is after a punctuation character |
| * as defined by {@link #isPunctuation(int)}. |
| * |
| * @param offset the offset to check from. |
| * @return Whether the offset is after a punctuation character. |
| */ |
| public boolean isAfterPunctuation(int offset) { |
| final int shiftedOffset = offset - mOffsetShift; |
| if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) { |
| final int codePoint = mString.codePointBefore(shiftedOffset); |
| return isPunctuation(codePoint); |
| } |
| return false; |
| } |
| |
| /** |
| * Indicates if the provided offset is at a punctuation character |
| * as defined by {@link #isPunctuation(int)}. |
| * |
| * @param offset the offset to check from. |
| * @return Whether the offset is at a punctuation character. |
| */ |
| public boolean isOnPunctuation(int offset) { |
| final int shiftedOffset = offset - mOffsetShift; |
| if (shiftedOffset >= 0 && shiftedOffset < mString.length()) { |
| final int codePoint = mString.codePointAt(shiftedOffset); |
| return isPunctuation(codePoint); |
| } |
| return false; |
| } |
| |
| private boolean isPunctuationStartBoundary(int offset) { |
| return isOnPunctuation(offset) && !isAfterPunctuation(offset); |
| } |
| |
| private boolean isPunctuationEndBoundary(int offset) { |
| return !isOnPunctuation(offset) && isAfterPunctuation(offset); |
| } |
| |
| private boolean isPunctuation(int cp) { |
| int type = Character.getType(cp); |
| return (type == Character.CONNECTOR_PUNCTUATION || |
| type == Character.DASH_PUNCTUATION || |
| type == Character.END_PUNCTUATION || |
| type == Character.FINAL_QUOTE_PUNCTUATION || |
| type == Character.INITIAL_QUOTE_PUNCTUATION || |
| type == Character.OTHER_PUNCTUATION || |
| type == Character.START_PUNCTUATION); |
| } |
| |
| private boolean isAfterLetterOrDigit(int shiftedOffset) { |
| if (shiftedOffset >= 1 && shiftedOffset <= mString.length()) { |
| final int codePoint = mString.codePointBefore(shiftedOffset); |
| if (Character.isLetterOrDigit(codePoint)) return true; |
| } |
| return false; |
| } |
| |
| private boolean isOnLetterOrDigit(int shiftedOffset) { |
| if (shiftedOffset >= 0 && shiftedOffset < mString.length()) { |
| final int codePoint = mString.codePointAt(shiftedOffset); |
| if (Character.isLetterOrDigit(codePoint)) return true; |
| } |
| return false; |
| } |
| |
| private void checkOffsetIsValid(int shiftedOffset) { |
| if (shiftedOffset < 0 || shiftedOffset > mString.length()) { |
| throw new IllegalArgumentException("Invalid offset: " + (shiftedOffset + mOffsetShift) + |
| ". Valid range is [" + mOffsetShift + ", " + (mString.length() + mOffsetShift) + |
| "]"); |
| } |
| } |
| } |