| /* |
| * Copyright (C) 2011 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package com.android.inputmethod.latin.makedict; |
| |
| import com.android.inputmethod.annotations.UsedForTesting; |
| import com.android.inputmethod.latin.define.DecoderSpecificConstants; |
| import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions; |
| |
| import java.util.ArrayList; |
| import java.util.Arrays; |
| import java.util.Collections; |
| import java.util.Iterator; |
| import java.util.LinkedList; |
| |
| /** |
| * A dictionary that can fusion heads and tails of words for more compression. |
| */ |
| @UsedForTesting |
| public final class FusionDictionary implements Iterable<WordProperty> { |
| private static final boolean DBG = MakedictLog.DBG; |
| |
| private static int CHARACTER_NOT_FOUND_INDEX = -1; |
| |
| /** |
| * A node array of the dictionary, containing several PtNodes. |
| * |
| * A PtNodeArray is but an ordered array of PtNodes, which essentially contain all the |
| * real information. |
| * This class also contains fields to cache size and address, to help with binary |
| * generation. |
| */ |
| public static final class PtNodeArray { |
| ArrayList<PtNode> mData; |
| // To help with binary generation |
| int mCachedSize = Integer.MIN_VALUE; |
| // mCachedAddressBefore/AfterUpdate are helpers for binary dictionary generation. They |
| // always hold the same value except between dictionary address compression, during which |
| // the update process needs to know about both values at the same time. Updating will |
| // update the AfterUpdate value, and the code will move them to BeforeUpdate before |
| // the next update pass. |
| int mCachedAddressBeforeUpdate = Integer.MIN_VALUE; |
| int mCachedAddressAfterUpdate = Integer.MIN_VALUE; |
| int mCachedParentAddress = 0; |
| |
| public PtNodeArray() { |
| mData = new ArrayList<>(); |
| } |
| public PtNodeArray(ArrayList<PtNode> data) { |
| Collections.sort(data, PTNODE_COMPARATOR); |
| mData = data; |
| } |
| } |
| |
| /** |
| * PtNode is a group of characters, with probability information, shortcut targets, bigrams, |
| * and children (Pt means Patricia Trie). |
| * |
| * This is the central class of the in-memory representation. A PtNode is what can |
| * be seen as a traditional "trie node", except it can hold several characters at the |
| * same time. A PtNode essentially represents one or several characters in the middle |
| * of the trie tree; as such, it can be a terminal, and it can have children. |
| * In this in-memory representation, whether the PtNode is a terminal or not is represented |
| * by mProbabilityInfo. The PtNode is a terminal when the mProbabilityInfo is not null and the |
| * PtNode is not a terminal when the mProbabilityInfo is null. A terminal may have non-null |
| * shortcuts and/or bigrams, but a non-terminal may not. Moreover, children, if present, |
| * are non-null. |
| */ |
| public static final class PtNode { |
| private static final int NOT_A_TERMINAL = -1; |
| final int mChars[]; |
| ArrayList<WeightedString> mBigrams; |
| // null == mProbabilityInfo indicates this is not a terminal. |
| ProbabilityInfo mProbabilityInfo; |
| int mTerminalId; // NOT_A_TERMINAL == mTerminalId indicates this is not a terminal. |
| PtNodeArray mChildren; |
| boolean mIsNotAWord; // Only a shortcut |
| boolean mIsPossiblyOffensive; |
| // mCachedSize and mCachedAddressBefore/AfterUpdate are helpers for binary dictionary |
| // generation. Before and After always hold the same value except during dictionary |
| // address compression, where the update process needs to know about both values at the |
| // same time. Updating will update the AfterUpdate value, and the code will move them |
| // to BeforeUpdate before the next update pass. |
| // The update process does not need two versions of mCachedSize. |
| int mCachedSize; // The size, in bytes, of this PtNode. |
| int mCachedAddressBeforeUpdate; // The address of this PtNode (before update) |
| int mCachedAddressAfterUpdate; // The address of this PtNode (after update) |
| |
| public PtNode(final int[] chars, final ArrayList<WeightedString> bigrams, |
| final ProbabilityInfo probabilityInfo, final boolean isNotAWord, |
| final boolean isPossiblyOffensive) { |
| mChars = chars; |
| mProbabilityInfo = probabilityInfo; |
| mTerminalId = probabilityInfo == null ? NOT_A_TERMINAL : probabilityInfo.mProbability; |
| mBigrams = bigrams; |
| mChildren = null; |
| mIsNotAWord = isNotAWord; |
| mIsPossiblyOffensive = isPossiblyOffensive; |
| } |
| |
| public PtNode(final int[] chars, final ArrayList<WeightedString> bigrams, |
| final ProbabilityInfo probabilityInfo, final boolean isNotAWord, |
| final boolean isPossiblyOffensive, final PtNodeArray children) { |
| mChars = chars; |
| mProbabilityInfo = probabilityInfo; |
| mBigrams = bigrams; |
| mChildren = children; |
| mIsNotAWord = isNotAWord; |
| mIsPossiblyOffensive = isPossiblyOffensive; |
| } |
| |
| public void addChild(PtNode n) { |
| if (null == mChildren) { |
| mChildren = new PtNodeArray(); |
| } |
| mChildren.mData.add(n); |
| } |
| |
| public int getTerminalId() { |
| return mTerminalId; |
| } |
| |
| public boolean isTerminal() { |
| return mProbabilityInfo != null; |
| } |
| |
| public int getProbability() { |
| return isTerminal() ? mProbabilityInfo.mProbability : NOT_A_TERMINAL; |
| } |
| |
| public boolean getIsNotAWord() { |
| return mIsNotAWord; |
| } |
| |
| public boolean getIsPossiblyOffensive() { |
| return mIsPossiblyOffensive; |
| } |
| |
| public ArrayList<WeightedString> getBigrams() { |
| // We don't want write permission to escape outside the package, so we return a copy |
| if (null == mBigrams) return null; |
| final ArrayList<WeightedString> copyOfBigrams = new ArrayList<>(mBigrams); |
| return copyOfBigrams; |
| } |
| |
| public boolean hasSeveralChars() { |
| assert(mChars.length > 0); |
| return 1 < mChars.length; |
| } |
| |
| /** |
| * Adds a word to the bigram list. Updates the probability information if the word already |
| * exists. |
| */ |
| public void addBigram(final String word, final ProbabilityInfo probabilityInfo) { |
| if (mBigrams == null) { |
| mBigrams = new ArrayList<>(); |
| } |
| WeightedString bigram = getBigram(word); |
| if (bigram != null) { |
| bigram.mProbabilityInfo = probabilityInfo; |
| } else { |
| bigram = new WeightedString(word, probabilityInfo); |
| mBigrams.add(bigram); |
| } |
| } |
| |
| /** |
| * Gets the bigram for the given word. |
| * Returns null if the word is not in the bigrams list. |
| */ |
| public WeightedString getBigram(final String word) { |
| // TODO: Don't do a linear search |
| if (mBigrams != null) { |
| final int size = mBigrams.size(); |
| for (int i = 0; i < size; ++i) { |
| WeightedString bigram = mBigrams.get(i); |
| if (bigram.mWord.equals(word)) { |
| return bigram; |
| } |
| } |
| } |
| return null; |
| } |
| |
| /** |
| * Updates the PtNode with the given properties. Adds the shortcut and bigram lists to |
| * the existing ones if any. Note: unigram, bigram, and shortcut frequencies are only |
| * updated if they are higher than the existing ones. |
| */ |
| void update(final ProbabilityInfo probabilityInfo, |
| final ArrayList<WeightedString> bigrams, |
| final boolean isNotAWord, final boolean isPossiblyOffensive) { |
| mProbabilityInfo = ProbabilityInfo.max(mProbabilityInfo, probabilityInfo); |
| if (bigrams != null) { |
| if (mBigrams == null) { |
| mBigrams = bigrams; |
| } else { |
| final int size = bigrams.size(); |
| for (int i = 0; i < size; ++i) { |
| final WeightedString bigram = bigrams.get(i); |
| final WeightedString existingBigram = getBigram(bigram.mWord); |
| if (existingBigram == null) { |
| mBigrams.add(bigram); |
| } else { |
| existingBigram.mProbabilityInfo = ProbabilityInfo.max( |
| existingBigram.mProbabilityInfo, bigram.mProbabilityInfo); |
| } |
| } |
| } |
| } |
| mIsNotAWord = isNotAWord; |
| mIsPossiblyOffensive = isPossiblyOffensive; |
| } |
| } |
| |
| public final DictionaryOptions mOptions; |
| public final PtNodeArray mRootNodeArray; |
| |
| public FusionDictionary(final PtNodeArray rootNodeArray, final DictionaryOptions options) { |
| mRootNodeArray = rootNodeArray; |
| mOptions = options; |
| } |
| |
| public void addOptionAttribute(final String key, final String value) { |
| mOptions.mAttributes.put(key, value); |
| } |
| |
| /** |
| * Helper method to convert a String to an int array. |
| */ |
| static int[] getCodePoints(final String word) { |
| // TODO: this is a copy-paste of the old contents of StringUtils.toCodePointArray, |
| // which is not visible from the makedict package. Factor this code. |
| final int length = word.length(); |
| if (length <= 0) return new int[] {}; |
| final char[] characters = word.toCharArray(); |
| final int[] codePoints = new int[Character.codePointCount(characters, 0, length)]; |
| int codePoint = Character.codePointAt(characters, 0); |
| int dsti = 0; |
| for (int srci = Character.charCount(codePoint); |
| srci < length; srci += Character.charCount(codePoint), ++dsti) { |
| codePoints[dsti] = codePoint; |
| codePoint = Character.codePointAt(characters, srci); |
| } |
| codePoints[dsti] = codePoint; |
| return codePoints; |
| } |
| |
| /** |
| * Helper method to add a word as a string. |
| * |
| * This method adds a word to the dictionary with the given frequency. Optional |
| * lists of bigrams can be passed here. For each word inside, |
| * they will be added to the dictionary as necessary. |
| * @param word the word to add. |
| * @param probabilityInfo probability information of the word. |
| * @param isNotAWord true if this should not be considered a word (e.g. shortcut only) |
| * @param isPossiblyOffensive true if this word is possibly offensive |
| */ |
| public void add(final String word, final ProbabilityInfo probabilityInfo, |
| final boolean isNotAWord, final boolean isPossiblyOffensive) { |
| add(getCodePoints(word), probabilityInfo, isNotAWord, isPossiblyOffensive); |
| } |
| |
| /** |
| * Validity check for a PtNode array. |
| * |
| * This method checks that all PtNodes in a node array are ordered as expected. |
| * If they are, nothing happens. If they aren't, an exception is thrown. |
| */ |
| private static void checkStack(PtNodeArray ptNodeArray) { |
| ArrayList<PtNode> stack = ptNodeArray.mData; |
| int lastValue = -1; |
| for (int i = 0; i < stack.size(); ++i) { |
| int currentValue = stack.get(i).mChars[0]; |
| if (currentValue <= lastValue) { |
| throw new RuntimeException("Invalid stack"); |
| } |
| lastValue = currentValue; |
| } |
| } |
| |
| /** |
| * Helper method to add a new bigram to the dictionary. |
| * |
| * @param word0 the previous word of the context |
| * @param word1 the next word of the context |
| * @param probabilityInfo the bigram probability info |
| */ |
| public void setBigram(final String word0, final String word1, |
| final ProbabilityInfo probabilityInfo) { |
| PtNode ptNode0 = findWordInTree(mRootNodeArray, word0); |
| if (ptNode0 != null) { |
| final PtNode ptNode1 = findWordInTree(mRootNodeArray, word1); |
| if (ptNode1 == null) { |
| add(getCodePoints(word1), new ProbabilityInfo(0), false /* isNotAWord */, |
| false /* isPossiblyOffensive */); |
| // The PtNode for the first word may have moved by the above insertion, |
| // if word1 and word2 share a common stem that happens not to have been |
| // a cutting point until now. In this case, we need to refresh ptNode. |
| ptNode0 = findWordInTree(mRootNodeArray, word0); |
| } |
| ptNode0.addBigram(word1, probabilityInfo); |
| } else { |
| throw new RuntimeException("First word of bigram not found " + word0); |
| } |
| } |
| |
| /** |
| * Add a word to this dictionary. |
| * |
| * The shortcuts, if any, have to be in the dictionary already. If they aren't, |
| * an exception is thrown. |
| * @param word the word, as an int array. |
| * @param probabilityInfo the probability information of the word. |
| * @param isNotAWord true if this is not a word for spellchecking purposes (shortcut only or so) |
| * @param isPossiblyOffensive true if this word is possibly offensive |
| */ |
| private void add(final int[] word, final ProbabilityInfo probabilityInfo, |
| final boolean isNotAWord, final boolean isPossiblyOffensive) { |
| assert(probabilityInfo.mProbability <= FormatSpec.MAX_TERMINAL_FREQUENCY); |
| if (word.length >= DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) { |
| MakedictLog.w("Ignoring a word that is too long: word.length = " + word.length); |
| return; |
| } |
| |
| PtNodeArray currentNodeArray = mRootNodeArray; |
| int charIndex = 0; |
| |
| PtNode currentPtNode = null; |
| int differentCharIndex = 0; // Set by the loop to the index of the char that differs |
| int nodeIndex = findIndexOfChar(mRootNodeArray, word[charIndex]); |
| while (CHARACTER_NOT_FOUND_INDEX != nodeIndex) { |
| currentPtNode = currentNodeArray.mData.get(nodeIndex); |
| differentCharIndex = compareCharArrays(currentPtNode.mChars, word, charIndex); |
| if (ARRAYS_ARE_EQUAL != differentCharIndex |
| && differentCharIndex < currentPtNode.mChars.length) break; |
| if (null == currentPtNode.mChildren) break; |
| charIndex += currentPtNode.mChars.length; |
| if (charIndex >= word.length) break; |
| currentNodeArray = currentPtNode.mChildren; |
| nodeIndex = findIndexOfChar(currentNodeArray, word[charIndex]); |
| } |
| |
| if (CHARACTER_NOT_FOUND_INDEX == nodeIndex) { |
| // No node at this point to accept the word. Create one. |
| final int insertionIndex = findInsertionIndex(currentNodeArray, word[charIndex]); |
| final PtNode newPtNode = new PtNode(Arrays.copyOfRange(word, charIndex, word.length), |
| null /* bigrams */, probabilityInfo, isNotAWord, |
| isPossiblyOffensive); |
| currentNodeArray.mData.add(insertionIndex, newPtNode); |
| if (DBG) checkStack(currentNodeArray); |
| } else { |
| // There is a word with a common prefix. |
| if (differentCharIndex == currentPtNode.mChars.length) { |
| if (charIndex + differentCharIndex >= word.length) { |
| // The new word is a prefix of an existing word, but the node on which it |
| // should end already exists as is. Since the old PtNode was not a terminal, |
| // make it one by filling in its frequency and other attributes |
| currentPtNode.update(probabilityInfo, null, isNotAWord, |
| isPossiblyOffensive); |
| } else { |
| // The new word matches the full old word and extends past it. |
| // We only have to create a new node and add it to the end of this. |
| final PtNode newNode = new PtNode( |
| Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), |
| null /* bigrams */, probabilityInfo, |
| isNotAWord, isPossiblyOffensive); |
| currentPtNode.mChildren = new PtNodeArray(); |
| currentPtNode.mChildren.mData.add(newNode); |
| } |
| } else { |
| if (0 == differentCharIndex) { |
| // Exact same word. Update the frequency if higher. This will also add the |
| // new shortcuts to the existing shortcut list if it already exists. |
| currentPtNode.update(probabilityInfo, null, |
| currentPtNode.mIsNotAWord && isNotAWord, |
| currentPtNode.mIsPossiblyOffensive || isPossiblyOffensive); |
| } else { |
| // Partial prefix match only. We have to replace the current node with a node |
| // containing the current prefix and create two new ones for the tails. |
| PtNodeArray newChildren = new PtNodeArray(); |
| final PtNode newOldWord = new PtNode( |
| Arrays.copyOfRange(currentPtNode.mChars, differentCharIndex, |
| currentPtNode.mChars.length), |
| currentPtNode.mBigrams, currentPtNode.mProbabilityInfo, |
| currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive, |
| currentPtNode.mChildren); |
| newChildren.mData.add(newOldWord); |
| |
| final PtNode newParent; |
| if (charIndex + differentCharIndex >= word.length) { |
| newParent = new PtNode( |
| Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), |
| null /* bigrams */, probabilityInfo, |
| isNotAWord, isPossiblyOffensive, newChildren); |
| } else { |
| newParent = new PtNode( |
| Arrays.copyOfRange(currentPtNode.mChars, 0, differentCharIndex), |
| null /* bigrams */, null /* probabilityInfo */, |
| false /* isNotAWord */, false /* isPossiblyOffensive */, |
| newChildren); |
| final PtNode newWord = new PtNode(Arrays.copyOfRange(word, |
| charIndex + differentCharIndex, word.length), |
| null /* bigrams */, probabilityInfo, |
| isNotAWord, isPossiblyOffensive); |
| final int addIndex = word[charIndex + differentCharIndex] |
| > currentPtNode.mChars[differentCharIndex] ? 1 : 0; |
| newChildren.mData.add(addIndex, newWord); |
| } |
| currentNodeArray.mData.set(nodeIndex, newParent); |
| } |
| if (DBG) checkStack(currentNodeArray); |
| } |
| } |
| } |
| |
| private static int ARRAYS_ARE_EQUAL = 0; |
| |
| /** |
| * Custom comparison of two int arrays taken to contain character codes. |
| * |
| * This method compares the two arrays passed as an argument in a lexicographic way, |
| * with an offset in the dst string. |
| * This method does NOT test for the first character. It is taken to be equal. |
| * I repeat: this method starts the comparison at 1 <> dstOffset + 1. |
| * The index where the strings differ is returned. ARRAYS_ARE_EQUAL = 0 is returned if the |
| * strings are equal. This works BECAUSE we don't look at the first character. |
| * |
| * @param src the left-hand side string of the comparison. |
| * @param dst the right-hand side string of the comparison. |
| * @param dstOffset the offset in the right-hand side string. |
| * @return the index at which the strings differ, or ARRAYS_ARE_EQUAL = 0 if they don't. |
| */ |
| private static int compareCharArrays(final int[] src, final int[] dst, int dstOffset) { |
| // We do NOT test the first char, because we come from a method that already |
| // tested it. |
| for (int i = 1; i < src.length; ++i) { |
| if (dstOffset + i >= dst.length) return i; |
| if (src[i] != dst[dstOffset + i]) return i; |
| } |
| if (dst.length > src.length) return src.length; |
| return ARRAYS_ARE_EQUAL; |
| } |
| |
| /** |
| * Helper class that compares and sorts two PtNodes according to their |
| * first element only. I repeat: ONLY the first element is considered, the rest |
| * is ignored. |
| * This comparator imposes orderings that are inconsistent with equals. |
| */ |
| static final class PtNodeComparator implements java.util.Comparator<PtNode> { |
| @Override |
| public int compare(PtNode p1, PtNode p2) { |
| if (p1.mChars[0] == p2.mChars[0]) return 0; |
| return p1.mChars[0] < p2.mChars[0] ? -1 : 1; |
| } |
| } |
| final static PtNodeComparator PTNODE_COMPARATOR = new PtNodeComparator(); |
| |
| /** |
| * Finds the insertion index of a character within a node array. |
| */ |
| private static int findInsertionIndex(final PtNodeArray nodeArray, int character) { |
| final ArrayList<PtNode> data = nodeArray.mData; |
| final PtNode reference = new PtNode(new int[] { character }, |
| null /* bigrams */, null /* probabilityInfo */, |
| false /* isNotAWord */, false /* isPossiblyOffensive */); |
| int result = Collections.binarySearch(data, reference, PTNODE_COMPARATOR); |
| return result >= 0 ? result : -result - 1; |
| } |
| |
| /** |
| * Find the index of a char in a node array, if it exists. |
| * |
| * @param nodeArray the node array to search in. |
| * @param character the character to search for. |
| * @return the position of the character if it's there, or CHARACTER_NOT_FOUND_INDEX = -1 else. |
| */ |
| private static int findIndexOfChar(final PtNodeArray nodeArray, int character) { |
| final int insertionIndex = findInsertionIndex(nodeArray, character); |
| if (nodeArray.mData.size() <= insertionIndex) return CHARACTER_NOT_FOUND_INDEX; |
| return character == nodeArray.mData.get(insertionIndex).mChars[0] ? insertionIndex |
| : CHARACTER_NOT_FOUND_INDEX; |
| } |
| |
| /** |
| * Helper method to find a word in a given branch. |
| */ |
| public static PtNode findWordInTree(final PtNodeArray rootNodeArray, final String string) { |
| PtNodeArray nodeArray = rootNodeArray; |
| int index = 0; |
| final StringBuilder checker = DBG ? new StringBuilder() : null; |
| final int[] codePoints = getCodePoints(string); |
| |
| PtNode currentPtNode; |
| do { |
| int indexOfGroup = findIndexOfChar(nodeArray, codePoints[index]); |
| if (CHARACTER_NOT_FOUND_INDEX == indexOfGroup) return null; |
| currentPtNode = nodeArray.mData.get(indexOfGroup); |
| |
| if (codePoints.length - index < currentPtNode.mChars.length) return null; |
| int newIndex = index; |
| while (newIndex < codePoints.length && newIndex - index < currentPtNode.mChars.length) { |
| if (currentPtNode.mChars[newIndex - index] != codePoints[newIndex]) return null; |
| newIndex++; |
| } |
| index = newIndex; |
| |
| if (DBG) { |
| checker.append(new String(currentPtNode.mChars, 0, currentPtNode.mChars.length)); |
| } |
| if (index < codePoints.length) { |
| nodeArray = currentPtNode.mChildren; |
| } |
| } while (null != nodeArray && index < codePoints.length); |
| |
| if (index < codePoints.length) return null; |
| if (!currentPtNode.isTerminal()) return null; |
| if (DBG && !string.equals(checker.toString())) return null; |
| return currentPtNode; |
| } |
| |
| /** |
| * Helper method to find out whether a word is in the dict or not. |
| */ |
| public boolean hasWord(final String s) { |
| if (null == s || "".equals(s)) { |
| throw new RuntimeException("Can't search for a null or empty string"); |
| } |
| return null != findWordInTree(mRootNodeArray, s); |
| } |
| |
| /** |
| * Recursively count the number of PtNodes in a given branch of the trie. |
| * |
| * @param nodeArray the parent node. |
| * @return the number of PtNodes in all the branch under this node. |
| */ |
| public static int countPtNodes(final PtNodeArray nodeArray) { |
| final int nodeSize = nodeArray.mData.size(); |
| int size = nodeSize; |
| for (int i = nodeSize - 1; i >= 0; --i) { |
| PtNode ptNode = nodeArray.mData.get(i); |
| if (null != ptNode.mChildren) |
| size += countPtNodes(ptNode.mChildren); |
| } |
| return size; |
| } |
| |
| /** |
| * Iterator to walk through a dictionary. |
| * |
| * This is purely for convenience. |
| */ |
| public static final class DictionaryIterator implements Iterator<WordProperty> { |
| private static final class Position { |
| public Iterator<PtNode> pos; |
| public int length; |
| public Position(ArrayList<PtNode> ptNodes) { |
| pos = ptNodes.iterator(); |
| length = 0; |
| } |
| } |
| final StringBuilder mCurrentString; |
| final LinkedList<Position> mPositions; |
| |
| public DictionaryIterator(ArrayList<PtNode> ptRoot) { |
| mCurrentString = new StringBuilder(); |
| mPositions = new LinkedList<>(); |
| final Position rootPos = new Position(ptRoot); |
| mPositions.add(rootPos); |
| } |
| |
| @Override |
| public boolean hasNext() { |
| for (Position p : mPositions) { |
| if (p.pos.hasNext()) { |
| return true; |
| } |
| } |
| return false; |
| } |
| |
| @Override |
| public WordProperty next() { |
| Position currentPos = mPositions.getLast(); |
| mCurrentString.setLength(currentPos.length); |
| |
| do { |
| if (currentPos.pos.hasNext()) { |
| final PtNode currentPtNode = currentPos.pos.next(); |
| currentPos.length = mCurrentString.length(); |
| for (int i : currentPtNode.mChars) { |
| mCurrentString.append(Character.toChars(i)); |
| } |
| if (null != currentPtNode.mChildren) { |
| currentPos = new Position(currentPtNode.mChildren.mData); |
| currentPos.length = mCurrentString.length(); |
| mPositions.addLast(currentPos); |
| } |
| if (currentPtNode.isTerminal()) { |
| return new WordProperty(mCurrentString.toString(), |
| currentPtNode.mProbabilityInfo, currentPtNode.mBigrams, |
| currentPtNode.mIsNotAWord, currentPtNode.mIsPossiblyOffensive); |
| } |
| } else { |
| mPositions.removeLast(); |
| currentPos = mPositions.getLast(); |
| mCurrentString.setLength(mPositions.getLast().length); |
| } |
| } while (true); |
| } |
| |
| @Override |
| public void remove() { |
| throw new UnsupportedOperationException("Unsupported yet"); |
| } |
| |
| } |
| |
| /** |
| * Method to return an iterator. |
| * |
| * This method enables Java's enhanced for loop. With this you can have a FusionDictionary x |
| * and say : for (Word w : x) {} |
| */ |
| @Override |
| public Iterator<WordProperty> iterator() { |
| return new DictionaryIterator(mRootNodeArray.mData); |
| } |
| } |