/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
| 16 | |
| 17 | package com.android.inputmethod.latin.makedict; |
| 18 | |
| 19 | import com.android.inputmethod.annotations.UsedForTesting; |
| 20 | import com.android.inputmethod.latin.BinaryDictionary; |
Keisuke Kuroyanagi | 5f5feeb | 2014-02-06 15:13:33 +0900 | [diff] [blame] | 21 | import com.android.inputmethod.latin.utils.CollectionUtils; |
Keisuke Kuroyanagi | b24de42 | 2014-02-06 16:09:25 +0900 | [diff] [blame] | 22 | import com.android.inputmethod.latin.utils.CombinedFormatUtils; |
Keisuke Kuroyanagi | 5f5feeb | 2014-02-06 15:13:33 +0900 | [diff] [blame] | 23 | import com.android.inputmethod.latin.utils.StringUtils; |
| 24 | |
| 25 | import java.util.ArrayList; |
| 26 | import java.util.Arrays; |
| 27 | |
| 28 | /** |
| 29 | * Utility class for a word with a probability. |
| 30 | * |
| 31 | * This is chiefly used to iterate a dictionary. |
| 32 | */ |
| 33 | public final class WordProperty implements Comparable<WordProperty> { |
| 34 | public final String mWord; |
| 35 | public final ProbabilityInfo mProbabilityInfo; |
| 36 | public final ArrayList<WeightedString> mShortcutTargets; |
| 37 | public final ArrayList<WeightedString> mBigrams; |
| 38 | public final boolean mIsNotAWord; |
| 39 | public final boolean mIsBlacklistEntry; |
| 40 | public final boolean mHasShortcuts; |
| 41 | public final boolean mHasBigrams; |
| 42 | |
| 43 | private int mHashCode = 0; |
| 44 | |
Keisuke Kuroyanagi | aa7abb2 | 2014-03-28 15:02:00 +0900 | [diff] [blame^] | 45 | @UsedForTesting |
Keisuke Kuroyanagi | 8ffc631 | 2014-02-10 15:05:08 +0900 | [diff] [blame] | 46 | public WordProperty(final String word, final ProbabilityInfo probabilityInfo, |
Keisuke Kuroyanagi | 5f5feeb | 2014-02-06 15:13:33 +0900 | [diff] [blame] | 47 | final ArrayList<WeightedString> shortcutTargets, |
| 48 | final ArrayList<WeightedString> bigrams, |
| 49 | final boolean isNotAWord, final boolean isBlacklistEntry) { |
| 50 | mWord = word; |
Keisuke Kuroyanagi | 8ffc631 | 2014-02-10 15:05:08 +0900 | [diff] [blame] | 51 | mProbabilityInfo = probabilityInfo; |
Keisuke Kuroyanagi | 5f5feeb | 2014-02-06 15:13:33 +0900 | [diff] [blame] | 52 | mShortcutTargets = shortcutTargets; |
| 53 | mBigrams = bigrams; |
| 54 | mIsNotAWord = isNotAWord; |
| 55 | mIsBlacklistEntry = isBlacklistEntry; |
Keisuke Kuroyanagi | b24de42 | 2014-02-06 16:09:25 +0900 | [diff] [blame] | 56 | mHasBigrams = bigrams != null && !bigrams.isEmpty(); |
| 57 | mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty(); |
Keisuke Kuroyanagi | 5f5feeb | 2014-02-06 15:13:33 +0900 | [diff] [blame] | 58 | } |
| 59 | |
| 60 | private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) { |
| 61 | return new ProbabilityInfo( |
| 62 | probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX], |
| 63 | probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX], |
| 64 | probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX], |
| 65 | probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]); |
| 66 | } |
| 67 | |
| 68 | // Construct word property using information from native code. |
| 69 | // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY. |
| 70 | public WordProperty(final int[] codePoints, final boolean isNotAWord, |
| 71 | final boolean isBlacklisted, final boolean hasBigram, |
| 72 | final boolean hasShortcuts, final int[] probabilityInfo, |
| 73 | final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo, |
| 74 | final ArrayList<int[]> shortcutTargets, |
| 75 | final ArrayList<Integer> shortcutProbabilities) { |
| 76 | mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); |
| 77 | mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); |
| 78 | mShortcutTargets = CollectionUtils.newArrayList(); |
| 79 | mBigrams = CollectionUtils.newArrayList(); |
| 80 | mIsNotAWord = isNotAWord; |
| 81 | mIsBlacklistEntry = isBlacklisted; |
| 82 | mHasShortcuts = hasShortcuts; |
| 83 | mHasBigrams = hasBigram; |
| 84 | |
| 85 | final int bigramTargetCount = bigramTargets.size(); |
| 86 | for (int i = 0; i < bigramTargetCount; i++) { |
| 87 | final String bigramTargetString = |
| 88 | StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i)); |
| 89 | mBigrams.add(new WeightedString(bigramTargetString, |
| 90 | createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)))); |
| 91 | } |
| 92 | |
| 93 | final int shortcutTargetCount = shortcutTargets.size(); |
| 94 | for (int i = 0; i < shortcutTargetCount; i++) { |
| 95 | final String shortcutTargetString = |
| 96 | StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i)); |
| 97 | mShortcutTargets.add( |
| 98 | new WeightedString(shortcutTargetString, shortcutProbabilities.get(i))); |
| 99 | } |
| 100 | } |
| 101 | |
| 102 | public int getProbability() { |
| 103 | return mProbabilityInfo.mProbability; |
| 104 | } |
| 105 | |
| 106 | private static int computeHashCode(WordProperty word) { |
| 107 | return Arrays.hashCode(new Object[] { |
| 108 | word.mWord, |
| 109 | word.mProbabilityInfo, |
| 110 | word.mShortcutTargets.hashCode(), |
| 111 | word.mBigrams.hashCode(), |
| 112 | word.mIsNotAWord, |
| 113 | word.mIsBlacklistEntry |
| 114 | }); |
| 115 | } |
| 116 | |
| 117 | /** |
| 118 | * Three-way comparison. |
| 119 | * |
| 120 | * A Word x is greater than a word y if x has a higher frequency. If they have the same |
| 121 | * frequency, they are sorted in lexicographic order. |
| 122 | */ |
| 123 | @Override |
| 124 | public int compareTo(final WordProperty w) { |
| 125 | if (getProbability() < w.getProbability()) return 1; |
| 126 | if (getProbability() > w.getProbability()) return -1; |
| 127 | return mWord.compareTo(w.mWord); |
| 128 | } |
| 129 | |
| 130 | /** |
| 131 | * Equality test. |
| 132 | * |
| 133 | * Words are equal if they have the same frequency, the same spellings, and the same |
| 134 | * attributes. |
| 135 | */ |
| 136 | @Override |
| 137 | public boolean equals(Object o) { |
| 138 | if (o == this) return true; |
| 139 | if (!(o instanceof WordProperty)) return false; |
| 140 | WordProperty w = (WordProperty)o; |
| 141 | return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord) |
| 142 | && mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams) |
| 143 | && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry |
| 144 | && mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams; |
| 145 | } |
| 146 | |
| 147 | @Override |
| 148 | public int hashCode() { |
| 149 | if (mHashCode == 0) { |
| 150 | mHashCode = computeHashCode(this); |
| 151 | } |
| 152 | return mHashCode; |
| 153 | } |
| 154 | |
| 155 | @UsedForTesting |
| 156 | public boolean isValid() { |
| 157 | return getProbability() != BinaryDictionary.NOT_A_PROBABILITY; |
| 158 | } |
| 159 | |
| 160 | @Override |
| 161 | public String toString() { |
Keisuke Kuroyanagi | b24de42 | 2014-02-06 16:09:25 +0900 | [diff] [blame] | 162 | return CombinedFormatUtils.formatWordProperty(this); |
Keisuke Kuroyanagi | 5f5feeb | 2014-02-06 15:13:33 +0900 | [diff] [blame] | 163 | } |
| 164 | } |