blob: 264e75710061127a034109603dd41b202a9ed56c [file] [log] [blame]
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
16
17package com.android.inputmethod.latin.makedict;
18
19import com.android.inputmethod.annotations.UsedForTesting;
20import com.android.inputmethod.latin.BinaryDictionary;
Tadashi G. Takaoka5f00fe02014-10-20 14:48:56 +090021import com.android.inputmethod.latin.Dictionary;
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090022import com.android.inputmethod.latin.NgramContext;
23import com.android.inputmethod.latin.NgramContext.WordInfo;
Jean Chalard4beeb922014-10-28 21:31:09 +090024import com.android.inputmethod.latin.common.StringUtils;
Keisuke Kuroyanagib24de422014-02-06 16:09:25 +090025import com.android.inputmethod.latin.utils.CombinedFormatUtils;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090026
27import java.util.ArrayList;
28import java.util.Arrays;
29
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +090030import javax.annotation.Nullable;
31
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090032/**
33 * Utility class for a word with a probability.
34 *
35 * This is chiefly used to iterate a dictionary.
36 */
37public final class WordProperty implements Comparable<WordProperty> {
38 public final String mWord;
39 public final ProbabilityInfo mProbabilityInfo;
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090040 public final ArrayList<NgramProperty> mNgrams;
Keisuke Kuroyanagi1adca932014-05-23 19:58:58 +090041 // TODO: Support mIsBeginningOfSentence.
42 public final boolean mIsBeginningOfSentence;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090043 public final boolean mIsNotAWord;
Adrian Velicu05172bf2014-10-14 12:13:11 +090044 public final boolean mIsPossiblyOffensive;
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090045 public final boolean mHasNgrams;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090046
47 private int mHashCode = 0;
48
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090049 // TODO: Support n-gram.
Keisuke Kuroyanagiaa7abb22014-03-28 15:02:00 +090050 @UsedForTesting
Keisuke Kuroyanagi8ffc6312014-02-10 15:05:08 +090051 public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +090052 @Nullable final ArrayList<WeightedString> bigrams,
Adrian Velicu05172bf2014-10-14 12:13:11 +090053 final boolean isNotAWord, final boolean isPossiblyOffensive) {
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090054 mWord = word;
Keisuke Kuroyanagi8ffc6312014-02-10 15:05:08 +090055 mProbabilityInfo = probabilityInfo;
Jean Chalardb28d1cc2014-10-03 17:55:26 +090056 if (null == bigrams) {
57 mNgrams = null;
58 } else {
59 mNgrams = new ArrayList<>();
60 final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
Tadashi G. Takaoka5f00fe02014-10-20 14:48:56 +090061 for (final WeightedString bigramTarget : bigrams) {
62 mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090063 }
64 }
Keisuke Kuroyanagi1adca932014-05-23 19:58:58 +090065 mIsBeginningOfSentence = false;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090066 mIsNotAWord = isNotAWord;
Adrian Velicu05172bf2014-10-14 12:13:11 +090067 mIsPossiblyOffensive = isPossiblyOffensive;
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090068 mHasNgrams = bigrams != null && !bigrams.isEmpty();
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090069 }
70
71 private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
72 return new ProbabilityInfo(
73 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
74 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
75 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
76 probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
77 }
78
79 // Construct word property using information from native code.
80 // This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
81 public WordProperty(final int[] codePoints, final boolean isNotAWord,
Dan Zivkovic12d80eb2015-02-10 14:54:38 -080082 final boolean isPossiblyOffensive, final boolean hasBigram,
Keisuke Kuroyanagi88fa47a2014-06-24 12:37:07 +090083 final boolean isBeginningOfSentence, final int[] probabilityInfo,
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +090084 final ArrayList<int[][]> ngramPrevWordsArray,
Keisuke Kuroyanagib5ef8842014-10-22 18:15:53 +090085 final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
Dan Zivkovic12d80eb2015-02-10 14:54:38 -080086 final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo) {
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090087 mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
88 mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
Jean Chalardb28d1cc2014-10-03 17:55:26 +090089 final ArrayList<NgramProperty> ngrams = new ArrayList<>();
Keisuke Kuroyanagi88fa47a2014-06-24 12:37:07 +090090 mIsBeginningOfSentence = isBeginningOfSentence;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090091 mIsNotAWord = isNotAWord;
Adrian Velicu05172bf2014-10-14 12:13:11 +090092 mIsPossiblyOffensive = isPossiblyOffensive;
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090093 mHasNgrams = hasBigram;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +090094
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +090095 final int relatedNgramCount = ngramTargets.size();
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090096 for (int i = 0; i < relatedNgramCount; i++) {
97 final String ngramTargetString =
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +090098 StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +090099 final WeightedString ngramTarget = new WeightedString(ngramTargetString,
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +0900100 createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
Keisuke Kuroyanagib5ef8842014-10-22 18:15:53 +0900101 final int[][] prevWords = ngramPrevWordsArray.get(i);
102 final boolean[] isBeginningOfSentenceArray =
103 ngramPrevWordIsBeginningOfSentenceArray.get(i);
104 final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
105 for (int j = 0; j < prevWords.length; j++) {
106 wordInfoArray[j] = isBeginningOfSentenceArray[j]
107 ? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
108 : new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
109 prevWords[j]));
110 }
111 final NgramContext ngramContext = new NgramContext(wordInfoArray);
Jean Chalardb28d1cc2014-10-03 17:55:26 +0900112 ngrams.add(new NgramProperty(ngramTarget, ngramContext));
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900113 }
Jean Chalardb28d1cc2014-10-03 17:55:26 +0900114 mNgrams = ngrams.isEmpty() ? null : ngrams;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900115 }
116
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +0900117 // TODO: Remove
Keisuke Kuroyanagib5ef8842014-10-22 18:15:53 +0900118 @UsedForTesting
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +0900119 public ArrayList<WeightedString> getBigrams() {
Jean Chalardb28d1cc2014-10-03 17:55:26 +0900120 if (null == mNgrams) {
121 return null;
122 }
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +0900123 final ArrayList<WeightedString> bigrams = new ArrayList<>();
124 for (final NgramProperty ngram : mNgrams) {
125 if (ngram.mNgramContext.getPrevWordCount() == 1) {
126 bigrams.add(ngram.mTargetWord);
127 }
128 }
129 return bigrams;
130 }
131
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900132 public int getProbability() {
133 return mProbabilityInfo.mProbability;
134 }
135
136 private static int computeHashCode(WordProperty word) {
137 return Arrays.hashCode(new Object[] {
138 word.mWord,
139 word.mProbabilityInfo,
Keisuke Kuroyanagic6a6f6a2014-10-01 11:21:08 +0900140 word.mNgrams,
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900141 word.mIsNotAWord,
Adrian Velicu05172bf2014-10-14 12:13:11 +0900142 word.mIsPossiblyOffensive
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900143 });
144 }
145
146 /**
147 * Three-way comparison.
148 *
149 * A Word x is greater than a word y if x has a higher frequency. If they have the same
150 * frequency, they are sorted in lexicographic order.
151 */
152 @Override
153 public int compareTo(final WordProperty w) {
154 if (getProbability() < w.getProbability()) return 1;
155 if (getProbability() > w.getProbability()) return -1;
156 return mWord.compareTo(w.mWord);
157 }
158
159 /**
160 * Equality test.
161 *
162 * Words are equal if they have the same frequency, the same spellings, and the same
163 * attributes.
164 */
165 @Override
166 public boolean equals(Object o) {
167 if (o == this) return true;
168 if (!(o instanceof WordProperty)) return false;
169 WordProperty w = (WordProperty)o;
Dan Zivkovic12d80eb2015-02-10 14:54:38 -0800170 return mProbabilityInfo.equals(w.mProbabilityInfo)
171 && mWord.equals(w.mWord) && equals(mNgrams, w.mNgrams)
Adrian Velicu05172bf2014-10-14 12:13:11 +0900172 && mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
Dan Zivkovic12d80eb2015-02-10 14:54:38 -0800173 && mHasNgrams == w.mHasNgrams;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900174 }
175
Keisuke Kuroyanagid7a51c22014-10-09 15:26:10 +0900176 // TDOO: Have a utility method like java.util.Objects.equals.
177 private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
Jean Chalardb28d1cc2014-10-03 17:55:26 +0900178 if (null == a) {
179 return null == b;
180 }
181 return a.equals(b);
182 }
183
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900184 @Override
185 public int hashCode() {
186 if (mHashCode == 0) {
187 mHashCode = computeHashCode(this);
188 }
189 return mHashCode;
190 }
191
192 @UsedForTesting
193 public boolean isValid() {
Tadashi G. Takaoka5f00fe02014-10-20 14:48:56 +0900194 return getProbability() != Dictionary.NOT_A_PROBABILITY;
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900195 }
196
197 @Override
198 public String toString() {
Keisuke Kuroyanagib24de422014-02-06 16:09:25 +0900199 return CombinedFormatUtils.formatWordProperty(this);
Keisuke Kuroyanagi5f5feeb2014-02-06 15:13:33 +0900200 }
201}