blob: a889965247de59d904c750d126080dbaca528db6 [file] [log] [blame]
Keisuke Kuroyanagic4696b22014-08-01 20:19:16 +09001/*
2 * Copyright (C) 2014, The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#include "suggest/policyimpl/dictionary/structure/v4/content/language_model_dict_content.h"
18
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +090019#include <algorithm>
20#include <cstring>
21
Keisuke Kuroyanagi9aa66992014-08-22 20:07:54 +090022#include "suggest/policyimpl/dictionary/utils/forgetting_curve_utils.h"
23
Keisuke Kuroyanagic4696b22014-08-01 20:19:16 +090024namespace latinime {
25
Keisuke Kuroyanagi54007012014-10-15 12:29:31 +090026const int LanguageModelDictContent::DUMMY_PROBABILITY_FOR_VALID_WORDS = 1;
Keisuke Kuroyanagi758d0932014-08-27 20:04:39 +090027
Keisuke Kuroyanagic4696b22014-08-01 20:19:16 +090028bool LanguageModelDictContent::save(FILE *const file) const {
29 return mTrieMap.save(file);
30}
31
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +090032bool LanguageModelDictContent::runGC(
33 const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +090034 const LanguageModelDictContent *const originalContent) {
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +090035 return runGCInner(terminalIdMap, originalContent->mTrieMap.getEntriesInRootLevel(),
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +090036 0 /* nextLevelBitmapEntryIndex */);
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +090037}
38
Keisuke Kuroyanagi7d911d62014-09-24 14:15:34 +090039const WordAttributes LanguageModelDictContent::getWordAttributes(const WordIdArrayView prevWordIds,
Keisuke Kuroyanagi36ba1392014-09-12 20:17:41 +090040 const int wordId, const HeaderPolicy *const headerPolicy) const {
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090041 int bitmapEntryIndices[MAX_PREV_WORD_COUNT_FOR_N_GRAM + 1];
42 bitmapEntryIndices[0] = mTrieMap.getRootBitmapEntryIndex();
Keisuke Kuroyanagi72d17d92014-10-15 18:23:00 +090043 int maxPrevWordCount = 0;
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090044 for (size_t i = 0; i < prevWordIds.size(); ++i) {
45 const int nextBitmapEntryIndex =
46 mTrieMap.get(prevWordIds[i], bitmapEntryIndices[i]).mNextLevelBitmapEntryIndex;
47 if (nextBitmapEntryIndex == TrieMap::INVALID_INDEX) {
48 break;
49 }
Keisuke Kuroyanagi72d17d92014-10-15 18:23:00 +090050 maxPrevWordCount = i + 1;
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090051 bitmapEntryIndices[i + 1] = nextBitmapEntryIndex;
52 }
53
Keisuke Kuroyanagi72d17d92014-10-15 18:23:00 +090054 for (int i = maxPrevWordCount; i >= 0; --i) {
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090055 const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndices[i]);
56 if (!result.mIsValid) {
57 continue;
58 }
Keisuke Kuroyanagi36ba1392014-09-12 20:17:41 +090059 const ProbabilityEntry probabilityEntry =
60 ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
Keisuke Kuroyanagi7d911d62014-09-24 14:15:34 +090061 int probability = NOT_A_PROBABILITY;
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090062 if (mHasHistoricalInfo) {
Keisuke Kuroyanagi7d911d62014-09-24 14:15:34 +090063 const int rawProbability = ForgettingCurveUtils::decodeProbability(
Keisuke Kuroyanagicb4f5442014-09-25 11:41:50 +090064 probabilityEntry.getHistoricalInfo(), headerPolicy);
65 if (rawProbability == NOT_A_PROBABILITY) {
66 // The entry should not be treated as a valid entry.
67 continue;
68 }
Keisuke Kuroyanagi72d17d92014-10-15 18:23:00 +090069 if (i == 0) {
70 // unigram
71 probability = rawProbability;
72 } else {
73 const ProbabilityEntry prevWordProbabilityEntry = getNgramProbabilityEntry(
74 prevWordIds.skip(1 /* n */).limit(i - 1), prevWordIds[0]);
75 if (!prevWordProbabilityEntry.isValid()) {
76 continue;
77 }
78 if (prevWordProbabilityEntry.representsBeginningOfSentence()) {
79 probability = rawProbability;
80 } else {
81 const int prevWordRawProbability = ForgettingCurveUtils::decodeProbability(
82 prevWordProbabilityEntry.getHistoricalInfo(), headerPolicy);
83 probability = std::min(MAX_PROBABILITY - prevWordRawProbability
84 + rawProbability, MAX_PROBABILITY);
85 }
86 }
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090087 } else {
Keisuke Kuroyanagi7d911d62014-09-24 14:15:34 +090088 probability = probabilityEntry.getProbability();
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090089 }
Keisuke Kuroyanagi7d911d62014-09-24 14:15:34 +090090 // TODO: Some flags in unigramProbabilityEntry should be overwritten by flags in
91 // probabilityEntry.
92 const ProbabilityEntry unigramProbabilityEntry = getProbabilityEntry(wordId);
93 return WordAttributes(probability, unigramProbabilityEntry.isNotAWord(),
94 unigramProbabilityEntry.isBlacklisted(),
95 unigramProbabilityEntry.isPossiblyOffensive());
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090096 }
97 // Cannot find the word.
Keisuke Kuroyanagi7d911d62014-09-24 14:15:34 +090098 return WordAttributes();
Keisuke Kuroyanagi395fe8e2014-09-10 19:51:12 +090099}
100
Keisuke Kuroyanagi851e0452014-08-05 14:13:07 +0900101ProbabilityEntry LanguageModelDictContent::getNgramProbabilityEntry(
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900102 const WordIdArrayView prevWordIds, const int wordId) const {
Keisuke Kuroyanagi03dc44f2014-08-05 14:51:11 +0900103 const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
104 if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900105 return ProbabilityEntry();
106 }
Keisuke Kuroyanagi03dc44f2014-08-05 14:51:11 +0900107 const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900108 if (!result.mIsValid) {
109 // Not found.
110 return ProbabilityEntry();
111 }
112 return ProbabilityEntry::decode(result.mValue, mHasHistoricalInfo);
113}
114
Keisuke Kuroyanagi851e0452014-08-05 14:13:07 +0900115bool LanguageModelDictContent::setNgramProbabilityEntry(const WordIdArrayView prevWordIds,
Keisuke Kuroyanagid3097c62014-08-15 19:55:07 +0900116 const int wordId, const ProbabilityEntry *const probabilityEntry) {
117 if (wordId == Ver4DictConstants::NOT_A_TERMINAL_ID) {
118 return false;
119 }
Keisuke Kuroyanagi9a23f0f2014-08-12 20:32:42 +0900120 const int bitmapEntryIndex = createAndGetBitmapEntryIndex(prevWordIds);
Keisuke Kuroyanagi03dc44f2014-08-05 14:51:11 +0900121 if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900122 return false;
123 }
Keisuke Kuroyanagid3097c62014-08-15 19:55:07 +0900124 return mTrieMap.put(wordId, probabilityEntry->encode(mHasHistoricalInfo), bitmapEntryIndex);
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900125}
126
Keisuke Kuroyanagib4531d82014-08-18 12:34:48 +0900127bool LanguageModelDictContent::removeNgramProbabilityEntry(const WordIdArrayView prevWordIds,
128 const int wordId) {
129 const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
130 if (bitmapEntryIndex == TrieMap::INVALID_INDEX) {
131 // Cannot find bitmap entry for the probability entry. The entry doesn't exist.
132 return false;
133 }
134 return mTrieMap.remove(wordId, bitmapEntryIndex);
135}
136
Keisuke Kuroyanagi07b3b412014-08-26 12:01:08 +0900137LanguageModelDictContent::EntryRange LanguageModelDictContent::getProbabilityEntries(
138 const WordIdArrayView prevWordIds) const {
139 const int bitmapEntryIndex = getBitmapEntryIndex(prevWordIds);
140 return EntryRange(mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex), mHasHistoricalInfo);
141}
142
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900143bool LanguageModelDictContent::truncateEntries(const EntryCounts &currentEntryCounts,
144 const EntryCounts &maxEntryCounts, const HeaderPolicy *const headerPolicy,
145 MutableEntryCounters *const outEntryCounters) {
146 for (int prevWordCount = 0; prevWordCount <= MAX_PREV_WORD_COUNT_FOR_N_GRAM; ++prevWordCount) {
147 const int totalWordCount = prevWordCount + 1;
148 if (currentEntryCounts.getNgramCount(totalWordCount)
149 <= maxEntryCounts.getNgramCount(totalWordCount)) {
150 outEntryCounters->setNgramCount(totalWordCount,
151 currentEntryCounts.getNgramCount(totalWordCount));
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900152 continue;
153 }
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900154 int entryCount = 0;
155 if (!turncateEntriesInSpecifiedLevel(headerPolicy,
156 maxEntryCounts.getNgramCount(totalWordCount), prevWordCount, &entryCount)) {
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900157 return false;
158 }
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900159 outEntryCounters->setNgramCount(totalWordCount, entryCount);
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900160 }
161 return true;
162}
163
Keisuke Kuroyanagi54007012014-10-15 12:29:31 +0900164bool LanguageModelDictContent::updateAllEntriesOnInputWord(const WordIdArrayView prevWordIds,
165 const int wordId, const bool isValid, const HistoricalInfo historicalInfo,
Keisuke Kuroyanagie8750d92014-10-21 15:46:14 +0900166 const HeaderPolicy *const headerPolicy, MutableEntryCounters *const entryCountersToUpdate) {
Keisuke Kuroyanagi54007012014-10-15 12:29:31 +0900167 if (!mHasHistoricalInfo) {
168 AKLOGE("updateAllEntriesOnInputWord is called for dictionary without historical info.");
169 return false;
170 }
171 const ProbabilityEntry originalUnigramProbabilityEntry = getProbabilityEntry(wordId);
172 const ProbabilityEntry updatedUnigramProbabilityEntry = createUpdatedEntryFrom(
173 originalUnigramProbabilityEntry, isValid, historicalInfo, headerPolicy);
174 if (!setProbabilityEntry(wordId, &updatedUnigramProbabilityEntry)) {
175 return false;
176 }
177 for (size_t i = 0; i < prevWordIds.size(); ++i) {
178 if (prevWordIds[i] == NOT_A_WORD_ID) {
179 break;
180 }
181 // TODO: Optimize this code.
182 const WordIdArrayView limitedPrevWordIds = prevWordIds.limit(i + 1);
183 const ProbabilityEntry originalNgramProbabilityEntry = getNgramProbabilityEntry(
184 limitedPrevWordIds, wordId);
185 const ProbabilityEntry updatedNgramProbabilityEntry = createUpdatedEntryFrom(
186 originalNgramProbabilityEntry, isValid, historicalInfo, headerPolicy);
187 if (!setNgramProbabilityEntry(limitedPrevWordIds, wordId, &updatedNgramProbabilityEntry)) {
188 return false;
189 }
Keisuke Kuroyanagie8750d92014-10-21 15:46:14 +0900190 if (!originalNgramProbabilityEntry.isValid()) {
191 entryCountersToUpdate->incrementNgramCount(i + 2);
Keisuke Kuroyanagi54007012014-10-15 12:29:31 +0900192 }
193 }
194 return true;
195}
196
197const ProbabilityEntry LanguageModelDictContent::createUpdatedEntryFrom(
198 const ProbabilityEntry &originalProbabilityEntry, const bool isValid,
199 const HistoricalInfo historicalInfo, const HeaderPolicy *const headerPolicy) const {
200 const HistoricalInfo updatedHistoricalInfo = ForgettingCurveUtils::createUpdatedHistoricalInfo(
201 originalProbabilityEntry.getHistoricalInfo(), isValid ?
202 DUMMY_PROBABILITY_FOR_VALID_WORDS : NOT_A_PROBABILITY,
203 &historicalInfo, headerPolicy);
204 if (originalProbabilityEntry.isValid()) {
205 return ProbabilityEntry(originalProbabilityEntry.getFlags(), &updatedHistoricalInfo);
206 } else {
207 return ProbabilityEntry(0 /* flags */, &updatedHistoricalInfo);
208 }
209}
210
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900211bool LanguageModelDictContent::runGCInner(
212 const TerminalPositionLookupTable::TerminalIdMap *const terminalIdMap,
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900213 const TrieMap::TrieMapRange trieMapRange, const int nextLevelBitmapEntryIndex) {
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900214 for (auto &entry : trieMapRange) {
215 const auto it = terminalIdMap->find(entry.key());
216 if (it == terminalIdMap->end() || it->second == Ver4DictConstants::NOT_A_TERMINAL_ID) {
217 // The word has been removed.
218 continue;
219 }
220 if (!mTrieMap.put(it->second, entry.value(), nextLevelBitmapEntryIndex)) {
221 return false;
222 }
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900223 if (entry.hasNextLevelMap()) {
224 if (!runGCInner(terminalIdMap, entry.getEntriesInNextLevel(),
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900225 mTrieMap.getNextLevelBitmapEntryIndex(it->second, nextLevelBitmapEntryIndex))) {
Keisuke Kuroyanagi08894842014-08-05 12:38:55 +0900226 return false;
227 }
228 }
229 }
230 return true;
231}
232
Keisuke Kuroyanagi9a23f0f2014-08-12 20:32:42 +0900233int LanguageModelDictContent::createAndGetBitmapEntryIndex(const WordIdArrayView prevWordIds) {
234 if (prevWordIds.empty()) {
235 return mTrieMap.getRootBitmapEntryIndex();
236 }
237 const int lastBitmapEntryIndex =
238 getBitmapEntryIndex(prevWordIds.limit(prevWordIds.size() - 1));
239 if (lastBitmapEntryIndex == TrieMap::INVALID_INDEX) {
240 return TrieMap::INVALID_INDEX;
241 }
Keisuke Kuroyanagi09c15492014-09-17 21:16:31 +0900242 const int oldestPrevWordId = prevWordIds.lastOrDefault(NOT_A_WORD_ID);
Keisuke Kuroyanagi4926b902014-09-16 18:10:56 +0900243 const TrieMap::Result result = mTrieMap.get(oldestPrevWordId, lastBitmapEntryIndex);
244 if (!result.mIsValid) {
245 if (!mTrieMap.put(oldestPrevWordId,
246 ProbabilityEntry().encode(mHasHistoricalInfo), lastBitmapEntryIndex)) {
247 return TrieMap::INVALID_INDEX;
248 }
249 }
Keisuke Kuroyanagi09c15492014-09-17 21:16:31 +0900250 return mTrieMap.getNextLevelBitmapEntryIndex(prevWordIds.lastOrDefault(NOT_A_WORD_ID),
Keisuke Kuroyanagi9a23f0f2014-08-12 20:32:42 +0900251 lastBitmapEntryIndex);
252}
253
Keisuke Kuroyanagi03dc44f2014-08-05 14:51:11 +0900254int LanguageModelDictContent::getBitmapEntryIndex(const WordIdArrayView prevWordIds) const {
255 int bitmapEntryIndex = mTrieMap.getRootBitmapEntryIndex();
256 for (const int wordId : prevWordIds) {
257 const TrieMap::Result result = mTrieMap.get(wordId, bitmapEntryIndex);
258 if (!result.mIsValid) {
259 return TrieMap::INVALID_INDEX;
260 }
261 bitmapEntryIndex = result.mNextLevelBitmapEntryIndex;
262 }
263 return bitmapEntryIndex;
264}
265
Keisuke Kuroyanagi54007012014-10-15 12:29:31 +0900266bool LanguageModelDictContent::updateAllProbabilityEntriesForGCInner(const int bitmapEntryIndex,
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900267 const int prevWordCount, const HeaderPolicy *const headerPolicy,
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900268 MutableEntryCounters *const outEntryCounters) {
Keisuke Kuroyanagi9aa66992014-08-22 20:07:54 +0900269 for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) {
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900270 if (prevWordCount > MAX_PREV_WORD_COUNT_FOR_N_GRAM) {
271 AKLOGE("Invalid prevWordCount. prevWordCount: %d, MAX_PREV_WORD_COUNT_FOR_N_GRAM: %d.",
272 prevWordCount, MAX_PREV_WORD_COUNT_FOR_N_GRAM);
Keisuke Kuroyanagi9aa66992014-08-22 20:07:54 +0900273 return false;
274 }
275 const ProbabilityEntry probabilityEntry =
276 ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo);
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900277 if (prevWordCount > 0 && probabilityEntry.isValid()
278 && !mTrieMap.getRoot(entry.key()).mIsValid) {
279 // The entry is related to a word that has been removed. Remove the entry.
280 if (!mTrieMap.remove(entry.key(), bitmapEntryIndex)) {
281 return false;
282 }
283 continue;
284 }
285 if (mHasHistoricalInfo && !probabilityEntry.representsBeginningOfSentence()
286 && probabilityEntry.isValid()) {
Keisuke Kuroyanagi9aa66992014-08-22 20:07:54 +0900287 const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave(
288 probabilityEntry.getHistoricalInfo(), headerPolicy);
289 if (ForgettingCurveUtils::needsToKeep(&historicalInfo, headerPolicy)) {
290 // Update the entry.
291 const ProbabilityEntry updatedEntry(probabilityEntry.getFlags(), &historicalInfo);
292 if (!mTrieMap.put(entry.key(), updatedEntry.encode(mHasHistoricalInfo),
293 bitmapEntryIndex)) {
294 return false;
295 }
296 } else {
297 // Remove the entry.
298 if (!mTrieMap.remove(entry.key(), bitmapEntryIndex)) {
299 return false;
300 }
301 continue;
302 }
303 }
304 if (!probabilityEntry.representsBeginningOfSentence()) {
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900305 outEntryCounters->incrementNgramCount(prevWordCount + 1);
Keisuke Kuroyanagi9aa66992014-08-22 20:07:54 +0900306 }
307 if (!entry.hasNextLevelMap()) {
308 continue;
309 }
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900310 if (!updateAllProbabilityEntriesForGCInner(entry.getNextLevelBitmapEntryIndex(),
Keisuke Kuroyanagi47fc6562014-10-21 16:36:03 +0900311 prevWordCount + 1, headerPolicy, outEntryCounters)) {
Keisuke Kuroyanagi9aa66992014-08-22 20:07:54 +0900312 return false;
313 }
314 }
315 return true;
316}
317
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900318bool LanguageModelDictContent::turncateEntriesInSpecifiedLevel(
Keisuke Kuroyanagi758d0932014-08-27 20:04:39 +0900319 const HeaderPolicy *const headerPolicy, const int maxEntryCount, const int targetLevel,
320 int *const outEntryCount) {
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900321 std::vector<int> prevWordIds;
322 std::vector<EntryInfoToTurncate> entryInfoVector;
323 if (!getEntryInfo(headerPolicy, targetLevel, mTrieMap.getRootBitmapEntryIndex(),
324 &prevWordIds, &entryInfoVector)) {
325 return false;
326 }
327 if (static_cast<int>(entryInfoVector.size()) <= maxEntryCount) {
Keisuke Kuroyanagi758d0932014-08-27 20:04:39 +0900328 *outEntryCount = static_cast<int>(entryInfoVector.size());
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900329 return true;
330 }
Keisuke Kuroyanagi758d0932014-08-27 20:04:39 +0900331 *outEntryCount = maxEntryCount;
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900332 const int entryCountToRemove = static_cast<int>(entryInfoVector.size()) - maxEntryCount;
333 std::partial_sort(entryInfoVector.begin(), entryInfoVector.begin() + entryCountToRemove,
334 entryInfoVector.end(),
335 EntryInfoToTurncate::Comparator());
336 for (int i = 0; i < entryCountToRemove; ++i) {
337 const EntryInfoToTurncate &entryInfo = entryInfoVector[i];
338 if (!removeNgramProbabilityEntry(
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900339 WordIdArrayView(entryInfo.mPrevWordIds, entryInfo.mPrevWordCount), entryInfo.mKey)) {
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900340 return false;
341 }
342 }
343 return true;
344}
345
346bool LanguageModelDictContent::getEntryInfo(const HeaderPolicy *const headerPolicy,
347 const int targetLevel, const int bitmapEntryIndex, std::vector<int> *const prevWordIds,
348 std::vector<EntryInfoToTurncate> *const outEntryInfo) const {
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900349 const int prevWordCount = prevWordIds->size();
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900350 for (const auto &entry : mTrieMap.getEntriesInSpecifiedLevel(bitmapEntryIndex)) {
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900351 if (prevWordCount < targetLevel) {
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900352 if (!entry.hasNextLevelMap()) {
353 continue;
354 }
355 prevWordIds->push_back(entry.key());
356 if (!getEntryInfo(headerPolicy, targetLevel, entry.getNextLevelBitmapEntryIndex(),
357 prevWordIds, outEntryInfo)) {
358 return false;
359 }
360 prevWordIds->pop_back();
361 continue;
362 }
363 const ProbabilityEntry probabilityEntry =
364 ProbabilityEntry::decode(entry.value(), mHasHistoricalInfo);
365 const int probability = (mHasHistoricalInfo) ?
366 ForgettingCurveUtils::decodeProbability(probabilityEntry.getHistoricalInfo(),
367 headerPolicy) : probabilityEntry.getProbability();
368 outEntryInfo->emplace_back(probability,
Keisuke Kuroyanagi287e1552014-10-01 11:39:33 +0900369 probabilityEntry.getHistoricalInfo()->getTimestamp(),
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900370 entry.key(), targetLevel, prevWordIds->data());
371 }
372 return true;
373}
374
375bool LanguageModelDictContent::EntryInfoToTurncate::Comparator::operator()(
376 const EntryInfoToTurncate &left, const EntryInfoToTurncate &right) const {
377 if (left.mProbability != right.mProbability) {
378 return left.mProbability < right.mProbability;
379 }
380 if (left.mTimestamp != right.mTimestamp) {
381 return left.mTimestamp > right.mTimestamp;
382 }
383 if (left.mKey != right.mKey) {
384 return left.mKey < right.mKey;
385 }
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900386 if (left.mPrevWordCount != right.mPrevWordCount) {
387 return left.mPrevWordCount > right.mPrevWordCount;
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900388 }
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900389 for (int i = 0; i < left.mPrevWordCount; ++i) {
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900390 if (left.mPrevWordIds[i] != right.mPrevWordIds[i]) {
391 return left.mPrevWordIds[i] < right.mPrevWordIds[i];
392 }
393 }
394 // left and rigth represent the same entry.
395 return false;
396}
397
398LanguageModelDictContent::EntryInfoToTurncate::EntryInfoToTurncate(const int probability,
Keisuke Kuroyanagi3601c212014-10-15 20:43:27 +0900399 const int timestamp, const int key, const int prevWordCount, const int *const prevWordIds)
400 : mProbability(probability), mTimestamp(timestamp), mKey(key),
401 mPrevWordCount(prevWordCount) {
402 memmove(mPrevWordIds, prevWordIds, mPrevWordCount * sizeof(mPrevWordIds[0]));
Keisuke Kuroyanagi063f86d2014-08-21 12:48:24 +0900403}
404
Keisuke Kuroyanagic4696b22014-08-01 20:19:16 +0900405} // namespace latinime