| /* |
| * Copyright (C) 2013 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| /* |
| * !!!!! DO NOT CHANGE THE LOGIC IN THIS FILE !!!!! |
| * Do not edit this file other than updating policy's interface. |
| * |
| * This file was generated from |
| * dictionary/structure/v4/bigram/ver4_bigram_list_policy.cpp |
| */ |
| |
| #include "dictionary/structure/backward/v402/bigram/ver4_bigram_list_policy.h" |
| |
| #include "dictionary/header/header_policy.h" |
| #include "dictionary/property/ngram_property.h" |
| #include "dictionary/structure/pt_common/bigram/bigram_list_read_write_utils.h" |
| #include "dictionary/structure/backward/v402/content/bigram_dict_content.h" |
| #include "dictionary/structure/backward/v402/content/terminal_position_lookup_table.h" |
| #include "dictionary/structure/backward/v402/ver4_dict_constants.h" |
| #include "dictionary/utils/forgetting_curve_utils.h" |
| |
| namespace latinime { |
| namespace backward { |
| namespace v402 { |
| |
| void Ver4BigramListPolicy::getNextBigram(int *const outBigramPos, int *const outProbability, |
| bool *const outHasNext, int *const bigramEntryPos) const { |
| const BigramEntry bigramEntry = |
| mBigramDictContent->getBigramEntryAndAdvancePosition(bigramEntryPos); |
| if (outBigramPos) { |
| // Lookup target PtNode position. |
| *outBigramPos = mTerminalPositionLookupTable->getTerminalPtNodePosition( |
| bigramEntry.getTargetTerminalId()); |
| } |
| if (outProbability) { |
| if (bigramEntry.hasHistoricalInfo()) { |
| *outProbability = |
| ForgettingCurveUtils::decodeProbability(bigramEntry.getHistoricalInfo(), |
| mHeaderPolicy); |
| } else { |
| *outProbability = bigramEntry.getProbability(); |
| } |
| } |
| if (outHasNext) { |
| *outHasNext = bigramEntry.hasNext(); |
| } |
| } |
| |
| bool Ver4BigramListPolicy::addNewEntry(const int terminalId, const int newTargetTerminalId, |
| const NgramProperty *const ngramProperty, bool *const outAddedNewEntry) { |
| // 1. The word has no bigrams yet. |
| // 2. The word has bigrams, and there is the target in the list. |
| // 3. The word has bigrams, and there is an invalid entry that can be reclaimed. |
| // 4. The word has bigrams. We have to append new bigram entry to the list. |
| // 5. Same as 4, but the list is the last entry of the content file. |
| if (outAddedNewEntry) { |
| *outAddedNewEntry = false; |
| } |
| const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); |
| if (bigramListPos == NOT_A_DICT_POS) { |
| // Case 1. PtNode that doesn't have a bigram list. |
| // Create new bigram list. |
| if (!mBigramDictContent->createNewBigramList(terminalId)) { |
| return false; |
| } |
| const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, |
| newTargetTerminalId); |
| const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom(&newBigramEntry, |
| ngramProperty); |
| // Write an entry. |
| const int writingPos = mBigramDictContent->getBigramListHeadPos(terminalId); |
| if (!mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, writingPos)) { |
| return false; |
| } |
| if (outAddedNewEntry) { |
| *outAddedNewEntry = true; |
| } |
| return true; |
| } |
| |
| int tailEntryPos = NOT_A_DICT_POS; |
| const int entryPosToUpdate = getEntryPosToUpdate(newTargetTerminalId, bigramListPos, |
| &tailEntryPos); |
| if (tailEntryPos != NOT_A_DICT_POS || entryPosToUpdate == NOT_A_DICT_POS) { |
| // Case 4, 5. |
| // Add new entry to the bigram list. |
| if (tailEntryPos == NOT_A_DICT_POS) { |
| // Case 4. Create new bigram list. |
| if (!mBigramDictContent->createNewBigramList(terminalId)) { |
| return false; |
| } |
| const int destPos = mBigramDictContent->getBigramListHeadPos(terminalId); |
| // Copy existing bigram list. |
| if (!mBigramDictContent->copyBigramList(bigramListPos, destPos, &tailEntryPos)) { |
| return false; |
| } |
| } |
| // Write new entry at the tail position of the bigram content. |
| const BigramEntry newBigramEntry(false /* hasNext */, NOT_A_PROBABILITY, |
| newTargetTerminalId); |
| const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( |
| &newBigramEntry, ngramProperty); |
| if (!mBigramDictContent->writeBigramEntryAtTail(&bigramEntryToWrite)) { |
| return false; |
| } |
| // Update has next flag of the tail entry. |
| if (!updateHasNextFlag(true /* hasNext */, tailEntryPos)) { |
| return false; |
| } |
| if (outAddedNewEntry) { |
| *outAddedNewEntry = true; |
| } |
| return true; |
| } |
| |
| // Case 2. Overwrite the existing entry. Case 3. Reclaim and reuse the existing invalid entry. |
| const BigramEntry originalBigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); |
| if (!originalBigramEntry.isValid()) { |
| // Case 3. Reuse the existing invalid entry. outAddedNewEntry is false when an existing |
| // entry is updated. |
| if (outAddedNewEntry) { |
| *outAddedNewEntry = true; |
| } |
| } |
| const BigramEntry updatedBigramEntry = |
| originalBigramEntry.updateTargetTerminalIdAndGetEntry(newTargetTerminalId); |
| const BigramEntry bigramEntryToWrite = createUpdatedBigramEntryFrom( |
| &updatedBigramEntry, ngramProperty); |
| return mBigramDictContent->writeBigramEntry(&bigramEntryToWrite, entryPosToUpdate); |
| } |
| |
| bool Ver4BigramListPolicy::removeEntry(const int terminalId, const int targetTerminalId) { |
| const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); |
| if (bigramListPos == NOT_A_DICT_POS) { |
| // Bigram list doesn't exist. |
| return false; |
| } |
| const int entryPosToUpdate = getEntryPosToUpdate(targetTerminalId, bigramListPos, |
| nullptr /* outTailEntryPos */); |
| if (entryPosToUpdate == NOT_A_DICT_POS) { |
| // Bigram entry doesn't exist. |
| return false; |
| } |
| const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(entryPosToUpdate); |
| if (targetTerminalId != bigramEntry.getTargetTerminalId()) { |
| // Bigram entry doesn't exist. |
| return false; |
| } |
| // Remove bigram entry by marking it as invalid entry and overwriting the original entry. |
| const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); |
| return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPosToUpdate); |
| } |
| |
| bool Ver4BigramListPolicy::updateAllBigramEntriesAndDeleteUselessEntries(const int terminalId, |
| int *const outBigramCount) { |
| const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); |
| if (bigramListPos == NOT_A_DICT_POS) { |
| // Bigram list doesn't exist. |
| return true; |
| } |
| bool hasNext = true; |
| int readingPos = bigramListPos; |
| while (hasNext) { |
| const int entryPos = readingPos; |
| const BigramEntry bigramEntry = |
| mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); |
| hasNext = bigramEntry.hasNext(); |
| if (!bigramEntry.isValid()) { |
| continue; |
| } |
| const int targetPtNodePos = mTerminalPositionLookupTable->getTerminalPtNodePosition( |
| bigramEntry.getTargetTerminalId()); |
| if (targetPtNodePos == NOT_A_DICT_POS) { |
| // Invalidate bigram entry. |
| const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); |
| if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { |
| return false; |
| } |
| } else if (bigramEntry.hasHistoricalInfo()) { |
| const HistoricalInfo historicalInfo = ForgettingCurveUtils::createHistoricalInfoToSave( |
| bigramEntry.getHistoricalInfo(), mHeaderPolicy); |
| if (ForgettingCurveUtils::needsToKeep(&historicalInfo, mHeaderPolicy)) { |
| const BigramEntry updatedBigramEntry = |
| bigramEntry.updateHistoricalInfoAndGetEntry(&historicalInfo); |
| if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { |
| return false; |
| } |
| *outBigramCount += 1; |
| } else { |
| // Remove entry. |
| const BigramEntry updatedBigramEntry = bigramEntry.getInvalidatedEntry(); |
| if (!mBigramDictContent->writeBigramEntry(&updatedBigramEntry, entryPos)) { |
| return false; |
| } |
| } |
| } else { |
| *outBigramCount += 1; |
| } |
| } |
| return true; |
| } |
| |
| int Ver4BigramListPolicy::getBigramEntryConut(const int terminalId) { |
| const int bigramListPos = mBigramDictContent->getBigramListHeadPos(terminalId); |
| if (bigramListPos == NOT_A_DICT_POS) { |
| // Bigram list doesn't exist. |
| return 0; |
| } |
| int bigramCount = 0; |
| bool hasNext = true; |
| int readingPos = bigramListPos; |
| while (hasNext) { |
| const BigramEntry bigramEntry = |
| mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); |
| hasNext = bigramEntry.hasNext(); |
| if (bigramEntry.isValid()) { |
| bigramCount++; |
| } |
| } |
| return bigramCount; |
| } |
| |
| int Ver4BigramListPolicy::getEntryPosToUpdate(const int targetTerminalIdToFind, |
| const int bigramListPos, int *const outTailEntryPos) const { |
| if (outTailEntryPos) { |
| *outTailEntryPos = NOT_A_DICT_POS; |
| } |
| bool hasNext = true; |
| int invalidEntryPos = NOT_A_DICT_POS; |
| int readingPos = bigramListPos; |
| while (hasNext) { |
| const int entryPos = readingPos; |
| const BigramEntry bigramEntry = |
| mBigramDictContent->getBigramEntryAndAdvancePosition(&readingPos); |
| hasNext = bigramEntry.hasNext(); |
| if (bigramEntry.getTargetTerminalId() == targetTerminalIdToFind) { |
| // Entry with same target is found. |
| return entryPos; |
| } else if (!bigramEntry.isValid()) { |
| // Invalid entry that can be reused is found. |
| invalidEntryPos = entryPos; |
| } |
| if (!hasNext && mBigramDictContent->isContentTailPos(readingPos)) { |
| if (outTailEntryPos) { |
| *outTailEntryPos = entryPos; |
| } |
| } |
| } |
| return invalidEntryPos; |
| } |
| |
| const BigramEntry Ver4BigramListPolicy::createUpdatedBigramEntryFrom( |
| const BigramEntry *const originalBigramEntry, |
| const NgramProperty *const ngramProperty) const { |
| // TODO: Consolidate historical info and probability. |
| if (mHeaderPolicy->hasHistoricalInfoOfWords()) { |
| const HistoricalInfo &historicalInfoForUpdate = ngramProperty->getHistoricalInfo(); |
| const HistoricalInfo updatedHistoricalInfo = |
| ForgettingCurveUtils::createUpdatedHistoricalInfo( |
| originalBigramEntry->getHistoricalInfo(), ngramProperty->getProbability(), |
| &historicalInfoForUpdate, mHeaderPolicy); |
| return originalBigramEntry->updateHistoricalInfoAndGetEntry(&updatedHistoricalInfo); |
| } else { |
| return originalBigramEntry->updateProbabilityAndGetEntry(ngramProperty->getProbability()); |
| } |
| } |
| |
| bool Ver4BigramListPolicy::updateHasNextFlag(const bool hasNext, const int bigramEntryPos) { |
| const BigramEntry bigramEntry = mBigramDictContent->getBigramEntry(bigramEntryPos); |
| const BigramEntry updatedBigramEntry = bigramEntry.updateHasNextAndGetEntry(hasNext); |
| return mBigramDictContent->writeBigramEntry(&updatedBigramEntry, bigramEntryPos); |
| } |
| |
| } // namespace v402 |
| } // namespace backward |
| } // namespace latinime |