| /* |
| * Copyright (C) 2013, The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef LATINIME_DYNAMIC_PT_READING_HELPER_H |
| #define LATINIME_DYNAMIC_PT_READING_HELPER_H |
| |
| #include <cstddef> |
| #include <vector> |
| |
| #include "defines.h" |
| #include "dictionary/structure/pt_common/pt_node_params.h" |
| #include "dictionary/structure/pt_common/pt_node_reader.h" |
| |
| namespace latinime { |
| |
| class DictionaryShortcutsStructurePolicy; |
| class PtNodeArrayReader; |
| |
| /* |
| * This class is used for traversing dynamic patricia trie. This class supports iterating nodes and |
| * dealing with additional buffer. This class counts nodes and node arrays to avoid infinite loop. |
| */ |
| class DynamicPtReadingHelper { |
| public: |
| class TraversingEventListener { |
| public: |
| virtual ~TraversingEventListener() {}; |
| |
| // Returns whether the event handling was succeeded or not. |
| virtual bool onAscend() = 0; |
| |
| // Returns whether the event handling was succeeded or not. |
| virtual bool onDescend(const int ptNodeArrayPos) = 0; |
| |
| // Returns whether the event handling was succeeded or not. |
| virtual bool onReadingPtNodeArrayTail() = 0; |
| |
| // Returns whether the event handling was succeeded or not. |
| virtual bool onVisitingPtNode(const PtNodeParams *const node) = 0; |
| |
| protected: |
| TraversingEventListener() {}; |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(TraversingEventListener); |
| }; |
| |
| class TraversePolicyToGetAllTerminalPtNodePositions : public TraversingEventListener { |
| public: |
| TraversePolicyToGetAllTerminalPtNodePositions(std::vector<int> *const terminalPositions) |
| : mTerminalPositions(terminalPositions) {} |
| bool onAscend() { return true; } |
| bool onDescend(const int ptNodeArrayPos) { return true; } |
| bool onReadingPtNodeArrayTail() { return true; } |
| bool onVisitingPtNode(const PtNodeParams *const ptNodeParams); |
| |
| private: |
| DISALLOW_IMPLICIT_CONSTRUCTORS(TraversePolicyToGetAllTerminalPtNodePositions); |
| |
| std::vector<int> *const mTerminalPositions; |
| }; |
| |
| DynamicPtReadingHelper(const PtNodeReader *const ptNodeReader, |
| const PtNodeArrayReader *const ptNodeArrayReader) |
| : mIsError(false), mReadingState(), mPtNodeReader(ptNodeReader), |
| mPtNodeArrayReader(ptNodeArrayReader), mReadingStateStack() {} |
| |
| ~DynamicPtReadingHelper() {} |
| |
| AK_FORCE_INLINE bool isError() const { |
| return mIsError; |
| } |
| |
| AK_FORCE_INLINE bool isEnd() const { |
| return mReadingState.mPos == NOT_A_DICT_POS; |
| } |
| |
| // Initialize reading state with the head position of a PtNode array. |
| AK_FORCE_INLINE void initWithPtNodeArrayPos(const int ptNodeArrayPos) { |
| if (ptNodeArrayPos == NOT_A_DICT_POS) { |
| mReadingState.mPos = NOT_A_DICT_POS; |
| } else { |
| mIsError = false; |
| mReadingState.mPos = ptNodeArrayPos; |
| mReadingState.mTotalCodePointCountSinceInitialization = 0; |
| mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; |
| mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; |
| mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; |
| mReadingStateStack.clear(); |
| nextPtNodeArray(); |
| } |
| } |
| |
| // Initialize reading state with the head position of a node. |
| AK_FORCE_INLINE void initWithPtNodePos(const int ptNodePos) { |
| if (ptNodePos == NOT_A_DICT_POS) { |
| mReadingState.mPos = NOT_A_DICT_POS; |
| } else { |
| mIsError = false; |
| mReadingState.mPos = ptNodePos; |
| mReadingState.mRemainingPtNodeCountInThisArray = 1; |
| mReadingState.mTotalCodePointCountSinceInitialization = 0; |
| mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; |
| mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; |
| mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; |
| mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; |
| mReadingStateStack.clear(); |
| } |
| } |
| |
| AK_FORCE_INLINE const PtNodeParams getPtNodeParams() const { |
| if (isEnd()) { |
| return PtNodeParams(); |
| } |
| return mPtNodeReader->fetchPtNodeParamsInBufferFromPtNodePos(mReadingState.mPos); |
| } |
| |
| AK_FORCE_INLINE bool isValidTerminalNode(const PtNodeParams &ptNodeParams) const { |
| return !isEnd() && !ptNodeParams.isDeleted() && ptNodeParams.isTerminal(); |
| } |
| |
| AK_FORCE_INLINE bool isMatchedCodePoint(const PtNodeParams &ptNodeParams, const int index, |
| const int codePoint) const { |
| return ptNodeParams.getCodePoints()[index] == codePoint; |
| } |
| |
| // Return code point count exclude the last read node's code points. |
| AK_FORCE_INLINE size_t getPrevTotalCodePointCount() const { |
| return mReadingState.mTotalCodePointCountSinceInitialization; |
| } |
| |
| // Return code point count include the last read node's code points. |
| AK_FORCE_INLINE size_t getTotalCodePointCount(const PtNodeParams &ptNodeParams) const { |
| return mReadingState.mTotalCodePointCountSinceInitialization |
| + ptNodeParams.getCodePointCount(); |
| } |
| |
| AK_FORCE_INLINE void fetchMergedNodeCodePointsInReverseOrder(const PtNodeParams &ptNodeParams, |
| const int index, int *const outCodePoints) const { |
| const int nodeCodePointCount = ptNodeParams.getCodePointCount(); |
| const int *const nodeCodePoints = ptNodeParams.getCodePoints(); |
| for (int i = 0; i < nodeCodePointCount; ++i) { |
| outCodePoints[index + i] = nodeCodePoints[nodeCodePointCount - 1 - i]; |
| } |
| } |
| |
| AK_FORCE_INLINE void readNextSiblingNode(const PtNodeParams &ptNodeParams) { |
| mReadingState.mRemainingPtNodeCountInThisArray -= 1; |
| mReadingState.mPos = ptNodeParams.getSiblingNodePos(); |
| if (mReadingState.mRemainingPtNodeCountInThisArray <= 0) { |
| // All nodes in the current node array have been read. |
| followForwardLink(); |
| } |
| } |
| |
| // Read the first child node of the current node. |
| AK_FORCE_INLINE void readChildNode(const PtNodeParams &ptNodeParams) { |
| if (ptNodeParams.hasChildren()) { |
| mReadingState.mTotalCodePointCountSinceInitialization += |
| ptNodeParams.getCodePointCount(); |
| mReadingState.mTotalPtNodeIndexInThisArrayChain = 0; |
| mReadingState.mPtNodeArrayIndexInThisArrayChain = 0; |
| mReadingState.mPos = ptNodeParams.getChildrenPos(); |
| mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; |
| // Read children node array. |
| nextPtNodeArray(); |
| } else { |
| mReadingState.mPos = NOT_A_DICT_POS; |
| } |
| } |
| |
| // Read the parent node of the current node. |
| AK_FORCE_INLINE void readParentNode(const PtNodeParams &ptNodeParams) { |
| if (ptNodeParams.getParentPos() != NOT_A_DICT_POS) { |
| mReadingState.mTotalCodePointCountSinceInitialization += |
| ptNodeParams.getCodePointCount(); |
| mReadingState.mTotalPtNodeIndexInThisArrayChain = 1; |
| mReadingState.mPtNodeArrayIndexInThisArrayChain = 1; |
| mReadingState.mRemainingPtNodeCountInThisArray = 1; |
| mReadingState.mPos = ptNodeParams.getParentPos(); |
| mReadingState.mPosOfLastForwardLinkField = NOT_A_DICT_POS; |
| mReadingState.mPosOfThisPtNodeArrayHead = NOT_A_DICT_POS; |
| } else { |
| mReadingState.mPos = NOT_A_DICT_POS; |
| } |
| } |
| |
| AK_FORCE_INLINE int getPosOfLastForwardLinkField() const { |
| return mReadingState.mPosOfLastForwardLinkField; |
| } |
| |
| AK_FORCE_INLINE int getPosOfLastPtNodeArrayHead() const { |
| return mReadingState.mPosOfThisPtNodeArrayHead; |
| } |
| |
| bool traverseAllPtNodesInPostorderDepthFirstManner(TraversingEventListener *const listener); |
| |
| bool traverseAllPtNodesInPtNodeArrayLevelPreorderDepthFirstManner( |
| TraversingEventListener *const listener); |
| |
| int getCodePointsAndReturnCodePointCount(const int maxCodePointCount, int *const outCodePoints); |
| |
| int getTerminalPtNodePositionOfWord(const int *const inWord, const size_t length, |
| const bool forceLowerCaseSearch); |
| |
| private: |
| DISALLOW_COPY_AND_ASSIGN(DynamicPtReadingHelper); |
| |
| // This class encapsulates the reading state of a position in the dictionary. It points at a |
| // specific PtNode in the dictionary. |
| class PtNodeReadingState { |
| public: |
| // Note that copy constructor and assignment operator are used for this class to use |
| // std::vector. |
| PtNodeReadingState() : mPos(NOT_A_DICT_POS), mRemainingPtNodeCountInThisArray(0), |
| mTotalCodePointCountSinceInitialization(0), mTotalPtNodeIndexInThisArrayChain(0), |
| mPtNodeArrayIndexInThisArrayChain(0), mPosOfLastForwardLinkField(NOT_A_DICT_POS), |
| mPosOfThisPtNodeArrayHead(NOT_A_DICT_POS) {} |
| |
| int mPos; |
| // Remaining node count in the current array. |
| int mRemainingPtNodeCountInThisArray; |
| size_t mTotalCodePointCountSinceInitialization; |
| // Counter of PtNodes used to avoid infinite loops caused by broken or malicious links. |
| int mTotalPtNodeIndexInThisArrayChain; |
| // Counter of PtNode arrays used to avoid infinite loops caused by cyclic links of empty |
| // PtNode arrays. |
| int mPtNodeArrayIndexInThisArrayChain; |
| int mPosOfLastForwardLinkField; |
| int mPosOfThisPtNodeArrayHead; |
| }; |
| |
| static const int MAX_CHILD_COUNT_TO_AVOID_INFINITE_LOOP; |
| static const int MAX_PT_NODE_ARRAY_COUNT_TO_AVOID_INFINITE_LOOP; |
| static const size_t MAX_READING_STATE_STACK_SIZE; |
| |
| // TODO: Introduce error code to track what caused the error. |
| bool mIsError; |
| PtNodeReadingState mReadingState; |
| const PtNodeReader *const mPtNodeReader; |
| const PtNodeArrayReader *const mPtNodeArrayReader; |
| std::vector<PtNodeReadingState> mReadingStateStack; |
| |
| void nextPtNodeArray(); |
| |
| void followForwardLink(); |
| |
| AK_FORCE_INLINE void pushReadingStateToStack() { |
| if (mReadingStateStack.size() > MAX_READING_STATE_STACK_SIZE) { |
| AKLOGI("Reading state stack overflow. Max size: %zd", MAX_READING_STATE_STACK_SIZE); |
| ASSERT(false); |
| mIsError = true; |
| mReadingState.mPos = NOT_A_DICT_POS; |
| } else { |
| mReadingStateStack.push_back(mReadingState); |
| } |
| } |
| |
| AK_FORCE_INLINE void popReadingStateFromStack() { |
| if (mReadingStateStack.empty()) { |
| mReadingState.mPos = NOT_A_DICT_POS; |
| } else { |
| mReadingState = mReadingStateStack.back(); |
| mReadingStateStack.pop_back(); |
| } |
| } |
| }; |
| } // namespace latinime |
| #endif /* LATINIME_DYNAMIC_PT_READING_HELPER_H */ |