| /* |
| * Copyright (C) 2014 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef LATINIME_JNI_DATA_UTILS_H |
| #define LATINIME_JNI_DATA_UTILS_H |
| |
| #include <vector> |
| |
| #include "defines.h" |
| #include "dictionary/header/header_read_write_utils.h" |
| #include "dictionary/interface/dictionary_header_structure_policy.h" |
| #include "dictionary/property/ngram_context.h" |
| #include "dictionary/property/word_property.h" |
| #include "jni.h" |
| #include "utils/char_utils.h" |
| |
| namespace latinime { |
| |
| class JniDataUtils { |
| public: |
| static void jintarrayToVector(JNIEnv *env, jintArray array, std::vector<int> *const outVector) { |
| if (!array) { |
| outVector->clear(); |
| return; |
| } |
| const jsize arrayLength = env->GetArrayLength(array); |
| outVector->resize(arrayLength); |
| env->GetIntArrayRegion(array, 0 /* start */, arrayLength, outVector->data()); |
| } |
| |
| static DictionaryHeaderStructurePolicy::AttributeMap constructAttributeMap(JNIEnv *env, |
| jobjectArray attributeKeyStringArray, jobjectArray attributeValueStringArray) { |
| DictionaryHeaderStructurePolicy::AttributeMap attributeMap; |
| const int keyCount = env->GetArrayLength(attributeKeyStringArray); |
| for (int i = 0; i < keyCount; i++) { |
| jstring keyString = static_cast<jstring>( |
| env->GetObjectArrayElement(attributeKeyStringArray, i)); |
| const jsize keyUtf8Length = env->GetStringUTFLength(keyString); |
| char keyChars[keyUtf8Length + 1]; |
| env->GetStringUTFRegion(keyString, 0, env->GetStringLength(keyString), keyChars); |
| env->DeleteLocalRef(keyString); |
| keyChars[keyUtf8Length] = '\0'; |
| DictionaryHeaderStructurePolicy::AttributeMap::key_type key; |
| HeaderReadWriteUtils::insertCharactersIntoVector(keyChars, &key); |
| |
| jstring valueString = static_cast<jstring>( |
| env->GetObjectArrayElement(attributeValueStringArray, i)); |
| const jsize valueUtf8Length = env->GetStringUTFLength(valueString); |
| char valueChars[valueUtf8Length + 1]; |
| env->GetStringUTFRegion(valueString, 0, env->GetStringLength(valueString), valueChars); |
| env->DeleteLocalRef(valueString); |
| valueChars[valueUtf8Length] = '\0'; |
| DictionaryHeaderStructurePolicy::AttributeMap::mapped_type value; |
| HeaderReadWriteUtils::insertCharactersIntoVector(valueChars, &value); |
| attributeMap[key] = value; |
| } |
| return attributeMap; |
| } |
| |
| static void outputCodePoints(JNIEnv *env, jintArray intArrayToOutputCodePoints, const int start, |
| const int maxLength, const int *const codePoints, const int codePointCount, |
| const bool needsNullTermination) { |
| const int codePointBufSize = std::min(maxLength, codePointCount); |
| int outputCodePonts[codePointBufSize]; |
| int outputCodePointCount = 0; |
| for (int i = 0; i < codePointBufSize; ++i) { |
| const int codePoint = codePoints[i]; |
| int codePointToOutput = codePoint; |
| if (!CharUtils::isInUnicodeSpace(codePoint)) { |
| if (codePoint == CODE_POINT_BEGINNING_OF_SENTENCE) { |
| // Just skip Beginning-of-Sentence marker. |
| continue; |
| } |
| codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; |
| } else if (codePoint >= 0x01 && codePoint <= 0x1F) { |
| // Control code. |
| codePointToOutput = CODE_POINT_REPLACEMENT_CHARACTER; |
| } |
| outputCodePonts[outputCodePointCount++] = codePointToOutput; |
| } |
| env->SetIntArrayRegion(intArrayToOutputCodePoints, start, outputCodePointCount, |
| outputCodePonts); |
| if (needsNullTermination && outputCodePointCount < maxLength) { |
| env->SetIntArrayRegion(intArrayToOutputCodePoints, start + outputCodePointCount, |
| 1 /* len */, &CODE_POINT_NULL); |
| } |
| } |
| |
| static NgramContext constructNgramContext(JNIEnv *env, jobjectArray prevWordCodePointArrays, |
| jbooleanArray isBeginningOfSentenceArray, const size_t prevWordCount) { |
| int prevWordCodePoints[MAX_PREV_WORD_COUNT_FOR_N_GRAM][MAX_WORD_LENGTH]; |
| int prevWordCodePointCount[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; |
| bool isBeginningOfSentence[MAX_PREV_WORD_COUNT_FOR_N_GRAM]; |
| for (size_t i = 0; i < prevWordCount; ++i) { |
| prevWordCodePointCount[i] = 0; |
| isBeginningOfSentence[i] = false; |
| jintArray prevWord = (jintArray)env->GetObjectArrayElement(prevWordCodePointArrays, i); |
| if (!prevWord) { |
| continue; |
| } |
| jsize prevWordLength = env->GetArrayLength(prevWord); |
| if (prevWordLength > MAX_WORD_LENGTH) { |
| continue; |
| } |
| env->GetIntArrayRegion(prevWord, 0, prevWordLength, prevWordCodePoints[i]); |
| env->DeleteLocalRef(prevWord); |
| prevWordCodePointCount[i] = prevWordLength; |
| jboolean isBeginningOfSentenceBoolean = JNI_FALSE; |
| env->GetBooleanArrayRegion(isBeginningOfSentenceArray, i, 1 /* len */, |
| &isBeginningOfSentenceBoolean); |
| isBeginningOfSentence[i] = isBeginningOfSentenceBoolean == JNI_TRUE; |
| } |
| return NgramContext(prevWordCodePoints, prevWordCodePointCount, isBeginningOfSentence, |
| prevWordCount); |
| } |
| |
| static void putBooleanToArray(JNIEnv *env, jbooleanArray array, const int index, |
| const jboolean value) { |
| env->SetBooleanArrayRegion(array, index, 1 /* len */, &value); |
| } |
| |
| static void putIntToArray(JNIEnv *env, jintArray array, const int index, const int value) { |
| env->SetIntArrayRegion(array, index, 1 /* len */, &value); |
| } |
| |
| static void putFloatToArray(JNIEnv *env, jfloatArray array, const int index, |
| const float value) { |
| env->SetFloatArrayRegion(array, index, 1 /* len */, &value); |
| } |
| |
| static void outputWordProperty(JNIEnv *const env, const WordProperty &wordProperty, |
| jintArray outCodePoints, jbooleanArray outFlags, jintArray outProbabilityInfo, |
| jobject outNgramPrevWordsArray, jobject outNgramPrevWordIsBeginningOfSentenceArray, |
| jobject outNgramTargets, jobject outNgramProbabilities, jobject outShortcutTargets, |
| jobject outShortcutProbabilities); |
| |
| private: |
| DISALLOW_IMPLICIT_CONSTRUCTORS(JniDataUtils); |
| |
| static const int CODE_POINT_REPLACEMENT_CHARACTER; |
| static const int CODE_POINT_NULL; |
| }; |
| } // namespace latinime |
| #endif // LATINIME_JNI_DATA_UTILS_H |