Fixes crashes by making the native library thread-safe, makes Annotate calls much faster by
reusing tokens, and fixes default values of enums in the FlatBuffer schema.
Test: bit FrameworksCoreTests:android.view.textclassifier.TextClassificationManagerTest
Test: bit CtsViewTestCases:android.view.textclassifier.cts.TextClassificationManagerTest
Bug: 74193987
Bug: 68239358
Change-Id: Ic5ca42b628280bece59d31203748072084ac452c
(cherry picked from commit 2191547d7109587d73077f9d4818c691f7d7dafb)
Merged-In: Ic5ca42b628280bece59d31203748072084ac452c
diff --git a/tokenizer.h b/tokenizer.h
index 9ce2c7c..2524e12 100644
--- a/tokenizer.h
+++ b/tokenizer.h
@@ -47,7 +47,7 @@
protected:
// Finds the tokenization codepoint range config for given codepoint.
// Internally uses binary search so should be O(log(# of codepoint_ranges)).
- const TokenizationCodepointRange* FindTokenizationRange(int codepoint) const;
+ const TokenizationCodepointRangeT* FindTokenizationRange(int codepoint) const;
// Finds the role and script for given codepoint. If not found, DEFAULT_ROLE
// and kUnknownScript are assigned.
@@ -58,7 +58,8 @@
private:
// Codepoint ranges that determine how different codepoints are tokenized.
// The ranges must not overlap.
- std::vector<const TokenizationCodepointRange*> codepoint_ranges_;
+ std::vector<std::unique_ptr<const TokenizationCodepointRangeT>>
+ codepoint_ranges_;
// If true, tokens will be additionally split when the codepoint's script_id
// changes.