Export libtextclassifier am: 1ac2e4ab8d

Change-Id: If73e048d7de4cb440c6892db93e579701fc7a53a
diff --git a/jni/com/google/android/textclassifier/AnnotatorModel.java b/jni/com/google/android/textclassifier/AnnotatorModel.java
index e604653..7c72446 100644
--- a/jni/com/google/android/textclassifier/AnnotatorModel.java
+++ b/jni/com/google/android/textclassifier/AnnotatorModel.java
@@ -180,6 +180,15 @@
   }
 
   /**
+   * Annotates multiple fragments of text at once. There will be one AnnotatedSpan array for each
+   * input fragment to annotate.
+   */
+  public AnnotatedSpan[][] annotateStructuredInput(
+      InputFragment[] fragments, AnnotationOptions options) {
+    return nativeAnnotateStructuredInput(annotatorPtr, fragments, options);
+  }
+
+  /**
    * Looks up a knowledge entity by its identifier. Returns null if the entity is not found or on
    * error.
    */
@@ -415,6 +424,52 @@
     }
   }
 
+  /** Represents a fragment of text to the AnnotateStructuredInput call. */
+  public static final class InputFragment {
+
+    /** Encapsulates the data required to set the relative time of an InputFragment. */
+    public static final class DatetimeOptions {
+      private final String referenceTimezone;
+      private final Long referenceTimeMsUtc;
+
+      DatetimeOptions(String referenceTimezone, Long referenceTimeMsUtc) {
+        this.referenceTimeMsUtc = referenceTimeMsUtc;
+        this.referenceTimezone = referenceTimezone;
+      }
+    }
+
+    InputFragment(String text) {
+      this.text = text;
+      this.datetimeOptionsNullable = null;
+    }
+
+    InputFragment(String text, DatetimeOptions datetimeOptions) {
+      this.text = text;
+      this.datetimeOptionsNullable = datetimeOptions;
+    }
+
+    private final String text;
+    // The DatetimeOptions can't be Optional because the _api16 build of the TCLib SDK does not
+    // support java.util.Optional.
+    private final DatetimeOptions datetimeOptionsNullable;
+
+    public String getText() {
+      return text;
+    }
+
+    public boolean hasDatetimeOptions() {
+      return datetimeOptionsNullable != null;
+    }
+
+    public long getReferenceTimeMsUtc() {
+      return datetimeOptionsNullable.referenceTimeMsUtc;
+    }
+
+    public String getReferenceTimezone() {
+      return datetimeOptionsNullable.referenceTimezone;
+    }
+  }
+
   /**
    * Represents options for the suggestSelection call. TODO(b/63427420): Use location with Selection
    * options.
@@ -760,6 +815,9 @@
   private native AnnotatedSpan[] nativeAnnotate(
       long context, String text, AnnotationOptions options);
 
+  private native AnnotatedSpan[][] nativeAnnotateStructuredInput(
+      long context, InputFragment[] inputFragments, AnnotationOptions options);
+
   private native byte[] nativeLookUpKnowledgeEntity(long context, String id);
 
   private native void nativeCloseAnnotator(long context);
diff --git a/native/actions/actions-suggestions.cc b/native/actions/actions-suggestions.cc
index 34fa76f..0c9d60a 100644
--- a/native/actions/actions-suggestions.cc
+++ b/native/actions/actions-suggestions.cc
@@ -283,7 +283,7 @@
   // Initialize regular expressions model.
   std::unique_ptr<ZlibDecompressor> decompressor = ZlibDecompressor::Instance();
   regex_actions_.reset(
-      new RegexActions(model_->smart_reply_action_type()->str(), unilib_));
+      new RegexActions(unilib_, model_->smart_reply_action_type()->str()));
   if (!regex_actions_->InitializeRules(
           model_->rules(), model_->low_confidence_rules(),
           triggering_preconditions_overlay_, decompressor.get())) {
@@ -348,11 +348,10 @@
 
   // Create low confidence model if specified.
   if (model_->low_confidence_ngram_model() != nullptr) {
-    ngram_model_ = NGramModel::Create(model_->low_confidence_ngram_model(),
-                                      feature_processor_ == nullptr
-                                          ? nullptr
-                                          : feature_processor_->tokenizer(),
-                                      unilib_);
+    ngram_model_ = NGramModel::Create(
+        unilib_, model_->low_confidence_ngram_model(),
+        feature_processor_ == nullptr ? nullptr
+                                      : feature_processor_->tokenizer());
     if (ngram_model_ == nullptr) {
       TC3_LOG(ERROR) << "Could not create ngram linear regression model.";
       return false;
@@ -681,7 +680,7 @@
     ActionsSuggestionsResponse* response) const {
   // Read sensitivity and triggering score predictions.
   if (model_->tflite_model_spec()->output_triggering_score() >= 0) {
-    const TensorView<float>& triggering_score =
+    const TensorView<float> triggering_score =
         model_executor_->OutputView<float>(
             model_->tflite_model_spec()->output_triggering_score(),
             interpreter);
@@ -695,7 +694,7 @@
          preconditions_.min_smart_reply_triggering_score);
   }
   if (model_->tflite_model_spec()->output_sensitive_topic_score() >= 0) {
-    const TensorView<float>& sensitive_topic_score =
+    const TensorView<float> sensitive_topic_score =
         model_executor_->OutputView<float>(
             model_->tflite_model_spec()->output_sensitive_topic_score(),
             interpreter);
@@ -851,7 +850,11 @@
   }
   const int num_messages_grammar =
       ((model_->rules() && model_->rules()->grammar_rules() &&
-        model_->rules()->grammar_rules()->annotation_nonterminal())
+        model_->rules()
+            ->grammar_rules()
+            ->rules()
+            ->nonterminals()
+            ->annotation_nt())
            ? 1
            : 0);
   const int num_messages_mapping =
@@ -988,7 +991,7 @@
         // Apply normalization if specified.
         if (mapping->normalization_options() != nullptr) {
           normalized_annotation_text =
-              NormalizeText(unilib_, mapping->normalization_options(),
+              NormalizeText(*unilib_, mapping->normalization_options(),
                             normalized_annotation_text);
         }
 
diff --git a/native/actions/actions-suggestions.h b/native/actions/actions-suggestions.h
index 4bf468b..0b593aa 100644
--- a/native/actions/actions-suggestions.h
+++ b/native/actions/actions-suggestions.h
@@ -110,7 +110,7 @@
   const ActionsModel* model() const;
   const reflection::Schema* entity_data_schema() const;
 
-  static const int kLocalUserId = 0;
+  static constexpr int kLocalUserId = 0;
 
   // Should be in sync with those defined in Android.
   // android/frameworks/base/core/java/android/view/textclassifier/ConversationActions.java
diff --git a/native/actions/actions_model.fbs b/native/actions/actions_model.fbs
index 3a86fc1..2f0d1bd 100755
--- a/native/actions/actions_model.fbs
+++ b/native/actions/actions_model.fbs
@@ -436,12 +436,6 @@
   action_id:[uint];
 }
 
-namespace libtextclassifier3.RulesModel_.GrammarRules_;
-table AnnotationNonterminalEntry {
-  key:string (key, shared);
-  value:int;
-}
-
 // Configuration for actions based on context-free grammars.
 namespace libtextclassifier3.RulesModel_;
 table GrammarRules {
@@ -455,10 +449,6 @@
 
   // The action specifications used by the rule matches.
   actions:[RuleActionSpec];
-
-  // Predefined nonterminals for annotations.
-  // Maps annotation/collection names to non-terminal ids.
-  annotation_nonterminal:[GrammarRules_.AnnotationNonterminalEntry];
 }
 
 // Rule based actions.
diff --git a/native/actions/feature-processor.cc b/native/actions/feature-processor.cc
index d0b2072..249a132 100644
--- a/native/actions/feature-processor.cc
+++ b/native/actions/feature-processor.cc
@@ -32,8 +32,8 @@
   extractor_options.unicode_aware_features = options->unicode_aware_features();
   extractor_options.extract_selection_mask_feature = false;
   if (options->regexp_features() != nullptr) {
-    for (const auto& regexp_feauture : *options->regexp_features()) {
-      extractor_options.regexp_features.push_back(regexp_feauture->str());
+    for (const auto regexp_feature : *options->regexp_features()) {
+      extractor_options.regexp_features.push_back(regexp_feature->str());
     }
   }
   extractor_options.remap_digits = options->remap_digits();
@@ -70,7 +70,7 @@
     : options_(options),
       tokenizer_(CreateTokenizer(options->tokenizer_options(), unilib)),
       token_feature_extractor_(BuildTokenFeatureExtractorOptions(options),
-                               *unilib) {}
+                               unilib) {}
 
 int ActionsFeatureProcessor::GetTokenEmbeddingSize() const {
   return options_->embedding_size() +
diff --git a/native/actions/feature-processor.h b/native/actions/feature-processor.h
index e34ccff..5e4085a 100644
--- a/native/actions/feature-processor.h
+++ b/native/actions/feature-processor.h
@@ -36,8 +36,8 @@
 // Feature processor for the actions suggestions model.
 class ActionsFeatureProcessor {
  public:
-  ActionsFeatureProcessor(const ActionsTokenFeatureProcessorOptions* options,
-                          const UniLib* unilib);
+  explicit ActionsFeatureProcessor(
+      const ActionsTokenFeatureProcessorOptions* options, const UniLib* unilib);
 
   // Embeds and appends features to the output vector.
   bool AppendFeatures(const std::vector<int>& sparse_features,
diff --git a/native/actions/grammar-actions.cc b/native/actions/grammar-actions.cc
index 5f24c99..4995eaa 100644
--- a/native/actions/grammar-actions.cc
+++ b/native/actions/grammar-actions.cc
@@ -32,17 +32,11 @@
 namespace libtextclassifier3 {
 namespace {
 
-// Represents an annotator annotated span in the grammar.
-struct AnnotationMatch : public grammar::Match {
-  static const int16 kType = 1;
-  ClassificationResult annotation;
-};
-
 class GrammarActionsCallbackDelegate : public grammar::CallbackDelegate {
  public:
   GrammarActionsCallbackDelegate(const UniLib* unilib,
                                  const RulesModel_::GrammarRules* grammar_rules)
-      : unilib_(unilib), grammar_rules_(grammar_rules) {}
+      : unilib_(*unilib), grammar_rules_(grammar_rules) {}
 
   // Handle a grammar rule match in the actions grammar.
   void MatchFound(const grammar::Match* match, grammar::CallbackId type,
@@ -169,14 +163,14 @@
                   /*span=*/capturing_match->codepoint_span, group,
                   /*message_index=*/message_index, match_text, &annotation)) {
             if (group->use_annotation_match()) {
-              const AnnotationMatch* annotation_match =
-                  grammar::SelectFirstOfType<AnnotationMatch>(
-                      capturing_match, AnnotationMatch::kType);
+              const grammar::AnnotationMatch* annotation_match =
+                  grammar::SelectFirstOfType<grammar::AnnotationMatch>(
+                      capturing_match, grammar::Match::kAnnotationMatch);
               if (!annotation_match) {
                 TC3_LOG(ERROR) << "Could not get annotation for match.";
                 return false;
               }
-              annotation.entity = annotation_match->annotation;
+              annotation.entity = *annotation_match->annotation;
             }
             annotations.push_back(std::move(annotation));
           }
@@ -194,7 +188,7 @@
     return true;
   }
 
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const RulesModel_::GrammarRules* grammar_rules_;
 
   // All action rule match candidates.
@@ -208,10 +202,10 @@
     const UniLib* unilib, const RulesModel_::GrammarRules* grammar_rules,
     const ReflectiveFlatbufferBuilder* entity_data_builder,
     const std::string& smart_reply_action_type)
-    : unilib_(unilib),
+    : unilib_(*unilib),
       grammar_rules_(grammar_rules),
       tokenizer_(CreateTokenizer(grammar_rules->tokenizer_options(), unilib)),
-      lexer_(*unilib, grammar_rules->rules()),
+      lexer_(unilib, grammar_rules->rules()),
       entity_data_builder_(entity_data_builder),
       smart_reply_action_type_(smart_reply_action_type),
       rules_locales_(ParseRulesLocales(grammar_rules->rules())) {}
@@ -240,34 +234,8 @@
     return true;
   }
 
-  GrammarActionsCallbackDelegate callback_handler(unilib_, grammar_rules_);
-
-  std::vector<AnnotationMatch> matches;
-  if (auto annotation_nonterminals = grammar_rules_->annotation_nonterminal()) {
-    for (const AnnotatedSpan& annotation :
-         conversation.messages.back().annotations) {
-      if (annotation.classification.empty()) {
-        continue;
-      }
-      const ClassificationResult& classification =
-          annotation.classification.front();
-      if (auto entry = annotation_nonterminals->LookupByKey(
-              classification.collection.c_str())) {
-        AnnotationMatch match;
-        match.Init(entry->value(), annotation.span, annotation.span.first,
-                   AnnotationMatch::kType);
-        match.annotation = classification;
-        matches.push_back(std::move(match));
-      }
-    }
-  }
-
-  std::vector<grammar::Match*> annotation_matches(matches.size());
-  for (int i = 0; i < matches.size(); i++) {
-    annotation_matches[i] = &matches[i];
-  }
-
-  grammar::Matcher matcher(*unilib_, grammar_rules_->rules(), locale_rules,
+  GrammarActionsCallbackDelegate callback_handler(&unilib_, grammar_rules_);
+  grammar::Matcher matcher(&unilib_, grammar_rules_->rules(), locale_rules,
                            &callback_handler);
 
   const UnicodeText text =
@@ -275,7 +243,8 @@
 
   // Run grammar on last message.
   lexer_.Process(text, tokenizer_->Tokenize(text),
-                 /*matches=*/annotation_matches, &matcher);
+                 /*annotations=*/&conversation.messages.back().annotations,
+                 &matcher);
 
   // Populate results.
   return callback_handler.GetActions(conversation, smart_reply_action_type_,
diff --git a/native/actions/grammar-actions.h b/native/actions/grammar-actions.h
index 5832fc3..fc3270d 100644
--- a/native/actions/grammar-actions.h
+++ b/native/actions/grammar-actions.h
@@ -37,17 +37,17 @@
  public:
   enum class Callback : grammar::CallbackId { kActionRuleMatch = 1 };
 
-  GrammarActions(const UniLib* unilib,
-                 const RulesModel_::GrammarRules* grammar_rules,
-                 const ReflectiveFlatbufferBuilder* entity_data_builder,
-                 const std::string& smart_reply_action_type);
+  explicit GrammarActions(
+      const UniLib* unilib, const RulesModel_::GrammarRules* grammar_rules,
+      const ReflectiveFlatbufferBuilder* entity_data_builder,
+      const std::string& smart_reply_action_type);
 
   // Suggests actions for a conversation from a message stream.
   bool SuggestActions(const Conversation& conversation,
                       std::vector<ActionSuggestion>* result) const;
 
  private:
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const RulesModel_::GrammarRules* grammar_rules_;
   const std::unique_ptr<Tokenizer> tokenizer_;
   const grammar::Lexer lexer_;
diff --git a/native/actions/ngram-model.cc b/native/actions/ngram-model.cc
index 50f912e..fb3992c 100644
--- a/native/actions/ngram-model.cc
+++ b/native/actions/ngram-model.cc
@@ -61,8 +61,8 @@
 }  // anonymous namespace
 
 std::unique_ptr<NGramModel> NGramModel::Create(
-    const NGramLinearRegressionModel* model, const Tokenizer* tokenizer,
-    const UniLib* unilib) {
+    const UniLib* unilib, const NGramLinearRegressionModel* model,
+    const Tokenizer* tokenizer) {
   if (model == nullptr) {
     return nullptr;
   }
@@ -70,11 +70,12 @@
     TC3_LOG(ERROR) << "No tokenizer options specified.";
     return nullptr;
   }
-  return std::unique_ptr<NGramModel>(new NGramModel(model, tokenizer, unilib));
+  return std::unique_ptr<NGramModel>(new NGramModel(unilib, model, tokenizer));
 }
 
-NGramModel::NGramModel(const NGramLinearRegressionModel* model,
-                       const Tokenizer* tokenizer, const UniLib* unilib)
+NGramModel::NGramModel(const UniLib* unilib,
+                       const NGramLinearRegressionModel* model,
+                       const Tokenizer* tokenizer)
     : model_(model) {
   // Create new tokenizer if options are specified, reuse feature processor
   // tokenizer otherwise.
diff --git a/native/actions/ngram-model.h b/native/actions/ngram-model.h
index da19ddb..a9072cd 100644
--- a/native/actions/ngram-model.h
+++ b/native/actions/ngram-model.h
@@ -30,8 +30,8 @@
 class NGramModel {
  public:
   static std::unique_ptr<NGramModel> Create(
-      const NGramLinearRegressionModel* model, const Tokenizer* tokenizer,
-      const UniLib* unilib);
+      const UniLib* unilib, const NGramLinearRegressionModel* model,
+      const Tokenizer* tokenizer);
 
   // Evaluates an n-gram linear regression model, and tests against the
   // threshold. Returns true in case of a positive classification. The caller
@@ -48,8 +48,8 @@
                                 int max_skips);
 
  private:
-  NGramModel(const NGramLinearRegressionModel* model,
-             const Tokenizer* tokenizer, const UniLib* unilib);
+  NGramModel(const UniLib* unilib, const NGramLinearRegressionModel* model,
+             const Tokenizer* tokenizer);
 
   // Returns the (begin,end] range of n-grams where the first hashed token
   // matches the given value.
diff --git a/native/actions/regex-actions.cc b/native/actions/regex-actions.cc
index 49d2493..7d5a4b2 100644
--- a/native/actions/regex-actions.cc
+++ b/native/actions/regex-actions.cc
@@ -96,7 +96,7 @@
   for (const RulesModel_::RegexRule* rule : *rules->regex_rule()) {
     std::unique_ptr<UniLib::RegexPattern> compiled_pattern =
         UncompressMakeRegexPattern(
-            *unilib_, rule->pattern(), rule->compressed_pattern(),
+            unilib_, rule->pattern(), rule->compressed_pattern(),
             rules->lazy_regex_compilation(), decompressor);
     if (compiled_pattern == nullptr) {
       TC3_LOG(ERROR) << "Failed to load rule pattern.";
@@ -108,7 +108,7 @@
     if (rule->output_pattern() != nullptr ||
         rule->compressed_output_pattern() != nullptr) {
       compiled_output_pattern = UncompressMakeRegexPattern(
-          *unilib_, rule->output_pattern(), rule->compressed_output_pattern(),
+          unilib_, rule->output_pattern(), rule->compressed_output_pattern(),
           rules->lazy_regex_compilation(), decompressor);
       if (compiled_output_pattern == nullptr) {
         TC3_LOG(ERROR) << "Failed to load rule output pattern.";
diff --git a/native/actions/regex-actions.h b/native/actions/regex-actions.h
index c6b9ce2..871f08b 100644
--- a/native/actions/regex-actions.h
+++ b/native/actions/regex-actions.h
@@ -32,8 +32,9 @@
 // Regular expression backed actions suggestions.
 class RegexActions {
  public:
-  RegexActions(const std::string& smart_reply_action_type, const UniLib* unilib)
-      : unilib_(unilib), smart_reply_action_type_(smart_reply_action_type) {}
+  explicit RegexActions(const UniLib* unilib,
+                        const std::string& smart_reply_action_type)
+      : unilib_(*unilib), smart_reply_action_type_(smart_reply_action_type) {}
 
   // Decompresses and initializes all rules in a model.
   bool InitializeRules(
@@ -75,7 +76,7 @@
                             ZlibDecompressor* decompressor,
                             std::vector<CompiledRule>* compiled_rules) const;
 
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const std::string smart_reply_action_type_;
   std::vector<CompiledRule> rules_, low_confidence_rules_;
 };
diff --git a/native/actions/utils.cc b/native/actions/utils.cc
index 42543f4..96f6f1f 100644
--- a/native/actions/utils.cc
+++ b/native/actions/utils.cc
@@ -69,7 +69,7 @@
 }
 
 UnicodeText NormalizeMatchText(
-    const UniLib* unilib,
+    const UniLib& unilib,
     const RulesModel_::RuleActionSpec_::RuleCapturingGroup* group,
     StringPiece match_text) {
   UnicodeText normalized_match_text =
diff --git a/native/actions/utils.h b/native/actions/utils.h
index 18df251..820c79d 100644
--- a/native/actions/utils.h
+++ b/native/actions/utils.h
@@ -45,7 +45,7 @@
 
 // Applies normalization to a capturing match.
 UnicodeText NormalizeMatchText(
-    const UniLib* unilib,
+    const UniLib& unilib,
     const RulesModel_::RuleActionSpec_::RuleCapturingGroup* group,
     StringPiece match_text);
 
diff --git a/native/annotator/annotator.cc b/native/annotator/annotator.cc
index 2fc2318..efdf6c8 100644
--- a/native/annotator/annotator.cc
+++ b/native/annotator/annotator.cc
@@ -442,10 +442,10 @@
   if (model_->grammar_datetime_model() &&
       model_->grammar_datetime_model()->datetime_rules()) {
     cfg_datetime_parser_.reset(new dates::CfgDatetimeAnnotator(
-        *unilib_,
+        unilib_,
         /*tokenizer_options=*/
         model_->grammar_datetime_model()->grammar_tokenizer_options(),
-        *calendarlib_,
+        calendarlib_,
         /*datetime_rules=*/model_->grammar_datetime_model()->datetime_rules(),
         model_->grammar_datetime_model()->target_classification_score(),
         model_->grammar_datetime_model()->priority_score()));
@@ -458,7 +458,7 @@
 
   if (model_->datetime_model()) {
     datetime_parser_ = DatetimeParser::Instance(
-        model_->datetime_model(), *unilib_, *calendarlib_, decompressor.get());
+        model_->datetime_model(), unilib_, calendarlib_, decompressor.get());
     if (!datetime_parser_) {
       TC3_LOG(ERROR) << "Could not initialize datetime parser.";
       return;
@@ -558,7 +558,7 @@
 
   // Initialize pattern recognizers.
   int regex_pattern_id = 0;
-  for (const auto& regex_pattern : *model_->regex_model()->patterns()) {
+  for (const auto regex_pattern : *model_->regex_model()->patterns()) {
     std::unique_ptr<UniLib::RegexPattern> compiled_pattern =
         UncompressMakeRegexPattern(
             *unilib_, regex_pattern->pattern(),
@@ -2391,7 +2391,7 @@
         // Apply normalization if specified.
         if (group->normalization_options() != nullptr) {
           normalized_group_match_text =
-              NormalizeText(unilib_, group->normalization_options(),
+              NormalizeText(*unilib_, group->normalization_options(),
                             normalized_group_match_text);
         }
 
diff --git a/native/annotator/annotator_jni.cc b/native/annotator/annotator_jni.cc
index 4d5b4df..3e04f7f 100644
--- a/native/annotator/annotator_jni.cc
+++ b/native/annotator/annotator_jni.cc
@@ -27,6 +27,7 @@
 #include "annotator/annotator_jni_common.h"
 #include "annotator/types.h"
 #include "utils/base/integral_types.h"
+#include "utils/base/status_macros.h"
 #include "utils/base/statusor.h"
 #include "utils/calendar/calendar.h"
 #include "utils/intents/intent-generator.h"
@@ -327,7 +328,8 @@
             datetime_parse_class_constructor, device_locales, options, context,
             selection_indices, classification_result[i],
             generate_intents && (i == 0)));
-    env->SetObjectArrayElement(results.get(), i, result.get());
+    TC3_RETURN_IF_ERROR(
+        JniHelper::SetObjectArrayElement(env, results.get(), i, result.get()));
   }
   return results;
 }
@@ -451,7 +453,9 @@
 using libtextclassifier3::ConvertIndicesUTF8ToBMP;
 using libtextclassifier3::FromJavaAnnotationOptions;
 using libtextclassifier3::FromJavaClassificationOptions;
+using libtextclassifier3::FromJavaInputFragment;
 using libtextclassifier3::FromJavaSelectionOptions;
+using libtextclassifier3::InputFragment;
 using libtextclassifier3::ToStlString;
 
 TC3_JNI_METHOD(jlong, TC3_ANNOTATOR_CLASS_NAME, nativeNewAnnotator)
@@ -513,7 +517,8 @@
   Annotator* model = reinterpret_cast<AnnotatorJniContext*>(ptr)->model();
 
   std::string serialized_config_string;
-  const int length = env->GetArrayLength(serialized_config);
+  TC3_ASSIGN_OR_RETURN_FALSE(jsize length,
+                             JniHelper::GetArrayLength(env, serialized_config));
   serialized_config_string.resize(length);
   env->GetByteArrayRegion(serialized_config, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
@@ -532,7 +537,8 @@
   Annotator* model = reinterpret_cast<AnnotatorJniContext*>(ptr)->model();
 
   std::string serialized_config_string;
-  const int length = env->GetArrayLength(serialized_config);
+  TC3_ASSIGN_OR_RETURN_FALSE(jsize length,
+                             JniHelper::GetArrayLength(env, serialized_config));
   serialized_config_string.resize(length);
   env->GetByteArrayRegion(serialized_config, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
@@ -551,7 +557,8 @@
   Annotator* model = reinterpret_cast<AnnotatorJniContext*>(ptr)->model();
 
   std::string serialized_config_string;
-  const int length = env->GetArrayLength(serialized_config);
+  TC3_ASSIGN_OR_RETURN_FALSE(jsize length,
+                             JniHelper::GetArrayLength(env, serialized_config));
   serialized_config_string.resize(length);
   env->GetByteArrayRegion(serialized_config, 0, length,
                           reinterpret_cast<jbyte*>(const_cast<char*>(
@@ -677,10 +684,12 @@
       JniHelper::FindClass(
           env, TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$AnnotatedSpan"));
 
-  jmethodID result_class_constructor =
-      env->GetMethodID(result_class.get(), "<init>",
-                       "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
-                       "$ClassificationResult;)V");
+  TC3_ASSIGN_OR_RETURN_NULL(
+      jmethodID result_class_constructor,
+      JniHelper::GetMethodID(
+          env, result_class.get(), "<init>",
+          "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+          "$ClassificationResult;)V"));
 
   TC3_ASSIGN_OR_RETURN_NULL(
       ScopedLocalRef<jobjectArray> results,
@@ -701,11 +710,114 @@
                              static_cast<jint>(span_bmp.first),
                              static_cast<jint>(span_bmp.second),
                              classification_results.get()));
-    env->SetObjectArrayElement(results.get(), i, result.get());
+    if (!JniHelper::SetObjectArrayElement(env, results.get(), i, result.get())
+             .ok()) {
+      return nullptr;
+    }
   }
   return results.release();
 }
 
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME,
+               nativeAnnotateStructuredInput)
+(JNIEnv* env, jobject thiz, jlong ptr, jobjectArray jinput_fragments,
+ jobject options) {
+  if (!ptr) {
+    return nullptr;
+  }
+  const AnnotatorJniContext* model_context =
+      reinterpret_cast<AnnotatorJniContext*>(ptr);
+
+  std::vector<InputFragment> string_fragments;
+  TC3_ASSIGN_OR_RETURN_NULL(jsize input_size,
+                            JniHelper::GetArrayLength(env, jinput_fragments));
+  for (int i = 0; i < input_size; ++i) {
+    TC3_ASSIGN_OR_RETURN_NULL(
+        ScopedLocalRef<jobject> jfragment,
+        JniHelper::GetObjectArrayElement<jobject>(env, jinput_fragments, i));
+    TC3_ASSIGN_OR_RETURN_NULL(InputFragment fragment,
+                              FromJavaInputFragment(env, jfragment.get()));
+    string_fragments.push_back(std::move(fragment));
+  }
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      libtextclassifier3::AnnotationOptions annotation_options,
+      FromJavaAnnotationOptions(env, options));
+  const StatusOr<std::vector<std::vector<AnnotatedSpan>>> annotations_or =
+      model_context->model()->AnnotateStructuredInput(string_fragments,
+                                                      annotation_options);
+  if (!annotations_or.ok()) {
+    TC3_LOG(ERROR) << "Annotation of structured input failed with error: "
+                   << annotations_or.status().error_message();
+    return nullptr;
+  }
+
+  std::vector<std::vector<AnnotatedSpan>> annotations =
+      std::move(annotations_or.ValueOrDie());
+  TC3_ASSIGN_OR_RETURN_NULL(
+      ScopedLocalRef<jclass> span_class,
+      JniHelper::FindClass(
+          env, TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$AnnotatedSpan"));
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      jmethodID span_class_constructor,
+      JniHelper::GetMethodID(
+          env, span_class.get(), "<init>",
+          "(II[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+          "$ClassificationResult;)V"));
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      ScopedLocalRef<jclass> span_class_array,
+      JniHelper::FindClass(env,
+                           "[L" TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR
+                           "$AnnotatedSpan;"));
+
+  TC3_ASSIGN_OR_RETURN_NULL(
+      ScopedLocalRef<jobjectArray> results,
+      JniHelper::NewObjectArray(env, input_size, span_class_array.get()));
+
+  for (int fragment_index = 0; fragment_index < annotations.size();
+       ++fragment_index) {
+    TC3_ASSIGN_OR_RETURN_NULL(
+        ScopedLocalRef<jobjectArray> jfragmentAnnotations,
+        JniHelper::NewObjectArray(env, annotations[fragment_index].size(),
+                                  span_class.get()));
+    for (int annotation_index = 0;
+         annotation_index < annotations[fragment_index].size();
+         ++annotation_index) {
+      CodepointSpan span_bmp = ConvertIndicesUTF8ToBMP(
+          string_fragments[fragment_index].text,
+          annotations[fragment_index][annotation_index].span);
+      TC3_ASSIGN_OR_RETURN_NULL(
+          ScopedLocalRef<jobjectArray> classification_results,
+          ClassificationResultsToJObjectArray(
+              env, model_context,
+              annotations[fragment_index][annotation_index].classification));
+      TC3_ASSIGN_OR_RETURN_NULL(
+          ScopedLocalRef<jobject> single_annotation,
+          JniHelper::NewObject(env, span_class.get(), span_class_constructor,
+                               static_cast<jint>(span_bmp.first),
+                               static_cast<jint>(span_bmp.second),
+                               classification_results.get()));
+
+      if (!JniHelper::SetObjectArrayElement(env, jfragmentAnnotations.get(),
+                                            annotation_index,
+                                            single_annotation.get())
+               .ok()) {
+        return nullptr;
+      }
+    }
+
+    if (!JniHelper::SetObjectArrayElement(env, results.get(), fragment_index,
+                                          jfragmentAnnotations.get())
+             .ok()) {
+      return nullptr;
+    }
+  }
+
+  return results.release();
+}
+
 TC3_JNI_METHOD(jbyteArray, TC3_ANNOTATOR_CLASS_NAME,
                nativeLookUpKnowledgeEntity)
 (JNIEnv* env, jobject thiz, jlong ptr, jstring id) {
diff --git a/native/annotator/annotator_jni.h b/native/annotator/annotator_jni.h
index 55893a4..39a9d9a 100644
--- a/native/annotator/annotator_jni.h
+++ b/native/annotator/annotator_jni.h
@@ -68,6 +68,11 @@
  jint selection_end, jobject options, jobject app_context,
  jstring device_locales);
 
+TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME,
+               nativeAnnotateStructuredInput)
+(JNIEnv* env, jobject thiz, jlong ptr, jobjectArray jinput_fragments,
+ jobject options);
+
 TC3_JNI_METHOD(jobjectArray, TC3_ANNOTATOR_CLASS_NAME, nativeAnnotate)
 (JNIEnv* env, jobject thiz, jlong ptr, jstring context, jobject options);
 
diff --git a/native/annotator/annotator_jni_common.cc b/native/annotator/annotator_jni_common.cc
index 479d31c..de58b70 100644
--- a/native/annotator/annotator_jni_common.cc
+++ b/native/annotator/annotator_jni_common.cc
@@ -269,4 +269,67 @@
   return annotation_options;
 }
 
+StatusOr<InputFragment> FromJavaInputFragment(JNIEnv* env, jobject jfragment) {
+  if (!jfragment) {
+    return Status(StatusCode::INTERNAL, "Called with null input fragment.");
+  }
+  InputFragment fragment;
+
+  TC3_ASSIGN_OR_RETURN(
+      ScopedLocalRef<jclass> fragment_class,
+      JniHelper::FindClass(
+          env, TC3_PACKAGE_PATH TC3_ANNOTATOR_CLASS_NAME_STR "$InputFragment"));
+
+  // .getText()
+  TC3_ASSIGN_OR_RETURN(
+      jmethodID get_text,
+      JniHelper::GetMethodID(env, fragment_class.get(), "getText",
+                             "()Ljava/lang/String;"));
+
+  TC3_ASSIGN_OR_RETURN(
+      ScopedLocalRef<jstring> text,
+      JniHelper::CallObjectMethod<jstring>(env, jfragment, get_text));
+
+  TC3_ASSIGN_OR_RETURN(fragment.text, ToStlString(env, text.get()));
+
+  // .hasDatetimeOptions()
+  TC3_ASSIGN_OR_RETURN(jmethodID has_date_time_options_method,
+                       JniHelper::GetMethodID(env, fragment_class.get(),
+                                              "hasDatetimeOptions", "()Z"));
+
+  TC3_ASSIGN_OR_RETURN(bool has_date_time_options,
+                       JniHelper::CallBooleanMethod(
+                           env, jfragment, has_date_time_options_method));
+
+  if (has_date_time_options) {
+    // .getReferenceTimeMsUtc()
+    TC3_ASSIGN_OR_RETURN(
+        jmethodID get_reference_time_method,
+        JniHelper::GetMethodID(env, fragment_class.get(),
+                               "getReferenceTimeMsUtc", "()J"));
+
+    TC3_ASSIGN_OR_RETURN(
+        int64 reference_time,
+        JniHelper::CallLongMethod(env, jfragment, get_reference_time_method));
+
+    // .getReferenceTimezone()
+    TC3_ASSIGN_OR_RETURN(
+        jmethodID get_reference_timezone_method,
+        JniHelper::GetMethodID(env, fragment_class.get(),
+                               "getReferenceTimezone", "()Ljava/lang/String;"));
+
+    TC3_ASSIGN_OR_RETURN(ScopedLocalRef<jstring> jreference_timezone,
+                         JniHelper::CallObjectMethod<jstring>(
+                             env, jfragment, get_reference_timezone_method));
+
+    TC3_ASSIGN_OR_RETURN(std::string reference_timezone,
+                         ToStlString(env, jreference_timezone.get()));
+
+    fragment.datetime_options =
+        DatetimeOptions{.reference_time_ms_utc = reference_time,
+                        .reference_timezone = reference_timezone};
+  }
+
+  return fragment;
+}
 }  // namespace libtextclassifier3
diff --git a/native/annotator/annotator_jni_common.h b/native/annotator/annotator_jni_common.h
index 4ad984c..cadd2fd 100644
--- a/native/annotator/annotator_jni_common.h
+++ b/native/annotator/annotator_jni_common.h
@@ -40,6 +40,8 @@
 StatusOr<AnnotationOptions> FromJavaAnnotationOptions(JNIEnv* env,
                                                       jobject joptions);
 
+StatusOr<InputFragment> FromJavaInputFragment(JNIEnv* env, jobject jfragment);
+
 }  // namespace libtextclassifier3
 
 #endif  // LIBTEXTCLASSIFIER_ANNOTATOR_ANNOTATOR_JNI_COMMON_H_
diff --git a/native/annotator/collections.h b/native/annotator/collections.h
index 2718bae..417b447 100644
--- a/native/annotator/collections.h
+++ b/native/annotator/collections.h
@@ -139,6 +139,11 @@
         *[]() { return new std::string("url"); }();
     return value;
   }
+  static const std::string& OtpCode() {
+    static const std::string& value =
+        *[]() { return new std::string("otp_code"); }();
+    return value;
+  }
 };
 
 }  // namespace libtextclassifier3
diff --git a/native/annotator/datetime/extractor.h b/native/annotator/datetime/extractor.h
index 097dd95..0f92b2a 100644
--- a/native/annotator/datetime/extractor.h
+++ b/native/annotator/datetime/extractor.h
@@ -44,9 +44,9 @@
 // (DateParseDate) from the current match of the passed RegexMatcher.
 class DatetimeExtractor {
  public:
-  DatetimeExtractor(
+  explicit DatetimeExtractor(
       const CompiledRule& rule, const UniLib::RegexMatcher& matcher,
-      int locale_id, const UniLib& unilib,
+      int locale_id, const UniLib* unilib,
       const std::vector<std::unique_ptr<const UniLib::RegexPattern>>&
           extractor_rules,
       const std::unordered_map<DatetimeExtractorType,
@@ -55,7 +55,7 @@
       : rule_(rule),
         matcher_(matcher),
         locale_id_(locale_id),
-        unilib_(unilib),
+        unilib_(*unilib),
         rules_(extractor_rules),
         type_and_locale_to_rule_(type_and_locale_to_extractor_rule) {}
   bool Extract(DatetimeParsedData* result, CodepointSpan* result_span) const;
diff --git a/native/annotator/datetime/parser.cc b/native/annotator/datetime/parser.cc
index a8305a0..72fd3ab 100644
--- a/native/annotator/datetime/parser.cc
+++ b/native/annotator/datetime/parser.cc
@@ -28,8 +28,8 @@
 
 namespace libtextclassifier3 {
 std::unique_ptr<DatetimeParser> DatetimeParser::Instance(
-    const DatetimeModel* model, const UniLib& unilib,
-    const CalendarLib& calendarlib, ZlibDecompressor* decompressor) {
+    const DatetimeModel* model, const UniLib* unilib,
+    const CalendarLib* calendarlib, ZlibDecompressor* decompressor) {
   std::unique_ptr<DatetimeParser> result(
       new DatetimeParser(model, unilib, calendarlib, decompressor));
   if (!result->initialized_) {
@@ -38,10 +38,10 @@
   return result;
 }
 
-DatetimeParser::DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
-                               const CalendarLib& calendarlib,
+DatetimeParser::DatetimeParser(const DatetimeModel* model, const UniLib* unilib,
+                               const CalendarLib* calendarlib,
                                ZlibDecompressor* decompressor)
-    : unilib_(unilib), calendarlib_(calendarlib) {
+    : unilib_(*unilib), calendarlib_(*calendarlib) {
   initialized_ = false;
 
   if (model == nullptr) {
@@ -54,7 +54,7 @@
         for (const DatetimeModelPattern_::Regex* regex : *pattern->regexes()) {
           std::unique_ptr<UniLib::RegexPattern> regex_pattern =
               UncompressMakeRegexPattern(
-                  unilib, regex->pattern(), regex->compressed_pattern(),
+                  unilib_, regex->pattern(), regex->compressed_pattern(),
                   model->lazy_regex_compilation(), decompressor);
           if (!regex_pattern) {
             TC3_LOG(ERROR) << "Couldn't create rule pattern.";
@@ -75,7 +75,7 @@
     for (const DatetimeModelExtractor* extractor : *model->extractors()) {
       std::unique_ptr<UniLib::RegexPattern> regex_pattern =
           UncompressMakeRegexPattern(
-              unilib, extractor->pattern(), extractor->compressed_pattern(),
+              unilib_, extractor->pattern(), extractor->compressed_pattern(),
               model->lazy_regex_compilation(), decompressor);
       if (!regex_pattern) {
         TC3_LOG(ERROR) << "Couldn't create extractor pattern";
@@ -357,7 +357,7 @@
                                      std::vector<DatetimeParseResult>* results,
                                      CodepointSpan* result_span) const {
   DatetimeParsedData parse;
-  DatetimeExtractor extractor(rule, matcher, locale_id, unilib_,
+  DatetimeExtractor extractor(rule, matcher, locale_id, &unilib_,
                               extractor_rules_,
                               type_and_locale_to_extractor_rule_);
   if (!extractor.Extract(&parse, result_span)) {
diff --git a/native/annotator/datetime/parser.h b/native/annotator/datetime/parser.h
index 2b8b615..8b58388 100644
--- a/native/annotator/datetime/parser.h
+++ b/native/annotator/datetime/parser.h
@@ -39,8 +39,8 @@
 class DatetimeParser {
  public:
   static std::unique_ptr<DatetimeParser> Instance(
-      const DatetimeModel* model, const UniLib& unilib,
-      const CalendarLib& calendarlib, ZlibDecompressor* decompressor);
+      const DatetimeModel* model, const UniLib* unilib,
+      const CalendarLib* calendarlib, ZlibDecompressor* decompressor);
 
   // Parses the dates in 'input' and fills result. Makes sure that the results
   // do not overlap.
@@ -60,9 +60,9 @@
              std::vector<DatetimeParseResultSpan>* results) const;
 
  protected:
-  DatetimeParser(const DatetimeModel* model, const UniLib& unilib,
-                 const CalendarLib& calendarlib,
-                 ZlibDecompressor* decompressor);
+  explicit DatetimeParser(const DatetimeModel* model, const UniLib* unilib,
+                          const CalendarLib* calendarlib,
+                          ZlibDecompressor* decompressor);
 
   // Returns a list of locale ids for given locale spec string (comma-separated
   // locale names). Assigns the first parsed locale to reference_locale.
diff --git a/native/annotator/feature-processor.h b/native/annotator/feature-processor.h
index 2245b66..78dbbce 100644
--- a/native/annotator/feature-processor.h
+++ b/native/annotator/feature-processor.h
@@ -91,9 +91,10 @@
   // identical.
   typedef std::map<CodepointSpan, std::vector<float>> EmbeddingCache;
 
-  FeatureProcessor(const FeatureProcessorOptions* options, const UniLib* unilib)
+  explicit FeatureProcessor(const FeatureProcessorOptions* options,
+                            const UniLib* unilib)
       : feature_extractor_(internal::BuildTokenFeatureExtractorOptions(options),
-                           *unilib),
+                           unilib),
         options_(options),
         tokenizer_(internal::BuildTokenizer(options, unilib)) {
     MakeLabelMaps();
diff --git a/native/annotator/grammar/dates/cfg-datetime-annotator.cc b/native/annotator/grammar/dates/cfg-datetime-annotator.cc
index 554471b..99d3be0 100644
--- a/native/annotator/grammar/dates/cfg-datetime-annotator.cc
+++ b/native/annotator/grammar/dates/cfg-datetime-annotator.cc
@@ -23,7 +23,6 @@
 #include "utils/tokenizer.h"
 #include "utils/utf8/unicodetext.h"
 
-
 namespace libtextclassifier3::dates {
 namespace {
 
@@ -56,12 +55,12 @@
 }  // namespace
 
 CfgDatetimeAnnotator::CfgDatetimeAnnotator(
-    const UniLib& unilib, const GrammarTokenizerOptions* tokenizer_options,
-    const CalendarLib& calendar_lib, const DatetimeRules* datetime_rules,
+    const UniLib* unilib, const GrammarTokenizerOptions* tokenizer_options,
+    const CalendarLib* calendar_lib, const DatetimeRules* datetime_rules,
     const float annotator_target_classification_score,
     const float annotator_priority_score)
-    : calendar_lib_(calendar_lib),
-      tokenizer_(BuildTokenizer(&unilib, tokenizer_options)),
+    : calendar_lib_(*calendar_lib),
+      tokenizer_(BuildTokenizer(unilib, tokenizer_options)),
       parser_(unilib, datetime_rules),
       annotator_target_classification_score_(
           annotator_target_classification_score),
diff --git a/native/annotator/grammar/dates/cfg-datetime-annotator.h b/native/annotator/grammar/dates/cfg-datetime-annotator.h
index 3ccc479..73c9b7b 100644
--- a/native/annotator/grammar/dates/cfg-datetime-annotator.h
+++ b/native/annotator/grammar/dates/cfg-datetime-annotator.h
@@ -33,12 +33,11 @@
 // (List of annotation generated from Grammar rules) to DatetimeParseResultSpan.
 class CfgDatetimeAnnotator {
  public:
-  CfgDatetimeAnnotator(const UniLib& unilib,
-                       const GrammarTokenizerOptions* tokenizer_options,
-                       const CalendarLib& calendar_lib,
-                       const DatetimeRules* datetime_rules,
-                       const float annotator_target_classification_score,
-                       const float annotator_priority_score);
+  explicit CfgDatetimeAnnotator(
+      const UniLib* unilib, const GrammarTokenizerOptions* tokenizer_options,
+      const CalendarLib* calendar_lib, const DatetimeRules* datetime_rules,
+      const float annotator_target_classification_score,
+      const float annotator_priority_score);
 
   // CfgDatetimeAnnotator is neither copyable nor movable.
   CfgDatetimeAnnotator(const CfgDatetimeAnnotator&) = delete;
diff --git a/native/annotator/grammar/dates/parser.cc b/native/annotator/grammar/dates/parser.cc
index 88133fc..37e65fc 100644
--- a/native/annotator/grammar/dates/parser.cc
+++ b/native/annotator/grammar/dates/parser.cc
@@ -784,9 +784,9 @@
   if (locale_rules.empty()) {
     return {};
   }
-  grammar::Matcher matcher(unilib_, datetime_rules_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, datetime_rules_->rules(), locale_rules,
                            &extractor);
-  lexer_.Process(text_unicode, tokens, /*matches=*/{}, &matcher);
+  lexer_.Process(text_unicode, tokens, /*annotations=*/nullptr, &matcher);
   return GetOutputAsAnnotationList(unilib_, extractor, codepoint_offsets,
                                    options);
 }
diff --git a/native/annotator/grammar/dates/parser.h b/native/annotator/grammar/dates/parser.h
index bc55d00..be919df 100644
--- a/native/annotator/grammar/dates/parser.h
+++ b/native/annotator/grammar/dates/parser.h
@@ -35,8 +35,8 @@
 // constructs, validates, deduplicates and normalizes date time annotations.
 class DateParser {
  public:
-  DateParser(const UniLib& unilib, const DatetimeRules* datetime_rules)
-      : unilib_(unilib),
+  explicit DateParser(const UniLib* unilib, const DatetimeRules* datetime_rules)
+      : unilib_(*unilib),
         lexer_(unilib, datetime_rules->rules()),
         datetime_rules_(datetime_rules),
         rules_locales_(ParseRulesLocales(datetime_rules->rules())) {}
diff --git a/native/annotator/grammar/grammar-annotator.cc b/native/annotator/grammar/grammar-annotator.cc
index f8c9721..3acc3ce 100644
--- a/native/annotator/grammar/grammar-annotator.cc
+++ b/native/annotator/grammar/grammar-annotator.cc
@@ -47,11 +47,11 @@
 
 class GrammarAnnotatorCallbackDelegate : public grammar::CallbackDelegate {
  public:
-  GrammarAnnotatorCallbackDelegate(
+  explicit GrammarAnnotatorCallbackDelegate(
       const UniLib* unilib, const GrammarModel* model,
       const ReflectiveFlatbufferBuilder* entity_data_builder,
       const ModeFlag mode)
-      : unilib_(unilib),
+      : unilib_(*unilib),
         model_(model),
         entity_data_builder_(entity_data_builder),
         mode_(mode) {}
@@ -340,7 +340,7 @@
     return true;
   }
 
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const GrammarModel* model_;
   const ReflectiveFlatbufferBuilder* entity_data_builder_;
   const ModeFlag mode_;
@@ -353,9 +353,9 @@
 GrammarAnnotator::GrammarAnnotator(
     const UniLib* unilib, const GrammarModel* model,
     const ReflectiveFlatbufferBuilder* entity_data_builder)
-    : unilib_(unilib),
+    : unilib_(*unilib),
       model_(model),
-      lexer_(*unilib, model->rules()),
+      lexer_(unilib, model->rules()),
       tokenizer_(BuildTokenizer(unilib, model->tokenizer_options())),
       entity_data_builder_(entity_data_builder),
       rules_locales_(grammar::ParseRulesLocales(model->rules())) {}
@@ -378,11 +378,12 @@
 
   // Run the grammar.
   GrammarAnnotatorCallbackDelegate callback_handler(
-      unilib_, model_, entity_data_builder_,
+      &unilib_, model_, entity_data_builder_,
       /*mode=*/ModeFlag_ANNOTATION);
-  grammar::Matcher matcher(*unilib_, model_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, model_->rules(), locale_rules,
                            &callback_handler);
-  lexer_.Process(text, tokenizer_.Tokenize(text), /*matches=*/{}, &matcher);
+  lexer_.Process(text, tokenizer_.Tokenize(text), /*annotations=*/nullptr,
+                 &matcher);
 
   // Populate results.
   return callback_handler.GetAnnotations(UnicodeCodepointOffsets(text), result);
@@ -408,11 +409,12 @@
 
   // Run the grammar.
   GrammarAnnotatorCallbackDelegate callback_handler(
-      unilib_, model_, entity_data_builder_,
+      &unilib_, model_, entity_data_builder_,
       /*mode=*/ModeFlag_SELECTION);
-  grammar::Matcher matcher(*unilib_, model_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, model_->rules(), locale_rules,
                            &callback_handler);
-  lexer_.Process(text, tokenizer_.Tokenize(text), /*matches=*/{}, &matcher);
+  lexer_.Process(text, tokenizer_.Tokenize(text), /*annotations=*/nullptr,
+                 &matcher);
 
   // Populate the result.
   return callback_handler.GetTextSelection(UnicodeCodepointOffsets(text),
@@ -439,16 +441,16 @@
 
   // Run the grammar.
   GrammarAnnotatorCallbackDelegate callback_handler(
-      unilib_, model_, entity_data_builder_,
+      &unilib_, model_, entity_data_builder_,
       /*mode=*/ModeFlag_CLASSIFICATION);
-  grammar::Matcher matcher(*unilib_, model_->rules(), locale_rules,
+  grammar::Matcher matcher(&unilib_, model_->rules(), locale_rules,
                            &callback_handler);
 
   const std::vector<Token> tokens = tokenizer_.Tokenize(text);
   if (model_->context_left_num_tokens() == -1 &&
       model_->context_right_num_tokens() == -1) {
     // Use all tokens.
-    lexer_.Process(text, tokens, /*matches=*/{}, &matcher);
+    lexer_.Process(text, tokens, /*annotations=*/nullptr, &matcher);
   } else {
     TokenSpan context_span = CodepointSpanToTokenSpan(
         tokens, selection, /*snap_boundaries_to_containing_tokens=*/true);
@@ -466,7 +468,7 @@
                                      model_->context_right_num_tokens()));
     }
     lexer_.Process(text, begin, end,
-                   /*matches=*/{}, &matcher);
+                   /*annotations=*/nullptr, &matcher);
   }
 
   // Populate result.
diff --git a/native/annotator/grammar/grammar-annotator.h b/native/annotator/grammar/grammar-annotator.h
index c09d2e2..365bb44 100644
--- a/native/annotator/grammar/grammar-annotator.h
+++ b/native/annotator/grammar/grammar-annotator.h
@@ -37,8 +37,9 @@
     kRuleMatch = 1,
   };
 
-  GrammarAnnotator(const UniLib* unilib, const GrammarModel* model,
-                   const ReflectiveFlatbufferBuilder* entity_data_builder);
+  explicit GrammarAnnotator(
+      const UniLib* unilib, const GrammarModel* model,
+      const ReflectiveFlatbufferBuilder* entity_data_builder);
 
   // Annotates a given text.
   // Returns true if the text was successfully annotated.
@@ -58,7 +59,7 @@
                         AnnotatedSpan* result) const;
 
  private:
-  const UniLib* unilib_;
+  const UniLib& unilib_;
   const GrammarModel* model_;
   const grammar::Lexer lexer_;
   const Tokenizer tokenizer_;
diff --git a/native/annotator/model-executor.h b/native/annotator/model-executor.h
index bcc318b..5d6c4a7 100644
--- a/native/annotator/model-executor.h
+++ b/native/annotator/model-executor.h
@@ -56,8 +56,8 @@
   explicit ModelExecutor(std::unique_ptr<const tflite::FlatBufferModel> model)
       : TfLiteModelExecutor(std::move(model)) {}
 
-  static const int kInputIndexFeatures = 0;
-  static const int kOutputIndexLogits = 0;
+  static constexpr int kInputIndexFeatures = 0;
+  static constexpr int kOutputIndexLogits = 0;
 };
 
 // Executor for embedding sparse features into a dense vector.
diff --git a/native/models/actions_suggestions.en.model b/native/models/actions_suggestions.en.model
index a978e1f..6604fcb 100755
--- a/native/models/actions_suggestions.en.model
+++ b/native/models/actions_suggestions.en.model
Binary files differ
diff --git a/native/models/actions_suggestions.universal.model b/native/models/actions_suggestions.universal.model
index a285ab0..6261d8f 100755
--- a/native/models/actions_suggestions.universal.model
+++ b/native/models/actions_suggestions.universal.model
Binary files differ
diff --git a/native/utils/base/arena.h b/native/utils/base/arena.h
index aec1950..7562917 100644
--- a/native/utils/base/arena.h
+++ b/native/utils/base/arena.h
@@ -105,7 +105,7 @@
   }
 
   // The alignment that ArenaAllocator uses except for 1-byte objects.
-  static const int kDefaultAlignment = 8;
+  static constexpr int kDefaultAlignment = 8;
 
  protected:
   bool SatisfyAlignment(const size_t alignment);
diff --git a/native/utils/base/prefixvarint.h b/native/utils/base/prefixvarint.h
index f00e05e..8e4f308 100644
--- a/native/utils/base/prefixvarint.h
+++ b/native/utils/base/prefixvarint.h
@@ -109,11 +109,11 @@
 class PrefixVarint {
  public:
   // The max bytes used to encode a uint32:
-  static const int kMax32 = 5;
-  static const int kMax64 = 9;
+  static constexpr int kMax32 = 5;
+  static constexpr int kMax64 = 9;
 
   // This decoder does not read past the encoded buffer.
-  static const int kSlopBytes = 0;
+  static constexpr int kSlopBytes = 0;
 
   // Returns the number of bytes used to encode the given value:
   static int Length32(uint32 val);
@@ -122,8 +122,8 @@
   // The Encode functions could reset up to the following bytes past the last
   // encoded byte. Use the slower SafeEncode equivalent if you want the encode
   // to not use any slop bytes.
-  static const int kEncode32SlopBytes = 1;
-  static const int kEncode64SlopBytes = 3;
+  static constexpr int kEncode32SlopBytes = 1;
+  static constexpr int kEncode64SlopBytes = 3;
 
   // The safer version of the Encode functions, which don't need any slop bytes.
   static char* SafeEncode32(char* ptr, uint32 val);
@@ -145,14 +145,14 @@
   static const char* Parse64Inline(const char* ptr, uint64* val);
 
  private:
-  static const int kMin2Bytes = (1 << 7);
-  static const int kMin3Bytes = (1 << 14);
-  static const int kMin4Bytes = (1 << 21);
-  static const int kMin5Bytes = (1 << 28);
-  static const int64 kMin6Bytes = (1LL << 35);
-  static const int64 kMin7Bytes = (1LL << 42);
-  static const int64 kMin8Bytes = (1LL << 49);
-  static const int64 kMin9Bytes = (1LL << 56);
+  static constexpr int kMin2Bytes = (1 << 7);
+  static constexpr int kMin3Bytes = (1 << 14);
+  static constexpr int kMin4Bytes = (1 << 21);
+  static constexpr int kMin5Bytes = (1 << 28);
+  static constexpr int64 kMin6Bytes = (1LL << 35);
+  static constexpr int64 kMin7Bytes = (1LL << 42);
+  static constexpr int64 kMin8Bytes = (1LL << 49);
+  static constexpr int64 kMin9Bytes = (1LL << 56);
 
   static void Append32Slow(std::string* s, uint32 value);
   static void Append64Slow(std::string* s, uint64 value);
diff --git a/native/utils/bitmap/bitmap.h b/native/utils/bitmap/bitmap.h
deleted file mode 100644
index 6eb9dff..0000000
--- a/native/utils/bitmap/bitmap.h
+++ /dev/null
@@ -1,536 +0,0 @@
-/*
- * Copyright (C) 2018 The Android Open Source Project
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *      http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#ifndef LIBTEXTCLASSIFIER_UTILS_BITMAP_BITMAP_H_
-#define LIBTEXTCLASSIFIER_UTILS_BITMAP_BITMAP_H_
-
-#include <algorithm>
-#include <climits>
-#include <ostream>
-
-#include "utils/base/integral_types.h"
-#include "utils/base/logging.h"
-
-namespace libtextclassifier3 {
-
-template <typename W>
-void SetBit(W* map, size_t index, bool value) {
-  static constexpr size_t kIntBits = CHAR_BIT * sizeof(W);
-  // This is written in such a way that our current compiler generates
-  // a conditional move instead of a conditional branch, which is data
-  // dependent and unpredictable.  Branch mis-prediction is much more
-  // expensive than cost of a conditional move.
-  const W bit = W{1} << (index & (kIntBits - 1));
-  const W old_value = map[index / kIntBits];
-  const W new_value = value ? old_value | bit : old_value & ~bit;
-  map[index / kIntBits] = new_value;
-}
-
-template <typename W>
-bool GetBit(const W* map, size_t index) {
-  static constexpr size_t kIntBits = CHAR_BIT * sizeof(W);
-  return map[index / kIntBits] & (W{1} << (index & (kIntBits - 1)));
-}
-
-namespace internal {
-template <typename W>
-class BasicBitmap {
- public:
-  using size_type = size_t;
-  using Word = W;  // packed bit internal storage type.
-
-  // Allocates a new bitmap with size bits set to the value fill.
-  BasicBitmap(size_type size, bool fill) : size_(size), alloc_(true) {
-    map_ = std::allocator<Word>().allocate(array_size());
-    SetAll(fill);
-  }
-
-  explicit BasicBitmap(size_type size) : BasicBitmap(size, false) {}
-
-  // Borrows a reference to a region of memory that is the caller's
-  // responsibility to manage for the life of the Bitmap. The map is expected
-  // to have enough memory to store size bits.
-  BasicBitmap(Word* map, size_type size)
-      : map_(map), size_(size), alloc_(false) {}
-
-  // Default constructor: creates a bitmap with zero bits.
-  BasicBitmap() : size_(0), alloc_(true) {
-    map_ = std::allocator<Word>().allocate(array_size());
-  }
-
-  BasicBitmap(const BasicBitmap& src);
-
-  // Assigns this Bitmap to the values of the src Bitmap.
-  // This includes pointing to the same underlying map_ if the src Bitmap
-  // does not allocate its own.
-  BasicBitmap& operator=(const BasicBitmap& src);
-
-  // Destructor : clean up if we allocated
-  ~BasicBitmap() {
-    if (alloc_) {
-      std::allocator<Word>().deallocate(map_, array_size());
-    }
-  }
-
-  // Resizes the bitmap.
-  // If size < bits(), the extra bits will be discarded.
-  // If size > bits(), the extra bits will be filled with the fill value.
-  void Resize(size_type size, bool fill = false);
-
-  // ACCESSORS
-  size_type bits() const { return size_; }
-  size_type array_size() const { return RequiredArraySize(bits()); }
-
-  // Gets an entry of the internal map. Requires array_index < array_size()
-  Word GetMapElement(size_type array_index) const {
-    CHECK_LT(array_index, array_size());
-    return map_[array_index];
-  }
-
-  // Gets an entry of the internal map. Requires array_index < array_size()
-  // Also performs masking to insure no bits >= bits().
-  Word GetMaskedMapElement(size_type array_index) const {
-    return (array_index == array_size() - 1)
-               ? map_[array_size() - 1] & HighOrderMapElementMask()
-               : map_[array_index];
-  }
-
-  // Sets an element of the internal map. Requires array_index < array_size()
-  void SetMapElement(size_type array_index, Word value) {
-    CHECK_LT(array_index, array_size());
-    map_[array_index] = value;
-  }
-
-  // The highest order element in map_ will have some meaningless bits
-  // (with undefined values) if bits() is not a multiple of
-  // kIntBits. If you & HighOrderMapElementMask with the high order
-  // element, you will be left with only the valid, defined bits (the
-  // others will be 0)
-  Word HighOrderMapElementMask() const {
-    return (size_ == 0) ? 0 : (~W{0}) >> (-size_ & (kIntBits - 1));
-  }
-
-  bool Get(size_type index) const {
-    TC3_DCHECK_LT(index, size_);
-    return GetBit(map_, index);
-  }
-
-  // Returns true if all bits are unset
-  bool IsAllZeroes() const {
-    return std::all_of(map_, map_ + array_size() - 1,
-                       [](Word w) { return w == W{0}; }) &&
-           (map_[array_size() - 1] & HighOrderMapElementMask()) == W{0};
-  }
-
-  // Returns true if all bits are set
-  bool IsAllOnes() const {
-    return std::all_of(map_, map_ + array_size() - 1,
-                       [](Word w) { return w == ~W{0}; }) &&
-           ((~map_[array_size() - 1]) & HighOrderMapElementMask()) == W{0};
-  }
-
-  void Set(size_type index, bool value) {
-    TC3_DCHECK_LT(index, size_);
-    SetBit(map_, index, value);
-  }
-
-  void Toggle(size_type index) {
-    TC3_DCHECK_LT(index, size_);
-    map_[index / kIntBits] ^= (W{1} << (index & (kIntBits - 1)));
-  }
-
-  // Sets all the bits to true or false
-  void SetAll(bool value) {
-    std::fill(map_, map_ + array_size(), value ? ~W{0} : W{0});
-  }
-
-  // Clears all bits in the bitmap
-  void Clear() { SetAll(false); }
-
-  // Sets a range of bits (begin inclusive, end exclusive) to true or false
-  void SetRange(size_type begin, size_type end, bool value);
-
-  // Sets "this" to be the union of "this" and "other". The bitmaps do
-  // not have to be the same size. If other is smaller, all the higher
-  // order bits are assumed to be 0. The size of "this" is never
-  // changed by this operation (higher order bits in other are
-  // ignored). Note this make Union *not* commutative -- it matters
-  // which Bitmap is this and which is other
-  void Union(const BasicBitmap& other);
-
-  // Sets "this" to be the intersection of "this" and "other". The
-  // bitmaps do not have to be the same size. If other is smaller, all
-  // the higher order bits are assumed to be 0. The size of this is
-  // never changed by this operation (higher order bits in other are
-  // ignored)
-  void Intersection(const BasicBitmap& other);
-
-  // Returns true if "this" and "other" have any bits set in common.
-  bool IsIntersectionNonEmpty(const BasicBitmap& other) const;
-
-  // Sets "this" to be the "~" (Complement) of "this".
-  void Complement() {
-    std::transform(map_, map_ + array_size(), map_, [](Word w) { return ~w; });
-  }
-
-  // Sets "this" to be the set of bits in "this" but not in "other"
-  // REQUIRES: "bits() == other.bits()" (i.e. the bitmaps are the same size)
-  void Difference(const BasicBitmap& other) {
-    TC3_CHECK_EQ(bits(), other.bits());
-    std::transform(map_, map_ + array_size(), other.map_, map_,
-                   [](Word a, Word b) { return a & ~b; });
-  }
-
-  // Sets "this" to be the set of bits which is set in either "this" or "other",
-  // but not both.
-  // REQUIRES: "bits() == other.bits()" (i.e. the bitmaps are the same size)
-  void ExclusiveOr(const BasicBitmap& other) {
-    TC3_CHECK_EQ(bits(), other.bits());
-    std::transform(map_, map_ + array_size(), other.map_, map_,
-                   [](Word a, Word b) { return a ^ b; });
-  }
-
-  // Return true if any bit between begin inclusive and end exclusive
-  // is set.  0 <= begin <= end <= bits() is required.
-  bool TestRange(size_type begin, size_type end) const;
-
-  // Return true if both Bitmaps are of equal length and have the same
-  // value.
-  bool IsEqual(const BasicBitmap& other) const {
-    return (bits() == other.bits()) &&
-           ((array_size() < 1) ||
-            std::equal(map_, map_ + array_size() - 1, other.map_)) &&
-           ((HighOrderMapElementMask() & other.map_[array_size() - 1]) ==
-            (HighOrderMapElementMask() & map_[array_size() - 1]));
-  }
-
-  // Return true is this bitmap is a subset of another bitmap in terms of
-  // the positions of 1s. That is, 0110 is a subset of 1110.
-  // REQUIRES: "bits() == other.bits()" (i.e. the bitmaps are the same size)
-  bool IsSubsetOf(const BasicBitmap& other) const;
-
-  // Returns 0 if the two bitmaps are equal.  Returns a negative number if the
-  // this bitmap is less than other, and a positive number otherwise.
-  //
-  // The relation we use is the natural relation defined by assigning an integer
-  // to each bitmap:
-  //
-  // int(bitmap) = b_0 + 2 * b_1 + ... + 2^k * b_k
-  //
-  // Then for our comparison function:
-  //
-  // if int(b1) != int(b2), then b1 is less than b2 if int(b1) < int(b2),
-  // and b2 is less than b1 otherwise.
-  //
-  // if int(b1) == int(b2), then we compare the numbers of bits in b1 and b2.
-  // If b1 has strictly fewer bits, then b1 is less than b2 (same for b2).
-  // If b1 and b2 have the same number of bits, then they are equal and we
-  // return 0.
-  int CompareTo(const BasicBitmap& other) const;
-
-  // return number of allocated words required for a bitmap of size num_bits
-  // minimum size is 1
-  static constexpr size_t RequiredArraySize(size_type num_bits) {
-    return num_bits == 0 ? 1 : (num_bits - 1) / kIntBits + 1;
-  }
-
- private:
-  // The same semantics as CompareTo, except that we have the invariant that
-  // first has at least as many bits as second.
-  static int CompareToHelper(const BasicBitmap& first,
-                             const BasicBitmap& second);
-
-  static constexpr unsigned Log2(unsigned n, unsigned p = 0) {
-    return (n <= 1) ? p : Log2(n / 2, p + 1);
-  }
-
-  // NOTE: we make assumptions throughout the code that kIntBits is a power of
-  // 2, so that we can use shift and mask instead of division and modulo.
-  static constexpr int kIntBits = CHAR_BIT * sizeof(Word);  // bits in a Word
-  static constexpr int kLogIntBits = Log2(kIntBits, 0);
-  Word* map_;       // the bitmap
-  size_type size_;  // the upper bound of the bitmap
-  bool alloc_;      // whether or not *we* allocated the memory
-};
-}  // namespace internal
-
-
-class Bitmap : public libtextclassifier3::internal::BasicBitmap<uint32> {
- public:
-  using internal::BasicBitmap<uint32>::BasicBitmap;
-};
-
-namespace internal {
-template <typename W>
-BasicBitmap<W>::BasicBitmap(const BasicBitmap& src)
-    : size_(src.size_), alloc_(src.alloc_) {
-  static_assert(((kIntBits & (kIntBits - 1)) == 0), "kIntBits not power of 2");
-  if (alloc_) {
-    map_ = std::allocator<Word>().allocate(array_size());
-    std::copy(src.map_, src.map_ + array_size(), map_);
-  } else {
-    map_ = src.map_;
-  }
-}
-
-template <typename W>
-void BasicBitmap<W>::Resize(size_type size, bool fill) {
-  const size_type old_size = size_;
-  const size_t new_array_size = RequiredArraySize(size);
-  if (new_array_size != array_size()) {
-    Word* new_map = std::allocator<Word>().allocate(new_array_size);
-    std::copy(map_, map_ + std::min<size_t>(new_array_size, array_size()),
-              new_map);
-    if (alloc_) {
-      std::allocator<Word>().deallocate(map_, array_size());
-    }
-    map_ = new_map;
-    alloc_ = true;
-  }
-  size_ = size;
-  if (old_size < size_) {
-    SetRange(old_size, size_, fill);
-  }
-}
-
-template <typename W>
-BasicBitmap<W>& BasicBitmap<W>::operator=(const BasicBitmap<W>& src) {
-  if (this != &src) {
-    if (alloc_ && array_size() != src.array_size()) {
-      std::allocator<Word>().deallocate(map_, array_size());
-      map_ = std::allocator<Word>().allocate(src.array_size());
-    }
-    size_ = src.size_;
-    if (src.alloc_) {
-      if (!alloc_) {
-        map_ = std::allocator<Word>().allocate(src.array_size());
-      }
-      std::copy(src.map_, src.map_ + src.array_size(), map_);
-      alloc_ = true;
-    } else {
-      if (alloc_) {
-        std::allocator<Word>().deallocate(map_, array_size());
-      }
-      map_ = src.map_;
-      alloc_ = false;
-    }
-  }
-  return *this;
-}
-
-// Return true if any bit between begin inclusive and end exclusive
-// is set.  0 <= begin <= end <= bits() is required.
-template <typename W>
-bool BasicBitmap<W>::TestRange(size_type begin, size_type end) const {
-  // Return false immediately if the range is empty.
-  if (begin == end) {
-    return false;
-  }
-  // Calculate the indices of the words containing the first and last bits,
-  // along with the positions of the bits within those words.
-  size_t i = begin / kIntBits;
-  size_t j = begin & (kIntBits - 1);
-  size_t ilast = (end - 1) / kIntBits;
-  size_t jlast = (end - 1) & (kIntBits - 1);
-  // If the range spans multiple words, discard the extraneous bits of the
-  // first word by shifting to the right, and then test the remaining bits.
-  if (i < ilast) {
-    if (map_[i++] >> j) {
-      return true;
-    }
-    j = 0;
-
-    // Test each of the "middle" words that lies completely within the range.
-    while (i < ilast) {
-      if (map_[i++]) {
-        return true;
-      }
-    }
-  }
-
-  // Test the portion of the last word that lies within the range. (This logic
-  // also handles the case where the entire range lies within a single word.)
-  const Word mask = (((W{1} << 1) << (jlast - j)) - 1) << j;
-  return (map_[ilast] & mask) != W{0};
-}
-
-template <typename W>
-bool BasicBitmap<W>::IsSubsetOf(const BasicBitmap& other) const {
-  TC3_CHECK_EQ(bits(), other.bits());
-  Word* mp = map_;
-  Word* endp = mp + array_size() - 1;
-  Word* op = other.map_;
-  // A is a subset of B if A - B = {}, that is A & ~B = {}
-  for (; mp != endp; ++mp, ++op)
-    if (*mp & ~*op) return false;
-  return (*mp & ~*op & HighOrderMapElementMask()) == W{0};
-}
-
-// Same semantics as CompareTo, except that we have the invariant that first
-// has at least as many bits as second.
-template <typename W>
-int BasicBitmap<W>::CompareToHelper(const BasicBitmap<W>& first,
-                                    const BasicBitmap<W>& second) {
-  // Checks if the high order bits in first that are not in second are set.  If
-  // any of these are set, then first is greater than second, and we return a
-  // positive value.
-  if (first.TestRange(second.bits(), first.bits())) {
-    return 1;
-  }
-
-  // We use unsigned integer comparison to compare the bitmaps.  We need to
-  // handle the high order bits in a special case (since there may be undefined
-  // bits for the element representing the highest order bits) and then we
-  // can do direct integer comparison.
-  size_t index = second.array_size() - 1;
-  Word left = first.map_[index] & second.HighOrderMapElementMask();
-  Word right = second.map_[index] & second.HighOrderMapElementMask();
-  if (left != right) {
-    return left < right ? -1 : 1;
-  }
-  while (index > 0) {
-    --index;
-    left = first.map_[index];
-    right = second.map_[index];
-    if (left != right) {
-      return left < right ? -1 : 1;
-    }
-  }
-  // Now we have reached the end, all common bits are equal, and all bits that
-  // are only in the longer list are 0.  We return 1 if the first bitmap is
-  // strictly larger, and 0 if the bitmaps are of equal size.
-  if (first.bits() == second.bits()) {
-    return 0;
-  } else {
-    return 1;
-  }
-}
-
-template <typename W>
-int BasicBitmap<W>::CompareTo(const BasicBitmap<W>& other) const {
-  if (bits() > other.bits()) {
-    return CompareToHelper(*this, other);
-  } else {
-    return -CompareToHelper(other, *this);
-  }
-}
-
-// Note that bits > size end up in undefined states when sizes
-// aren't equal, but that's okay.
-template <typename W>
-void BasicBitmap<W>::Union(const BasicBitmap<W>& other) {
-  const size_t this_array_size = array_size();
-  const size_t other_array_size = other.array_size();
-  const size_t min_array_size = std::min(this_array_size, other_array_size);
-  if (min_array_size == 0) {
-    // Nothing to do.
-    return;
-  }
-  // Perform bitwise OR of all but the last common word.
-  const size_t last = min_array_size - 1;
-  std::transform(map_, map_ + last, other.map_, map_,
-                 [](Word a, Word b) { return a | b; });
-  // Perform bitwise OR of the last common word, applying mask if necessary.
-  map_[last] |= other_array_size == min_array_size
-                    ? other.map_[last] & other.HighOrderMapElementMask()
-                    : other.map_[last];
-}
-
-// Note that bits > size end up in undefined states when sizes
-// aren't equal, but that's okay.
-template <typename W>
-void BasicBitmap<W>::Intersection(const BasicBitmap<W>& other) {
-  const size_t this_array_size = array_size();
-  const size_t other_array_size = other.array_size();
-  const size_t min_array_size = std::min(this_array_size, other_array_size);
-  // Perform bitwise AND of all common words.
-  std::transform(map_, map_ + min_array_size, other.map_, map_,
-                 [](Word a, Word b) { return a & b; });
-  if (other_array_size == min_array_size) {
-    // Zero out bits that are outside the range of 'other'.
-    if (other_array_size != 0) {
-      map_[other_array_size - 1] &= other.HighOrderMapElementMask();
-    }
-    std::fill(map_ + other_array_size, map_ + this_array_size, 0);
-  }
-}
-
-template <typename W>
-bool BasicBitmap<W>::IsIntersectionNonEmpty(const BasicBitmap<W>& other) const {
-  // First check fully overlapping bytes.
-  size_t max_overlap = std::min(array_size(), other.array_size()) - 1;
-  for (size_t i = 0; i < max_overlap; ++i) {
-    if (map_[i] & other.map_[i]) return true;
-  }
-
-  // Now check the highest overlapping byte, applying bit masks as necessary.
-  Word high_byte = map_[max_overlap] & other.map_[max_overlap];
-
-  if (other.array_size() > array_size())
-    return high_byte & HighOrderMapElementMask();
-  else if (array_size() > other.array_size())
-    return high_byte & other.HighOrderMapElementMask();
-
-  // Same array_size, apply both masks.
-  return high_byte & HighOrderMapElementMask() &
-         other.HighOrderMapElementMask();
-}
-
-/*static*/
-template <typename W>
-void BasicBitmap<W>::SetRange(size_type begin, size_type end, bool value) {
-  if (begin == end) return;
-  // Figure out which element(s) in the map_ array are affected
-  // by this op.
-  const size_type begin_element = begin / kIntBits;
-  const size_type begin_bit = begin % kIntBits;
-  const size_type end_element = end / kIntBits;
-  const size_type end_bit = end % kIntBits;
-  Word initial_mask = ~W{0} << begin_bit;
-  if (end_element == begin_element) {
-    // The range is contained in a single element of the array, so
-    // adjust both ends of the mask.
-    initial_mask = initial_mask & (~W{0} >> (kIntBits - end_bit));
-  }
-  if (value) {
-    map_[begin_element] |= initial_mask;
-  } else {
-    map_[begin_element] &= ~initial_mask;
-  }
-  if (end_element != begin_element) {
-    // Set all the bits in the array elements between the begin
-    // and end elements.
-    std::fill(map_ + begin_element + 1, map_ + end_element,
-              value ? ~W{0} : W{0});
-
-    // Update the appropriate bit-range in the last element.
-    // Note end_bit is an exclusive bound, so if it's 0 none of the
-    // bits in end_element are contained in the range (and we don't
-    // have to modify it).
-    if (end_bit != 0) {
-      const Word final_mask = ~W{0} >> (kIntBits - end_bit);
-      if (value) {
-        map_[end_element] |= final_mask;
-      } else {
-        map_[end_element] &= ~final_mask;
-      }
-    }
-  }
-}
-}  // namespace internal
-}  // namespace libtextclassifier3
-
-#endif  // LIBTEXTCLASSIFIER_UTILS_BITMAP_BITMAP_H_
diff --git a/native/utils/grammar/lexer.cc b/native/utils/grammar/lexer.cc
index 73162fd..3a2d0d3 100644
--- a/native/utils/grammar/lexer.cc
+++ b/native/utils/grammar/lexer.cc
@@ -50,12 +50,22 @@
   }
 }
 
+int MapCodepointToTokenPaddingIfPresent(
+    const std::unordered_map<CodepointIndex, CodepointIndex>& token_alignment,
+    const int start) {
+  const auto it = token_alignment.find(start);
+  if (it != token_alignment.end()) {
+    return it->second;
+  }
+  return start;
+}
+
 }  // namespace
 
-Lexer::Lexer(const UniLib& unilib, const RulesSet* rules)
-    : unilib_(unilib),
+Lexer::Lexer(const UniLib* unilib, const RulesSet* rules)
+    : unilib_(*unilib),
       rules_(rules),
-      regex_annotators_(BuildRegexAnnotator(unilib, rules)) {}
+      regex_annotators_(BuildRegexAnnotator(unilib_, rules)) {}
 
 std::vector<Lexer::RegexAnnotator> Lexer::BuildRegexAnnotator(
     const UniLib& unilib, const RulesSet* rules) const {
@@ -174,15 +184,15 @@
 }
 
 void Lexer::Process(const UnicodeText& text, const std::vector<Token>& tokens,
-                    const std::vector<Match*>& matches,
+                    const std::vector<AnnotatedSpan>* annotations,
                     Matcher* matcher) const {
-  return Process(text, tokens.begin(), tokens.end(), matches, matcher);
+  return Process(text, tokens.begin(), tokens.end(), annotations, matcher);
 }
 
 void Lexer::Process(const UnicodeText& text,
                     const std::vector<Token>::const_iterator& begin,
                     const std::vector<Token>::const_iterator& end,
-                    const std::vector<Match*>& matches,
+                    const std::vector<AnnotatedSpan>* annotations,
                     Matcher* matcher) const {
   if (begin == end) {
     return;
@@ -249,14 +259,24 @@
     symbols.push_back(Symbol(match));
   }
 
-  // Add predefined matches.
-  for (Match* match : matches) {
-    // Decrease match offset to include preceding whitespace.
-    auto token_match_start_it = token_match_start.find(match->match_offset);
-    if (token_match_start_it != token_match_start.end()) {
-      match->match_offset = token_match_start_it->second;
+  // Add matches based on annotations.
+  auto annotation_nonterminals = nonterminals->annotation_nt();
+  if (annotation_nonterminals != nullptr && annotations != nullptr) {
+    for (const AnnotatedSpan& annotated_span : *annotations) {
+      const ClassificationResult& classification =
+          annotated_span.classification.front();
+      if (auto entry = annotation_nonterminals->LookupByKey(
+              classification.collection.c_str())) {
+        AnnotationMatch* match = matcher->AllocateAndInitMatch<AnnotationMatch>(
+            entry->value(), annotated_span.span,
+            /*match_offset=*/
+            MapCodepointToTokenPaddingIfPresent(token_match_start,
+                                                annotated_span.span.first),
+            Match::kAnnotationMatch);
+        match->annotation = &classification;
+        symbols.push_back(Symbol(match));
+      }
     }
-    symbols.push_back(Symbol(match));
   }
 
   // Add regex annotator matches for the range covered by the tokens.
@@ -270,15 +290,10 @@
       const CodepointSpan span = {
           regex_matcher->Start(0, &status) + begin->start,
           regex_matcher->End(0, &status) + begin->start};
-      auto match_start_it = token_match_start.find(span.first);
-
-      // Decrease match offset to incldue preceding whitespace if the match is
-      // aligning with token boundaries.
-      const int match_offset =
-          (match_start_it != token_match_start.end() ? match_start_it->second
-                                                     : span.first);
       if (Match* match =
-              CheckedAddMatch(regex_annotator.nonterm, span, match_offset,
+              CheckedAddMatch(regex_annotator.nonterm, span, /*match_offset=*/
+                              MapCodepointToTokenPaddingIfPresent(
+                                  token_match_start, span.first),
                               Match::kUnknownType, matcher)) {
         symbols.push_back(Symbol(match));
       }
diff --git a/native/utils/grammar/lexer.h b/native/utils/grammar/lexer.h
index 2623ded..ca31c25 100644
--- a/native/utils/grammar/lexer.h
+++ b/native/utils/grammar/lexer.h
@@ -75,17 +75,20 @@
 
 class Lexer {
  public:
-  explicit Lexer(const UniLib& unilib, const RulesSet* rules);
+  explicit Lexer(const UniLib* unilib, const RulesSet* rules);
 
   // Processes a tokenized text. Classifies the tokens and feeds them to the
-  // matcher. Predefined existing matches `matches` will be fed to the matcher
-  // alongside the tokens.
+  // matcher.
+  // The provided annotations will be fed to the matcher alongside the tokens.
+  // NOTE: The `annotations` need to outlive any dependent processing.
   void Process(const UnicodeText& text, const std::vector<Token>& tokens,
-               const std::vector<Match*>& matches, Matcher* matcher) const;
+               const std::vector<AnnotatedSpan>* annotations,
+               Matcher* matcher) const;
   void Process(const UnicodeText& text,
                const std::vector<Token>::const_iterator& begin,
                const std::vector<Token>::const_iterator& end,
-               const std::vector<Match*>& matches, Matcher* matcher) const;
+               const std::vector<AnnotatedSpan>* annotations,
+               Matcher* matcher) const;
 
  private:
   // A lexical symbol with an identified meaning that represents raw tokens,
diff --git a/native/utils/grammar/match.h b/native/utils/grammar/match.h
index 594f114..97edac9 100644
--- a/native/utils/grammar/match.h
+++ b/native/utils/grammar/match.h
@@ -30,13 +30,14 @@
 // Instances should be created by calling Matcher::AllocateMatch().
 // This uses an arena to allocate matches (and subclasses thereof).
 struct Match {
-  static const int16 kUnknownType = 0;
-  static const int16 kTokenType = -1;
-  static const int16 kDigitsType = -2;
-  static const int16 kBreakType = -3;
-  static const int16 kAssertionMatch = -4;
-  static const int16 kMappingMatch = -5;
-  static const int16 kExclusionMatch = -6;
+  static constexpr int16 kUnknownType = 0;
+  static constexpr int16 kTokenType = -1;
+  static constexpr int16 kDigitsType = -2;
+  static constexpr int16 kBreakType = -3;
+  static constexpr int16 kAssertionMatch = -4;
+  static constexpr int16 kMappingMatch = -5;
+  static constexpr int16 kExclusionMatch = -6;
+  static constexpr int16 kAnnotationMatch = -7;
 
   void Init(const Nonterm arg_lhs, const CodepointSpan arg_codepoint_span,
             const int arg_match_offset, const int arg_type = kUnknownType) {
@@ -114,6 +115,11 @@
   Nonterm exclusion_nonterm;
 };
 
+// Match to represent an annotator annotated span in the grammar.
+struct AnnotationMatch : public Match {
+  const ClassificationResult* annotation;
+};
+
 // Utility functions for parse tree traversal.
 
 // Does a preorder traversal, calling `node_fn` on each node.
diff --git a/native/utils/grammar/matcher.cc b/native/utils/grammar/matcher.cc
index 475e77b..a8ebba5 100644
--- a/native/utils/grammar/matcher.cc
+++ b/native/utils/grammar/matcher.cc
@@ -47,8 +47,8 @@
 
 // Iterator that lowercases a utf8 string on the fly and enumerates the bytes.
 struct LowercasingByteIterator {
-  LowercasingByteIterator(const UniLib& unilib, StringPiece text)
-      : unilib(unilib),
+  LowercasingByteIterator(const UniLib* unilib, StringPiece text)
+      : unilib(*unilib),
         data(text.data()),
         end(text.data() + text.size()),
         buffer_pos(0),
@@ -291,7 +291,7 @@
 
     // Try case-insensitive matches.
     if (const RulesSet_::LhsSet* lhs_set = FindTerminalMatches(
-            LowercasingByteIterator(unilib_, terminal), rules_,
+            LowercasingByteIterator(&unilib_, terminal), rules_,
             shard->lowercase_terminal_rules(), &terminal)) {
       // `terminal` points now into the rules string pool, providing a
       // stable reference.
diff --git a/native/utils/grammar/matcher.h b/native/utils/grammar/matcher.h
index 6ab0bf3..47bac43 100644
--- a/native/utils/grammar/matcher.h
+++ b/native/utils/grammar/matcher.h
@@ -103,11 +103,11 @@
 
 class Matcher {
  public:
-  Matcher(const UniLib& unilib, const RulesSet* rules,
-          const std::vector<const RulesSet_::Rules*> rules_shards,
-          CallbackDelegate* delegate)
+  explicit Matcher(const UniLib* unilib, const RulesSet* rules,
+                   const std::vector<const RulesSet_::Rules*> rules_shards,
+                   CallbackDelegate* delegate)
       : state_(STATE_DEFAULT),
-        unilib_(unilib),
+        unilib_(*unilib),
         arena_(kBlocksize),
         rules_(rules),
         rules_shards_(rules_shards),
@@ -115,8 +115,8 @@
     TC3_CHECK(rules_ != nullptr);
     Reset();
   }
-  Matcher(const UniLib& unilib, const RulesSet* rules,
-          CallbackDelegate* delegate)
+  explicit Matcher(const UniLib* unilib, const RulesSet* rules,
+                   CallbackDelegate* delegate)
       : Matcher(unilib, rules, {}, delegate) {
     rules_shards_.reserve(rules->rules()->size());
     rules_shards_.insert(rules_shards_.end(), rules->rules()->begin(),
diff --git a/native/utils/grammar/rules.fbs b/native/utils/grammar/rules.fbs
index b3d7db8..8052c11 100755
--- a/native/utils/grammar/rules.fbs
+++ b/native/utils/grammar/rules.fbs
@@ -110,6 +110,12 @@
   max_whitespace_gap:byte;
 }
 
+namespace libtextclassifier3.grammar.RulesSet_.Nonterminals_;
+table AnnotationNtEntry {
+  key:string (key, shared);
+  value:int;
+}
+
 // Usage of pre-defined non-terminals that the lexer can generate if used by
 // the grammar.
 namespace libtextclassifier3.grammar.RulesSet_;
@@ -135,6 +141,10 @@
 
   // Id of the nonterminal indicating an uppercase token.
   uppercase_token_nt:int;
+
+  // Predefined nonterminals for annotations.
+  // Maps annotation/collection names to non-terminal ids.
+  annotation_nt:[Nonterminals_.AnnotationNtEntry];
 }
 
 // Callback information.
diff --git a/native/utils/java/jni-helper.cc b/native/utils/java/jni-helper.cc
index de53bbe..d1677e4 100644
--- a/native/utils/java/jni-helper.cc
+++ b/native/utils/java/jni-helper.cc
@@ -139,6 +139,14 @@
   return result;
 }
 
+Status JniHelper::SetObjectArrayElement(JNIEnv* env, jobjectArray array,
+                                        jsize index, jobject val) {
+  TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
+  env->SetObjectArrayElement(array, index, val);
+  TC3_NO_EXCEPTION_OR_RETURN;
+  return Status::OK;
+}
+
 StatusOr<ScopedLocalRef<jobjectArray>> JniHelper::NewObjectArray(
     JNIEnv* env, jsize length, jclass element_class, jobject initial_element) {
   TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
@@ -149,6 +157,14 @@
   return result;
 }
 
+StatusOr<jsize> JniHelper::GetArrayLength(JNIEnv* env,
+                                          jarray jinput_fragments) {
+  TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
+  jsize result = env->GetArrayLength(jinput_fragments);
+  TC3_NO_EXCEPTION_OR_RETURN;
+  return result;
+}
+
 StatusOr<ScopedLocalRef<jstring>> JniHelper::NewStringUTF(JNIEnv* env,
                                                           const char* bytes) {
   TC3_ENSURE_LOCAL_CAPACITY_OR_RETURN;
diff --git a/native/utils/java/jni-helper.h b/native/utils/java/jni-helper.h
index aa26326..55d4696 100644
--- a/native/utils/java/jni-helper.h
+++ b/native/utils/java/jni-helper.h
@@ -100,6 +100,11 @@
   static StatusOr<ScopedLocalRef<jfloatArray>> NewFloatArray(JNIEnv* env,
                                                              jsize length);
 
+  static StatusOr<jsize> GetArrayLength(JNIEnv* env, jarray jinput_fragments);
+
+  static Status SetObjectArrayElement(JNIEnv* env, jobjectArray array,
+                                      jsize index, jobject val);
+
   // Call* methods.
   TC3_DEFINE_VARIADIC_SCOPED_LOCAL_REF_ENV_METHOD(CallObjectMethod, jobject,
                                                   jobject, TC3_JNI_NO_CHECK);
diff --git a/native/utils/math/fastexp.h b/native/utils/math/fastexp.h
index f690c73..8128627 100644
--- a/native/utils/math/fastexp.h
+++ b/native/utils/math/fastexp.h
@@ -31,9 +31,9 @@
 
 class FastMathClass {
  private:
-  static const int kBits = 7;
-  static const int kMask1 = (1 << kBits) - 1;
-  static const int kMask2 = 0xFF << kBits;
+  static constexpr int kBits = 7;
+  static constexpr int kMask1 = (1 << kBits) - 1;
+  static constexpr int kMask2 = 0xFF << kBits;
   static constexpr float kLogBase2OfE = 1.44269504088896340736f;
 
   struct Table {
diff --git a/native/utils/normalization.cc b/native/utils/normalization.cc
index fd64dbb..f9623f7 100644
--- a/native/utils/normalization.cc
+++ b/native/utils/normalization.cc
@@ -21,14 +21,14 @@
 
 namespace libtextclassifier3 {
 
-UnicodeText NormalizeText(const UniLib* unilib,
+UnicodeText NormalizeText(const UniLib& unilib,
                           const NormalizationOptions* normalization_options,
                           const UnicodeText& text) {
   return NormalizeTextCodepointWise(
       unilib, normalization_options->codepointwise_normalization(), text);
 }
 
-UnicodeText NormalizeTextCodepointWise(const UniLib* unilib,
+UnicodeText NormalizeTextCodepointWise(const UniLib& unilib,
                                        const uint32 codepointwise_ops,
                                        const UnicodeText& text) {
   // Sanity check.
@@ -42,7 +42,7 @@
     // Skip whitespace.
     if ((codepointwise_ops &
          NormalizationOptions_::CodepointwiseNormalizationOp_DROP_WHITESPACE) &&
-        unilib->IsWhitespace(codepoint)) {
+        unilib.IsWhitespace(codepoint)) {
       continue;
     }
 
@@ -50,7 +50,7 @@
     if ((codepointwise_ops &
          NormalizationOptions_::
              CodepointwiseNormalizationOp_DROP_PUNCTUATION) &&
-        unilib->IsPunctuation(codepoint)) {
+        unilib.IsPunctuation(codepoint)) {
       continue;
     }
 
@@ -59,12 +59,12 @@
     // Lower case.
     if (codepointwise_ops &
         NormalizationOptions_::CodepointwiseNormalizationOp_LOWERCASE) {
-      normalized_codepoint = unilib->ToLower(normalized_codepoint);
+      normalized_codepoint = unilib.ToLower(normalized_codepoint);
 
       // Upper case.
     } else if (codepointwise_ops &
                NormalizationOptions_::CodepointwiseNormalizationOp_UPPERCASE) {
-      normalized_codepoint = unilib->ToUpper(normalized_codepoint);
+      normalized_codepoint = unilib.ToUpper(normalized_codepoint);
     }
 
     result.push_back(normalized_codepoint);
diff --git a/native/utils/normalization.h b/native/utils/normalization.h
index 0ded163..ff00783 100644
--- a/native/utils/normalization.h
+++ b/native/utils/normalization.h
@@ -27,14 +27,14 @@
 namespace libtextclassifier3 {
 
 // Normalizes a text according to the options.
-UnicodeText NormalizeText(const UniLib* unilib,
+UnicodeText NormalizeText(const UniLib& unilib,
                           const NormalizationOptions* normalization_options,
                           const UnicodeText& text);
 
 // Normalizes a text codepoint wise by applying each codepoint wise op in
 // `codepointwise_ops` that is interpreted as a set of
 // `CodepointwiseNormalizationOp`.
-UnicodeText NormalizeTextCodepointWise(const UniLib* unilib,
+UnicodeText NormalizeTextCodepointWise(const UniLib& unilib,
                                        const uint32 codepointwise_ops,
                                        const UnicodeText& text);
 
diff --git a/native/utils/strings/utf8.h b/native/utils/strings/utf8.h
index bebdaaa..e871731 100644
--- a/native/utils/strings/utf8.h
+++ b/native/utils/strings/utf8.h
@@ -23,22 +23,14 @@
 
 // Returns the length (number of bytes) of the Unicode code point starting at
 // src, based on inspecting just that one byte.  Preconditions: src != NULL,
-// *src can be read, and *src is not '\0', and src points to a well-formed UTF-8
-// std::string.
-static inline int GetNumBytesForNonZeroUTF8Char(const char *src) {
+// *src can be read.
+static inline int GetNumBytesForUTF8Char(const char *src) {
   // On most platforms, char is unsigned by default, but iOS is an exception.
   // The cast below makes sure we always interpret *src as an unsigned char.
   return "\1\1\1\1\1\1\1\1\1\1\1\1\2\2\3\4"
       [(*(reinterpret_cast<const unsigned char *>(src)) & 0xFF) >> 4];
 }
 
-// Like GetNumBytesForNonZeroUTF8Char, but *src may be '\0'; returns 0 in that
-// case.
-static inline int GetNumBytesForUTF8Char(const char *src) {
-  if (*src == '\0') return 0;
-  return GetNumBytesForNonZeroUTF8Char(src);
-}
-
 // Returns true if this byte is a trailing UTF-8 byte (10xx xxxx)
 static inline bool IsTrailByte(char x) {
   // return (x & 0xC0) == 0x80;
diff --git a/native/utils/token-feature-extractor.cc b/native/utils/token-feature-extractor.cc
index b14f96e..ee915db 100644
--- a/native/utils/token-feature-extractor.cc
+++ b/native/utils/token-feature-extractor.cc
@@ -70,8 +70,8 @@
 }  // namespace
 
 TokenFeatureExtractor::TokenFeatureExtractor(
-    const TokenFeatureExtractorOptions& options, const UniLib& unilib)
-    : options_(options), unilib_(unilib) {
+    const TokenFeatureExtractorOptions& options, const UniLib* unilib)
+    : options_(options), unilib_(*unilib) {
   for (const std::string& pattern : options.regexp_features) {
     regex_patterns_.push_back(std::unique_ptr<UniLib::RegexPattern>(
         unilib_.CreateRegexPattern(UTF8ToUnicodeText(
diff --git a/native/utils/token-feature-extractor.h b/native/utils/token-feature-extractor.h
index fed113b..b3f2f33 100644
--- a/native/utils/token-feature-extractor.h
+++ b/native/utils/token-feature-extractor.h
@@ -65,8 +65,10 @@
 
 class TokenFeatureExtractor {
  public:
-  TokenFeatureExtractor(const TokenFeatureExtractorOptions& options,
-                        const UniLib& unilib);
+  // Does not take ownership of unilib, which must refer to a valid unilib
+  // instance that outlives this feature extractor.
+  explicit TokenFeatureExtractor(const TokenFeatureExtractorOptions& options,
+                                 const UniLib* unilib);
 
   // Extracts both the sparse (charactergram) and the dense features from a
   // token. is_in_span is a bool indicator whether the token is a part of the
diff --git a/native/utils/tokenizer.cc b/native/utils/tokenizer.cc
index 5e50c09..bd47592 100644
--- a/native/utils/tokenizer.cc
+++ b/native/utils/tokenizer.cc
@@ -126,8 +126,7 @@
 
 void AppendCodepointToToken(UnicodeText::const_iterator it, Token* token) {
   token->value += std::string(
-      it.utf8_data(),
-      it.utf8_data() + GetNumBytesForNonZeroUTF8Char(it.utf8_data()));
+      it.utf8_data(), it.utf8_data() + GetNumBytesForUTF8Char(it.utf8_data()));
 }
 
 std::vector<Token> Tokenizer::InternalTokenize(
@@ -285,20 +284,19 @@
     }
   };
 
-  auto MaybeResetTokenAndAddChar = [&new_token, PushToken, &current_token_type](
-                                       int codepoint_index,
-                                       NumberTokenType token_type,
-                                       UnicodeText::const_iterator it,
-                                       bool is_whitespace = false) {
-    if (current_token_type != token_type) {
-      PushToken();
-      new_token = Token("", codepoint_index, codepoint_index,
-                        /*is_padding=*/false, is_whitespace);
-    }
-    new_token.end += 1;
-    AppendCodepointToToken(it, &new_token);
-    current_token_type = token_type;
-  };
+  auto MaybeResetTokenAndAddChar =
+      [&new_token, PushToken, &current_token_type](
+          int codepoint_index, NumberTokenType token_type,
+          UnicodeText::const_iterator it, bool is_whitespace = false) {
+        if (current_token_type != token_type) {
+          PushToken();
+          new_token = Token("", codepoint_index, codepoint_index,
+                            /*is_padding=*/false, is_whitespace);
+        }
+        new_token.end += 1;
+        AppendCodepointToToken(it, &new_token);
+        current_token_type = token_type;
+      };
 
   auto FinishTokenAndAddSeparator =
       [&new_token, result, &current_token_type, PushToken](
diff --git a/native/utils/utf8/unicodetext.cc b/native/utils/utf8/unicodetext.cc
index 45bbbf6..7b56ce2 100644
--- a/native/utils/utf8/unicodetext.cc
+++ b/native/utils/utf8/unicodetext.cc
@@ -288,7 +288,7 @@
 }
 
 UnicodeText::const_iterator& UnicodeText::const_iterator::operator++() {
-  it_ += GetNumBytesForNonZeroUTF8Char(it_);
+  it_ += GetNumBytesForUTF8Char(it_);
   return *this;
 }