Sync of libtextclassifier from Google3.

Exported by: knowledge/cerebra/sense/text_classifier/lib/export_to_aosp.sh

Bug: 67618889
Test: Builds. Tested also with oc-mr1 and tested that smartselect/sharing features work.
Change-Id: I25ad82cdd5eed20c60e83e7eb94dae6ab08b3690
diff --git a/util/base/casts.h b/util/base/casts.h
index ad12ce4..805ee89 100644
--- a/util/base/casts.h
+++ b/util/base/casts.h
@@ -21,13 +21,12 @@
 
 namespace libtextclassifier {
 
-// lang_id_bit_cast<Dest,Source> is a template function that implements the
-// equivalent of "*reinterpret_cast<Dest*>(&source)".  We need this in
-// very low-level functions like the protobuf library and fast math
-// support.
+// bit_cast<Dest, Source> is a template function that implements the equivalent
+// of "*reinterpret_cast<Dest*>(&source)".  We need this in very low-level
+// functions like fast math support.
 //
 //   float f = 3.14159265358979;
-//   int i = lang_id_bit_cast<int32>(f);
+//   int i = bit_cast<int32>(f);
 //   // i = 0x40490fdb
 //
 // The classical address-casting method is:
@@ -60,9 +59,9 @@
 //
 // Anyways ...
 //
-// lang_id_bit_cast<> calls memcpy() which is blessed by the standard,
-// especially by the example in section 3.9 .  Also, of course,
-// lang_id_bit_cast<> wraps up the nasty logic in one place.
+// bit_cast<> calls memcpy() which is blessed by the standard, especially by the
+// example in section 3.9.  Also, of course, bit_cast<> wraps up the nasty
+// logic in one place.
 //
 // Fortunately memcpy() is very fast.  In optimized mode, with a
 // constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
@@ -70,15 +69,14 @@
 // memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
 // compiles to two loads and two stores.
 //
-// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
+// Mike Chastain tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc
+// 7.1.
 //
 // WARNING: if Dest or Source is a non-POD type, the result of the memcpy
 // is likely to surprise you.
 //
 // Props to Bill Gibbons for the compile time assertion technique and
 // Art Komninos and Igor Tandetnik for the msvc experiments.
-//
-// -- mec 2005-10-17
 
 template <class Dest, class Source>
 inline Dest bit_cast(const Source &source) {
diff --git a/util/base/endian.h b/util/base/endian.h
new file mode 100644
index 0000000..5813288
--- /dev/null
+++ b/util/base/endian.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
+#define LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
+
+#include "util/base/integral_types.h"
+
+namespace libtextclassifier {
+
+#if defined OS_LINUX || defined OS_CYGWIN || defined OS_ANDROID || \
+    defined(__ANDROID__)
+#include <endian.h>
+#endif
+
+// The following guarantees declaration of the byte swap functions: they come
+// from <byteswap.h> where available, and from the fallbacks below otherwise.
+#if defined(__GLIBC__) || defined(__CYGWIN__)
+#include <byteswap.h>  // IWYU pragma: export
+
+#else
+#define GG_LONGLONG(x) x##LL
+#define GG_ULONGLONG(x) x##ULL
+static inline uint16 bswap_16(uint16 x) {
+  return (uint16)(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8));  // NOLINT
+}
+#define bswap_16(x) bswap_16(x)
+static inline uint32 bswap_32(uint32 x) {
+  return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) |
+          ((x & 0xFF000000) >> 24));
+}
+#define bswap_32(x) bswap_32(x)
+static inline uint64 bswap_64(uint64 x) {
+  return (((x & GG_ULONGLONG(0xFF)) << 56) |
+          ((x & GG_ULONGLONG(0xFF00)) << 40) |
+          ((x & GG_ULONGLONG(0xFF0000)) << 24) |
+          ((x & GG_ULONGLONG(0xFF000000)) << 8) |
+          ((x & GG_ULONGLONG(0xFF00000000)) >> 8) |
+          ((x & GG_ULONGLONG(0xFF0000000000)) >> 24) |
+          ((x & GG_ULONGLONG(0xFF000000000000)) >> 40) |
+          ((x & GG_ULONGLONG(0xFF00000000000000)) >> 56));
+}
+#define bswap_64(x) bswap_64(x)
+#endif
+
+// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
+// using the above endian definitions from endian.h if
+// endian.h was included
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif
+
+#else
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif
+
+// there is also PDP endian ...
+
+#endif  // __BYTE_ORDER
+
+class LittleEndian {
+ public:
+// Conversion functions; none are declared if the host endianness is unknown.
+#ifdef IS_LITTLE_ENDIAN
+  // Little-endian host: every conversion is the identity.
+  static uint16 FromHost16(uint16 x) { return x; }
+  static uint16 ToHost16(uint16 x) { return x; }
+
+  static uint32 FromHost32(uint32 x) { return x; }
+  static uint32 ToHost32(uint32 x) { return x; }
+
+  static uint64 FromHost64(uint64 x) { return x; }
+  static uint64 ToHost64(uint64 x) { return x; }
+
+  static bool IsLittleEndian() { return true; }
+
+#elif defined IS_BIG_ENDIAN
+  // Big-endian host: swap bytes using the bswap_NN helpers declared above.
+  static uint16 FromHost16(uint16 x) { return bswap_16(x); }
+  static uint16 ToHost16(uint16 x) { return bswap_16(x); }
+
+  static uint32 FromHost32(uint32 x) { return bswap_32(x); }
+  static uint32 ToHost32(uint32 x) { return bswap_32(x); }
+
+  static uint64 FromHost64(uint64 x) { return bswap_64(x); }
+  static uint64 ToHost64(uint64 x) { return bswap_64(x); }
+
+  static bool IsLittleEndian() { return false; }
+
+#endif /* ENDIAN */
+};
+
+}  // namespace libtextclassifier
+
+#endif  // LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
diff --git a/util/base/logging.h b/util/base/logging.h
index b0f3c5d..dba0ed4 100644
--- a/util/base/logging.h
+++ b/util/base/logging.h
@@ -24,6 +24,23 @@
 #include "util/base/logging_levels.h"
 #include "util/base/port.h"
 
+// TC_STRIP
+namespace libtextclassifier {
+// string class that can't be instantiated.  Makes sure that the code does not
+// compile when the unqualified string (instead of std::string) is used.
+//
+// NOTE: defined here because most files directly or transitively include this
+// file.  Asking people to include a special header just to make sure they don't
+// use the unqualified string doesn't work: as that header doesn't produce any
+// immediate benefit, one can easily forget about it.
+class string {
+ public:
+  // Makes the class non-instantiable.
+  virtual ~string() = 0;
+};
+}  // namespace libtextclassifier
+// TC_END_STRIP
+
 namespace libtextclassifier {
 namespace logging {
 
@@ -75,10 +92,6 @@
 #define TC_CHECK_GE(x, y) TC_CHECK((x) >= (y))
 #define TC_CHECK_NE(x, y) TC_CHECK((x) != (y))
 
-// Debug checks: a TC_DCHECK<suffix> macro should behave like TC_CHECK<suffix>
-// in debug mode an don't check / don't print anything in non-debug mode.
-#ifdef NDEBUG
-
 // Pseudo-stream that "eats" the tokens <<-pumped into it, without printing
 // anything.
 class NullStream {
@@ -92,6 +105,11 @@
 }
 
 #define TC_NULLSTREAM ::libtextclassifier::logging::NullStream().stream()
+
+// Debug checks: a TC_DCHECK<suffix> macro should behave like TC_CHECK<suffix>
+// in debug mode and not check / not print anything in non-debug mode.
+#ifdef NDEBUG
+
 #define TC_DCHECK(x) TC_NULLSTREAM
 #define TC_DCHECK_EQ(x, y) TC_NULLSTREAM
 #define TC_DCHECK_LT(x, y) TC_NULLSTREAM
@@ -113,6 +131,16 @@
 #define TC_DCHECK_NE(x, y) TC_CHECK_NE(x, y)
 
 #endif  // NDEBUG
+
+#ifdef LIBTEXTCLASSIFIER_VLOG
+#define TC_VLOG(severity)                                                      \
+  ::libtextclassifier::logging::LogMessage(::libtextclassifier::logging::INFO, \
+                                           __FILE__, __LINE__)                 \
+      .stream()
+#else
+#define TC_VLOG(severity) TC_NULLSTREAM
+#endif
+
 }  // namespace logging
 }  // namespace libtextclassifier
 
diff --git a/util/hash/farmhash.cc b/util/hash/farmhash.cc
index 55786a9..f4f2e84 100644
--- a/util/hash/farmhash.cc
+++ b/util/hash/farmhash.cc
@@ -642,7 +642,7 @@
 
 uint32_t Hash32(const char *s, size_t len) {
   FARMHASH_DIE_IF_MISCONFIGURED;
-  return s == NULL ? 0 : len;
+  return s == nullptr ? 0 : len;
 }
 
 uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
@@ -865,7 +865,7 @@
 
 uint32_t Hash32(const char *s, size_t len) {
   FARMHASH_DIE_IF_MISCONFIGURED;
-  return s == NULL ? 0 : len;
+  return s == nullptr ? 0 : len;
 }
 
 uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
@@ -894,7 +894,7 @@
 
 uint32_t Hash32(const char *s, size_t len) {
   FARMHASH_DIE_IF_MISCONFIGURED;
-  return s == NULL ? 0 : len;
+  return s == nullptr ? 0 : len;
 }
 
 uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
diff --git a/util/java/scoped_local_ref.h b/util/java/scoped_local_ref.h
new file mode 100644
index 0000000..d995468
--- /dev/null
+++ b/util/java/scoped_local_ref.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
+#define LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
+
+#include <jni.h>
+#include <memory>
+#include <type_traits>
+
+#include "util/base/logging.h"
+
+namespace libtextclassifier {
+
+// A deleter to be used with std::unique_ptr to delete JNI local references.
+class LocalRefDeleter {
+ public:
+  // Style guide violating implicit constructor so that the LocalRefDeleter
+  // is implicitly constructed from the second argument to ScopedLocalRef.
+  LocalRefDeleter(JNIEnv* env) : env_(env) {}  // NOLINT(runtime/explicit)
+
+  LocalRefDeleter(const LocalRefDeleter& orig) = default;
+
+  // Copy assignment to allow move semantics in ScopedLocalRef.
+  LocalRefDeleter& operator=(const LocalRefDeleter& rhs) {
+    // As the deleter and its state are thread-local, ensure the envs
+    // are consistent but do nothing.
+    TC_CHECK_EQ(env_, rhs.env_);
+    return *this;
+  }
+
+  // The delete operator.
+  void operator()(jobject o) const { env_->DeleteLocalRef(o); }
+
+ private:
+  // The env_ stashed to use for deletion. Thread-local, don't share!
+  JNIEnv* const env_;
+};
+
+// A smart pointer that deletes a JNI local reference when it goes out
+// of scope. Usage is:
+// ScopedLocalRef<jobject> scoped_local(env->JniFunction(), env);
+//
+// Note that this class is not thread-safe since it caches JNIEnv in
+// the deleter. Do not use the same jobject across different threads.
+template <typename T>
+using ScopedLocalRef =
+    std::unique_ptr<typename std::remove_pointer<T>::type, LocalRefDeleter>;
+
+}  // namespace libtextclassifier
+
+#endif  // LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
diff --git a/util/strings/numbers_test.cc b/util/strings/numbers_test.cc
new file mode 100644
index 0000000..f3a3f27
--- /dev/null
+++ b/util/strings/numbers_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/strings/numbers.h"
+
+#include "util/base/integral_types.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier {
+namespace {
+
+void TestParseInt32(const char *c_str, bool expected_parsing_success,
+                    int32 expected_parsed_value = 0) {
+  int32 parsed_value = 0;
+  EXPECT_EQ(expected_parsing_success, ParseInt32(c_str, &parsed_value));
+  if (expected_parsing_success) {
+    EXPECT_EQ(expected_parsed_value, parsed_value);
+  }
+}
+
+TEST(ParseInt32Test, Normal) {
+  TestParseInt32("2", true, 2);
+  TestParseInt32("-357", true, -357);
+  TestParseInt32("7", true, 7);
+  TestParseInt32("+7", true, 7);
+  TestParseInt32("  +7", true, 7);
+  TestParseInt32("-23", true, -23);
+  TestParseInt32("  -23", true, -23);
+}
+
+TEST(ParseInt32Test, ErrorCases) {
+  TestParseInt32("", false);
+  TestParseInt32("  ", false);
+  TestParseInt32("not-a-number", false);
+  TestParseInt32("123a", false);
+}
+
+void TestParseInt64(const char *c_str, bool expected_parsing_success,
+                    int64 expected_parsed_value = 0) {
+  int64 parsed_value = 0;
+  EXPECT_EQ(expected_parsing_success, ParseInt64(c_str, &parsed_value));
+  if (expected_parsing_success) {
+    EXPECT_EQ(expected_parsed_value, parsed_value);
+  }
+}
+
+TEST(ParseInt64Test, Normal) {
+  TestParseInt64("2", true, 2);
+  TestParseInt64("-357", true, -357);
+  TestParseInt64("7", true, 7);
+  TestParseInt64("+7", true, 7);
+  TestParseInt64("  +7", true, 7);
+  TestParseInt64("-23", true, -23);
+  TestParseInt64("  -23", true, -23);
+}
+
+TEST(ParseInt64Test, ErrorCases) {
+  TestParseInt64("", false);
+  TestParseInt64("  ", false);
+  TestParseInt64("not-a-number", false);
+  TestParseInt64("23z", false);
+}
+
+void TestParseDouble(const char *c_str, bool expected_parsing_success,
+                     double expected_parsed_value = 0.0) {
+  double parsed_value = 0.0;
+  EXPECT_EQ(expected_parsing_success, ParseDouble(c_str, &parsed_value));
+  if (expected_parsing_success) {
+    EXPECT_NEAR(expected_parsed_value, parsed_value, 0.00001);
+  }
+}
+
+TEST(ParseDoubleTest, Normal) {
+  TestParseDouble("2", true, 2.0);
+  TestParseDouble("-357.023", true, -357.023);
+  TestParseDouble("7.04", true, 7.04);
+  TestParseDouble("+7.2", true, 7.2);
+  TestParseDouble("  +7.236", true, 7.236);
+  TestParseDouble("-23.4", true, -23.4);
+  TestParseDouble("  -23.4", true, -23.4);
+}
+
+TEST(ParseDoubleTest, ErrorCases) {
+  TestParseDouble("", false);
+  TestParseDouble("  ", false);
+  TestParseDouble("not-a-number", false);
+  TestParseDouble("23.5a", false);
+}
+}  // namespace
+}  // namespace libtextclassifier
diff --git a/util/utf8/unicodetext.cc b/util/utf8/unicodetext.cc
index e83c890..dbab1c8 100644
--- a/util/utf8/unicodetext.cc
+++ b/util/utf8/unicodetext.cc
@@ -16,7 +16,10 @@
 
 #include "util/utf8/unicodetext.h"
 
-#include "base.h"
+#include <string.h>
+
+#include <algorithm>
+
 #include "util/strings/utf8.h"
 
 namespace libtextclassifier {
@@ -108,6 +111,8 @@
 
 void UnicodeText::clear() { repr_.clear(); }
 
+int UnicodeText::size() const { return std::distance(begin(), end()); }
+
 std::string UnicodeText::UTF8Substring(const const_iterator& first,
                                        const const_iterator& last) {
   return std::string(first.it_, last.it_ - first.it_);
diff --git a/util/utf8/unicodetext.h b/util/utf8/unicodetext.h
index 5327383..6a21058 100644
--- a/util/utf8/unicodetext.h
+++ b/util/utf8/unicodetext.h
@@ -17,9 +17,11 @@
 #ifndef LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
 #define LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
 
+#include <iterator>
+#include <string>
 #include <utility>
 
-#include "base.h"
+#include "util/base/integral_types.h"
 
 namespace libtextclassifier {
 
@@ -137,6 +139,7 @@
 
   const_iterator begin() const;
   const_iterator end() const;
+  int size() const;  // the number of Unicode characters (codepoints)
 
   // x.PointToUTF8(buf,len) changes x so that it points to buf
   // ("becomes an alias"). It does not take ownership or copy buf.
@@ -162,7 +165,7 @@
     int capacity_;
     bool ours_;  // Do we own data_?
 
-    Repr() : data_(NULL), size_(0), capacity_(0), ours_(true) {}
+    Repr() : data_(nullptr), size_(0), capacity_(0), ours_(true) {}
     ~Repr() {
       if (ours_) delete[] data_;
     }