Sync of libtextclassifier from Google3.
Exported by: knowledge/cerebra/sense/text_classifier/lib/export_to_aosp.sh
Bug: 67618889
Test: Builds. Tested also with oc-mr1 and tested that smartselect/sharing features work.
Change-Id: I25ad82cdd5eed20c60e83e7eb94dae6ab08b3690
diff --git a/util/base/casts.h b/util/base/casts.h
index ad12ce4..805ee89 100644
--- a/util/base/casts.h
+++ b/util/base/casts.h
@@ -21,13 +21,12 @@
namespace libtextclassifier {
-// lang_id_bit_cast<Dest,Source> is a template function that implements the
-// equivalent of "*reinterpret_cast<Dest*>(&source)". We need this in
-// very low-level functions like the protobuf library and fast math
-// support.
+// bit_cast<Dest, Source> is a template function that implements the equivalent
+// of "*reinterpret_cast<Dest*>(&source)". We need this in very low-level
+// functions like fast math support.
//
// float f = 3.14159265358979;
-// int i = lang_id_bit_cast<int32>(f);
+// int i = bit_cast<int32>(f);
// // i = 0x40490fdb
//
// The classical address-casting method is:
@@ -60,9 +59,9 @@
//
// Anyways ...
//
-// lang_id_bit_cast<> calls memcpy() which is blessed by the standard,
-// especially by the example in section 3.9 . Also, of course,
-// lang_id_bit_cast<> wraps up the nasty logic in one place.
+// bit_cast<> calls memcpy() which is blessed by the standard, especially by the
+// example in section 3.9 . Also, of course, bit_cast<> wraps up the nasty
+// logic in one place.
//
// Fortunately memcpy() is very fast. In optimized mode, with a
// constant size, gcc 2.95.3, gcc 4.0.1, and msvc 7.1 produce inline
@@ -70,15 +69,14 @@
// memcpy(d,s,4) compiles to one load and one store, and memcpy(d,s,8)
// compiles to two loads and two stores.
//
-// I tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc 7.1.
+// Mike Chastain tested this code with gcc 2.95.3, gcc 4.0.1, icc 8.1, and msvc
+// 7.1.
//
// WARNING: if Dest or Source is a non-POD type, the result of the memcpy
// is likely to surprise you.
//
// Props to Bill Gibbons for the compile time assertion technique and
// Art Komninos and Igor Tandetnik for the msvc experiments.
-//
-// -- mec 2005-10-17
template <class Dest, class Source>
inline Dest bit_cast(const Source &source) {
diff --git a/util/base/endian.h b/util/base/endian.h
new file mode 100644
index 0000000..5813288
--- /dev/null
+++ b/util/base/endian.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
+#define LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
+
+#include "util/base/integral_types.h"
+
+namespace libtextclassifier {
+
+#if defined OS_LINUX || defined OS_CYGWIN || defined OS_ANDROID || \
+ defined(__ANDROID__)
+#include <endian.h>
+#endif
+
+// The following guarantees declaration of the byte swap functions
+// (bswap_16/32/64): from <byteswap.h> if available, else defined here.
+#if defined(__GLIBC__) || defined(__CYGWIN__)
+#include <byteswap.h> // IWYU pragma: export
+
+#else
+#define GG_LONGLONG(x) x##LL
+#define GG_ULONGLONG(x) x##ULL
+static inline uint16 bswap_16(uint16 x) {
+ return (uint16)(((x & 0xFF) << 8) | ((x & 0xFF00) >> 8)); // NOLINT
+}
+#define bswap_16(x) bswap_16(x)
+static inline uint32 bswap_32(uint32 x) {
+ return (((x & 0xFF) << 24) | ((x & 0xFF00) << 8) | ((x & 0xFF0000) >> 8) |
+ ((x & 0xFF000000) >> 24));
+}
+#define bswap_32(x) bswap_32(x)
+static inline uint64 bswap_64(uint64 x) {
+ return (((x & GG_ULONGLONG(0xFF)) << 56) |
+ ((x & GG_ULONGLONG(0xFF00)) << 40) |
+ ((x & GG_ULONGLONG(0xFF0000)) << 24) |
+ ((x & GG_ULONGLONG(0xFF000000)) << 8) |
+ ((x & GG_ULONGLONG(0xFF00000000)) >> 8) |
+ ((x & GG_ULONGLONG(0xFF0000000000)) >> 24) |
+ ((x & GG_ULONGLONG(0xFF000000000000)) >> 40) |
+ ((x & GG_ULONGLONG(0xFF00000000000000)) >> 56));
+}
+#define bswap_64(x) bswap_64(x)
+#endif
+
+// define the macros IS_LITTLE_ENDIAN or IS_BIG_ENDIAN
+// using the above endian definitions from endian.h if
+// endian.h was included
+#ifdef __BYTE_ORDER
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define IS_LITTLE_ENDIAN
+#endif
+
+#if __BYTE_ORDER == __BIG_ENDIAN
+#define IS_BIG_ENDIAN
+#endif
+
+#else
+
+#if defined(__LITTLE_ENDIAN__)
+#define IS_LITTLE_ENDIAN
+#elif defined(__BIG_ENDIAN__)
+#define IS_BIG_ENDIAN
+#endif
+
+// there is also PDP endian ...
+
+#endif // __BYTE_ORDER
+
+class LittleEndian {
+ public:
+// Host <-> little-endian conversions; big-endian hosts swap via bswap_*.
+#ifdef IS_LITTLE_ENDIAN
+
+ static uint16 FromHost16(uint16 x) { return x; }
+ static uint16 ToHost16(uint16 x) { return x; }
+
+ static uint32 FromHost32(uint32 x) { return x; }
+ static uint32 ToHost32(uint32 x) { return x; }
+
+ static uint64 FromHost64(uint64 x) { return x; }
+ static uint64 ToHost64(uint64 x) { return x; }
+
+ static bool IsLittleEndian() { return true; }
+
+#elif defined IS_BIG_ENDIAN
+
+ static uint16 FromHost16(uint16 x) { return bswap_16(x); }
+ static uint16 ToHost16(uint16 x) { return bswap_16(x); }
+
+ static uint32 FromHost32(uint32 x) { return bswap_32(x); }
+ static uint32 ToHost32(uint32 x) { return bswap_32(x); }
+
+ static uint64 FromHost64(uint64 x) { return bswap_64(x); }
+ static uint64 ToHost64(uint64 x) { return bswap_64(x); }
+
+ static bool IsLittleEndian() { return false; }
+
+#endif /* ENDIAN */
+};
+
+} // namespace libtextclassifier
+
+#endif // LIBTEXTCLASSIFIER_UTIL_BASE_ENDIAN_H_
diff --git a/util/base/logging.h b/util/base/logging.h
index b0f3c5d..dba0ed4 100644
--- a/util/base/logging.h
+++ b/util/base/logging.h
@@ -24,6 +24,23 @@
#include "util/base/logging_levels.h"
#include "util/base/port.h"
+// TC_STRIP
+namespace libtextclassifier {
+// string class that can't be instantiated. Makes sure that the code does not
+// compile when an unqualified string (instead of std::string) is used.
+//
+// NOTE: defined here because most files directly or transitively include this
+// file. Asking people to include a special header just to make sure they don't
+// use the unqualified string doesn't work: as that header doesn't produce any
+// immediate benefit, one can easily forget about it.
+class string {
+ public:
+ // Makes the class non-instantiable.
+ virtual ~string() = 0;
+};
+} // namespace libtextclassifier
+// TC_END_STRIP
+
namespace libtextclassifier {
namespace logging {
@@ -75,10 +92,6 @@
#define TC_CHECK_GE(x, y) TC_CHECK((x) >= (y))
#define TC_CHECK_NE(x, y) TC_CHECK((x) != (y))
-// Debug checks: a TC_DCHECK<suffix> macro should behave like TC_CHECK<suffix>
-// in debug mode an don't check / don't print anything in non-debug mode.
-#ifdef NDEBUG
-
// Pseudo-stream that "eats" the tokens <<-pumped into it, without printing
// anything.
class NullStream {
@@ -92,6 +105,11 @@
}
#define TC_NULLSTREAM ::libtextclassifier::logging::NullStream().stream()
+
+// Debug checks: a TC_DCHECK<suffix> macro should behave like TC_CHECK<suffix>
+// in debug mode and not check / not print anything in non-debug mode.
+#ifdef NDEBUG
+
#define TC_DCHECK(x) TC_NULLSTREAM
#define TC_DCHECK_EQ(x, y) TC_NULLSTREAM
#define TC_DCHECK_LT(x, y) TC_NULLSTREAM
@@ -113,6 +131,16 @@
#define TC_DCHECK_NE(x, y) TC_CHECK_NE(x, y)
#endif // NDEBUG
+
+#ifdef LIBTEXTCLASSIFIER_VLOG
+#define TC_VLOG(severity) \
+ ::libtextclassifier::logging::LogMessage(::libtextclassifier::logging::INFO, \
+ __FILE__, __LINE__) \
+ .stream()
+#else
+#define TC_VLOG(severity) TC_NULLSTREAM
+#endif
+
} // namespace logging
} // namespace libtextclassifier
diff --git a/util/hash/farmhash.cc b/util/hash/farmhash.cc
index 55786a9..f4f2e84 100644
--- a/util/hash/farmhash.cc
+++ b/util/hash/farmhash.cc
@@ -642,7 +642,7 @@
uint32_t Hash32(const char *s, size_t len) {
FARMHASH_DIE_IF_MISCONFIGURED;
- return s == NULL ? 0 : len;
+ return s == nullptr ? 0 : len;
}
uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
@@ -865,7 +865,7 @@
uint32_t Hash32(const char *s, size_t len) {
FARMHASH_DIE_IF_MISCONFIGURED;
- return s == NULL ? 0 : len;
+ return s == nullptr ? 0 : len;
}
uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
@@ -894,7 +894,7 @@
uint32_t Hash32(const char *s, size_t len) {
FARMHASH_DIE_IF_MISCONFIGURED;
- return s == NULL ? 0 : len;
+ return s == nullptr ? 0 : len;
}
uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
diff --git a/util/java/scoped_local_ref.h b/util/java/scoped_local_ref.h
new file mode 100644
index 0000000..d995468
--- /dev/null
+++ b/util/java/scoped_local_ref.h
@@ -0,0 +1,65 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
+#define LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
+
+#include <jni.h>
+#include <memory>
+#include <type_traits>
+
+#include "util/base/logging.h"
+
+namespace libtextclassifier {
+
+// A deleter to be used with std::unique_ptr to delete JNI local references.
+class LocalRefDeleter {
+ public:
+ // Style guide violating implicit constructor so that the LocalRefDeleter
+ // is implicitly constructed from the second argument to ScopedLocalRef.
+ LocalRefDeleter(JNIEnv* env) : env_(env) {} // NOLINT(runtime/explicit)
+
+ LocalRefDeleter(const LocalRefDeleter& orig) = default;
+
+ // Copy assignment to allow move semantics in ScopedLocalRef.
+ LocalRefDeleter& operator=(const LocalRefDeleter& rhs) {
+ // As the deleter and its state are thread-local, ensure the envs
+ // are consistent but do nothing.
+ TC_CHECK_EQ(env_, rhs.env_);
+ return *this;
+ }
+
+ // The delete operator.
+ void operator()(jobject o) const { env_->DeleteLocalRef(o); }
+
+ private:
+ // The env_ stashed to use for deletion. Thread-local, don't share!
+ JNIEnv* const env_;
+};
+
+// A smart pointer that deletes a JNI local reference when it goes out
+// of scope. Usage is:
+// ScopedLocalRef<jobject> scoped_local(env->JniFunction(), env);
+//
+// Note that this class is not thread-safe since it caches JNIEnv in
+// the deleter. Do not use the same jobject across different threads.
+template <typename T>
+using ScopedLocalRef =
+ std::unique_ptr<typename std::remove_pointer<T>::type, LocalRefDeleter>;
+
+} // namespace libtextclassifier
+
+#endif // LIBTEXTCLASSIFIER_UTIL_JAVA_SCOPED_LOCAL_REF_H_
diff --git a/util/strings/numbers_test.cc b/util/strings/numbers_test.cc
new file mode 100644
index 0000000..f3a3f27
--- /dev/null
+++ b/util/strings/numbers_test.cc
@@ -0,0 +1,103 @@
+/*
+ * Copyright (C) 2017 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "util/strings/numbers.h"
+
+#include "util/base/integral_types.h"
+#include "gtest/gtest.h"
+
+namespace libtextclassifier {
+namespace {
+
+void TestParseInt32(const char *c_str, bool expected_parsing_success,
+ int32 expected_parsed_value = 0) {
+ int32 parsed_value = 0;
+ EXPECT_EQ(expected_parsing_success, ParseInt32(c_str, &parsed_value));
+ if (expected_parsing_success) {
+ EXPECT_EQ(expected_parsed_value, parsed_value);
+ }
+}
+
+TEST(ParseInt32Test, Normal) {
+ TestParseInt32("2", true, 2);
+ TestParseInt32("-357", true, -357);
+ TestParseInt32("7", true, 7);
+ TestParseInt32("+7", true, 7);
+ TestParseInt32(" +7", true, 7);
+ TestParseInt32("-23", true, -23);
+ TestParseInt32(" -23", true, -23);
+}
+
+TEST(ParseInt32Test, ErrorCases) {
+ TestParseInt32("", false);
+ TestParseInt32(" ", false);
+ TestParseInt32("not-a-number", false);
+ TestParseInt32("123a", false);
+}
+
+void TestParseInt64(const char *c_str, bool expected_parsing_success,
+ int64 expected_parsed_value = 0) {
+ int64 parsed_value = 0;
+ EXPECT_EQ(expected_parsing_success, ParseInt64(c_str, &parsed_value));
+ if (expected_parsing_success) {
+ EXPECT_EQ(expected_parsed_value, parsed_value);
+ }
+}
+
+TEST(ParseInt64Test, Normal) {
+ TestParseInt64("2", true, 2);
+ TestParseInt64("-357", true, -357);
+ TestParseInt64("7", true, 7);
+ TestParseInt64("+7", true, 7);
+ TestParseInt64(" +7", true, 7);
+ TestParseInt64("-23", true, -23);
+ TestParseInt64(" -23", true, -23);
+}
+
+TEST(ParseInt64Test, ErrorCases) {
+ TestParseInt64("", false);
+ TestParseInt64(" ", false);
+ TestParseInt64("not-a-number", false);
+ TestParseInt64("23z", false);
+}
+
+void TestParseDouble(const char *c_str, bool expected_parsing_success,
+ double expected_parsed_value = 0.0) {
+ double parsed_value = 0.0;
+ EXPECT_EQ(expected_parsing_success, ParseDouble(c_str, &parsed_value));
+ if (expected_parsing_success) {
+ EXPECT_NEAR(expected_parsed_value, parsed_value, 0.00001);
+ }
+}
+
+TEST(ParseDoubleTest, Normal) {
+ TestParseDouble("2", true, 2.0);
+ TestParseDouble("-357.023", true, -357.023);
+ TestParseDouble("7.04", true, 7.04);
+ TestParseDouble("+7.2", true, 7.2);
+ TestParseDouble(" +7.236", true, 7.236);
+ TestParseDouble("-23.4", true, -23.4);
+ TestParseDouble(" -23.4", true, -23.4);
+}
+
+TEST(ParseDoubleTest, ErrorCases) {
+ TestParseDouble("", false);
+ TestParseDouble(" ", false);
+ TestParseDouble("not-a-number", false);
+ TestParseDouble("23.5a", false);
+}
+} // namespace
+} // namespace libtextclassifier
diff --git a/util/utf8/unicodetext.cc b/util/utf8/unicodetext.cc
index e83c890..dbab1c8 100644
--- a/util/utf8/unicodetext.cc
+++ b/util/utf8/unicodetext.cc
@@ -16,7 +16,10 @@
#include "util/utf8/unicodetext.h"
-#include "base.h"
+#include <string.h>
+
+#include <algorithm>
+
#include "util/strings/utf8.h"
namespace libtextclassifier {
@@ -108,6 +111,8 @@
void UnicodeText::clear() { repr_.clear(); }
+int UnicodeText::size() const { return std::distance(begin(), end()); }
+
std::string UnicodeText::UTF8Substring(const const_iterator& first,
const const_iterator& last) {
return std::string(first.it_, last.it_ - first.it_);
diff --git a/util/utf8/unicodetext.h b/util/utf8/unicodetext.h
index 5327383..6a21058 100644
--- a/util/utf8/unicodetext.h
+++ b/util/utf8/unicodetext.h
@@ -17,9 +17,11 @@
#ifndef LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
#define LIBTEXTCLASSIFIER_UTIL_UTF8_UNICODETEXT_H_
+#include <iterator>
+#include <string>
#include <utility>
-#include "base.h"
+#include "util/base/integral_types.h"
namespace libtextclassifier {
@@ -137,6 +139,7 @@
const_iterator begin() const;
const_iterator end() const;
+ int size() const; // the number of Unicode characters (codepoints)
// x.PointToUTF8(buf,len) changes x so that it points to buf
// ("becomes an alias"). It does not take ownership or copy buf.
@@ -162,7 +165,7 @@
int capacity_;
bool ours_; // Do we own data_?
- Repr() : data_(NULL), size_(0), capacity_(0), ours_(true) {}
+ Repr() : data_(nullptr), size_(0), capacity_(0), ours_(true) {}
~Repr() {
if (ours_) delete[] data_;
}