Cherry-pick from master: AAPT2: Fix JavaDoc first sentence extraction.

The old algorithm for detecting the first sentence of a JavaDoc comment
looked for the first occurence of '.'. This does not work when code or a
{@link android.R.styleable} link is encountered in the first sentence.

Switch to checking for whitespace characters after the '.' character.

Bug: 62900335
Test: make aapt2_tests , make ds-docs
Original change-Id: I8238f6a6304c9c2f92e2e576ca8962a59c2b20ea

Change-Id: Ie272d0d793d157f2a30f3ead20b2b53976661d33
diff --git a/tools/aapt2/java/AnnotationProcessor.cpp b/tools/aapt2/java/AnnotationProcessor.cpp
index a0ef00b..1f83fa0 100644
--- a/tools/aapt2/java/AnnotationProcessor.cpp
+++ b/tools/aapt2/java/AnnotationProcessor.cpp
@@ -18,12 +18,29 @@
 
 #include <algorithm>
 
+#include "text/Unicode.h"
+#include "text/Utf8Iterator.h"
 #include "util/Util.h"
 
-using android::StringPiece;
+using ::aapt::text::Utf8Iterator;
+using ::android::StringPiece;
 
 namespace aapt {
 
+StringPiece AnnotationProcessor::ExtractFirstSentence(const StringPiece& comment) {
+  Utf8Iterator iter(comment);
+  while (iter.HasNext()) {
+    const char32_t codepoint = iter.Next();
+    if (codepoint == U'.') {
+      const size_t current_position = iter.Position();
+      if (!iter.HasNext() || text::IsWhitespace(iter.Next())) {
+        return comment.substr(0, current_position);
+      }
+    }
+  }
+  return comment;
+}
+
 void AnnotationProcessor::AppendCommentLine(std::string& comment) {
   static const std::string sDeprecated = "@deprecated";
   static const std::string sSystemApi = "@SystemApi";
diff --git a/tools/aapt2/java/AnnotationProcessor.h b/tools/aapt2/java/AnnotationProcessor.h
index 99cd44f..a06eda0 100644
--- a/tools/aapt2/java/AnnotationProcessor.h
+++ b/tools/aapt2/java/AnnotationProcessor.h
@@ -53,6 +53,8 @@
  */
 class AnnotationProcessor {
  public:
+  static android::StringPiece ExtractFirstSentence(const android::StringPiece& comment);
+
   /**
    * Adds more comments. Since resources can have various values with different
    * configurations,
diff --git a/tools/aapt2/java/AnnotationProcessor_test.cpp b/tools/aapt2/java/AnnotationProcessor_test.cpp
index 3e43c42..9ccac88 100644
--- a/tools/aapt2/java/AnnotationProcessor_test.cpp
+++ b/tools/aapt2/java/AnnotationProcessor_test.cpp
@@ -18,6 +18,10 @@
 
 #include "test/Test.h"
 
+using ::testing::Eq;
+using ::testing::HasSubstr;
+using ::testing::Not;
+
 namespace aapt {
 
 TEST(AnnotationProcessorTest, EmitsDeprecated) {
@@ -33,7 +37,7 @@
   processor.WriteToStream(&result, "");
   std::string annotations = result.str();
 
-  EXPECT_NE(std::string::npos, annotations.find("@Deprecated"));
+  EXPECT_THAT(annotations, HasSubstr("@Deprecated"));
 }
 
 TEST(AnnotationProcessorTest, EmitsSystemApiAnnotationAndRemovesFromComment) {
@@ -44,10 +48,20 @@
   processor.WriteToStream(&result, "");
   std::string annotations = result.str();
 
-  EXPECT_NE(std::string::npos,
-            annotations.find("@android.annotation.SystemApi"));
-  EXPECT_EQ(std::string::npos, annotations.find("@SystemApi"));
-  EXPECT_NE(std::string::npos, annotations.find("This is a system API"));
+  EXPECT_THAT(annotations, HasSubstr("@android.annotation.SystemApi"));
+  EXPECT_THAT(annotations, Not(HasSubstr("@SystemApi")));
+  EXPECT_THAT(annotations, HasSubstr("This is a system API"));
+}
+
+TEST(AnnotationProcessor, ExtractsFirstSentence) {
+  EXPECT_THAT(AnnotationProcessor::ExtractFirstSentence("This is the only sentence"),
+              Eq("This is the only sentence"));
+  EXPECT_THAT(AnnotationProcessor::ExtractFirstSentence(
+                  "This is the\n  first sentence.  This is the rest of the paragraph."),
+              Eq("This is the\n  first sentence."));
+  EXPECT_THAT(AnnotationProcessor::ExtractFirstSentence(
+                  "This is the first sentence with a {@link android.R.styleable.Theme}."),
+              Eq("This is the first sentence with a {@link android.R.styleable.Theme}."));
 }
 
 }  // namespace aapt
diff --git a/tools/aapt2/java/JavaClassGenerator.cpp b/tools/aapt2/java/JavaClassGenerator.cpp
index 2a23aa9..44fa0f1 100644
--- a/tools/aapt2/java/JavaClassGenerator.cpp
+++ b/tools/aapt2/java/JavaClassGenerator.cpp
@@ -299,24 +299,16 @@
       }
 
       const ResourceName& attr_name = entry.attr_ref->name.value();
-      styleable_comment << "<tr><td>";
-      styleable_comment << "<code>{@link #" << entry.field_name << " "
-                        << (!attr_name.package.empty()
-                                ? attr_name.package
-                                : context_->GetCompilationPackage())
-                        << ":" << attr_name.entry << "}</code>";
-      styleable_comment << "</td>";
-
-      styleable_comment << "<td>";
+      styleable_comment << "<tr><td><code>{@link #" << entry.field_name << " "
+                        << (!attr_name.package.empty() ? attr_name.package
+                                                       : context_->GetCompilationPackage())
+                        << ":" << attr_name.entry << "}</code></td>";
 
       // Only use the comment up until the first '.'. This is to stay compatible with
       // the way old AAPT did it (presumably to keep it short and to avoid including
       // annotations like @hide which would affect this Styleable).
-      auto iter = std::find(attr_comment_line.begin(), attr_comment_line.end(), '.');
-      if (iter != attr_comment_line.end()) {
-        attr_comment_line = attr_comment_line.substr(0, (iter - attr_comment_line.begin()) + 1);
-      }
-      styleable_comment << attr_comment_line << "</td></tr>\n";
+      styleable_comment << "<td>" << AnnotationProcessor::ExtractFirstSentence(attr_comment_line)
+                        << "</td></tr>\n";
     }
     styleable_comment << "</table>\n";
 
diff --git a/tools/aapt2/text/Unicode.cpp b/tools/aapt2/text/Unicode.cpp
index 38ec9c4..75eeb46 100644
--- a/tools/aapt2/text/Unicode.cpp
+++ b/tools/aapt2/text/Unicode.cpp
@@ -66,6 +66,17 @@
   return FindCharacterProperties(codepoint) & CharacterProperties::kXidContinue;
 }
 
+// Hardcode the White_Space characters since they are few and the external/icu project doesn't
+// list them as data files to parse.
+// Sourced from http://www.unicode.org/Public/UCD/latest/ucd/PropList.txt
+bool IsWhitespace(char32_t codepoint) {
+  return (codepoint >= 0x0009 && codepoint <= 0x000d) || (codepoint == 0x0020) ||
+         (codepoint == 0x0085) || (codepoint == 0x00a0) || (codepoint == 0x1680) ||
+         (codepoint >= 0x2000 && codepoint <= 0x200a) || (codepoint == 0x2028) ||
+         (codepoint == 0x2029) || (codepoint == 0x202f) || (codepoint == 0x205f) ||
+         (codepoint == 0x3000);
+}
+
 bool IsJavaIdentifier(const StringPiece& str) {
   Utf8Iterator iter(str);
 
diff --git a/tools/aapt2/text/Unicode.h b/tools/aapt2/text/Unicode.h
index 2707187..546714e 100644
--- a/tools/aapt2/text/Unicode.h
+++ b/tools/aapt2/text/Unicode.h
@@ -40,6 +40,10 @@
 // characters in the ID_Continue set.
 bool IsXidContinue(char32_t codepoint);
 
+// Returns true if the Unicode codepoint has the White_Space property.
+// http://unicode.org/reports/tr44/#White_Space
+bool IsWhitespace(char32_t codepoint);
+
 // Returns true if the UTF8 string can be used as a Java identifier.
 // NOTE: This does not check against the set of reserved Java keywords.
 bool IsJavaIdentifier(const android::StringPiece& str);
diff --git a/tools/aapt2/text/Utf8Iterator.cpp b/tools/aapt2/text/Utf8Iterator.cpp
index 0d43353..20b9073 100644
--- a/tools/aapt2/text/Utf8Iterator.cpp
+++ b/tools/aapt2/text/Utf8Iterator.cpp
@@ -25,18 +25,17 @@
 namespace text {
 
 Utf8Iterator::Utf8Iterator(const StringPiece& str)
-    : str_(str), next_pos_(0), current_codepoint_(0) {
+    : str_(str), current_pos_(0), next_pos_(0), current_codepoint_(0) {
   DoNext();
 }
 
 void Utf8Iterator::DoNext() {
-  size_t next_pos = 0u;
-  int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), next_pos_, &next_pos);
+  current_pos_ = next_pos_;
+  int32_t result = utf32_from_utf8_at(str_.data(), str_.size(), current_pos_, &next_pos_);
   if (result == -1) {
     current_codepoint_ = 0u;
   } else {
     current_codepoint_ = static_cast<char32_t>(result);
-    next_pos_ = next_pos;
   }
 }
 
@@ -44,6 +43,10 @@
   return current_codepoint_ != 0;
 }
 
+size_t Utf8Iterator::Position() const {
+  return current_pos_;
+}
+
 void Utf8Iterator::Skip(int amount) {
   while (amount > 0 && HasNext()) {
     Next();
diff --git a/tools/aapt2/text/Utf8Iterator.h b/tools/aapt2/text/Utf8Iterator.h
index 6923957..9318401 100644
--- a/tools/aapt2/text/Utf8Iterator.h
+++ b/tools/aapt2/text/Utf8Iterator.h
@@ -29,6 +29,10 @@
 
   bool HasNext() const;
 
+  // Returns the current position of the iterator in bytes of the source UTF8 string.
+  // This position is the start of the codepoint returned by the next call to Next().
+  size_t Position() const;
+
   void Skip(int amount);
 
   char32_t Next();
@@ -39,6 +43,7 @@
   void DoNext();
 
   android::StringPiece str_;
+  size_t current_pos_;
   size_t next_pos_;
   char32_t current_codepoint_;
 };
diff --git a/tools/aapt2/text/Utf8Iterator_test.cpp b/tools/aapt2/text/Utf8Iterator_test.cpp
index f3111c0..8c3e774 100644
--- a/tools/aapt2/text/Utf8Iterator_test.cpp
+++ b/tools/aapt2/text/Utf8Iterator_test.cpp
@@ -18,6 +18,7 @@
 
 #include "test/Test.h"
 
+using ::android::StringPiece;
 using ::testing::Eq;
 
 namespace aapt {
@@ -63,5 +64,32 @@
   EXPECT_FALSE(iter.HasNext());
 }
 
+TEST(Utf8IteratorTest, PositionPointsToTheCorrectPlace) {
+  const StringPiece expected("Mm🍩");
+  Utf8Iterator iter(expected);
+
+  // Before any character, the position should be 0.
+  EXPECT_THAT(iter.Position(), Eq(0u));
+
+  // The 'M' character, one byte.
+  ASSERT_TRUE(iter.HasNext());
+  iter.Next();
+  EXPECT_THAT(iter.Position(), Eq(1u));
+
+  // The 'm' character, one byte.
+  ASSERT_TRUE(iter.HasNext());
+  iter.Next();
+  EXPECT_THAT(iter.Position(), Eq(2u));
+
+  // The doughnut character, 4 bytes.
+  ASSERT_TRUE(iter.HasNext());
+  iter.Next();
+  EXPECT_THAT(iter.Position(), Eq(6u));
+
+  // There should be nothing left.
+  EXPECT_FALSE(iter.HasNext());
+  EXPECT_THAT(iter.Position(), Eq(expected.size()));
+}
+
 }  // namespace text
 }  // namespace aapt
diff --git a/tools/aapt2/tools/extract_unicode_properties.py b/tools/aapt2/tools/extract_unicode_properties.py
index d7e0479..7577ec8 100644
--- a/tools/aapt2/tools/extract_unicode_properties.py
+++ b/tools/aapt2/tools/extract_unicode_properties.py
@@ -35,9 +35,8 @@
         return "{}0x{:04x}, 0x{:04x}, {}{}".format(
                 "{", self.first_char, self.last_char, ' | '.join(types), "}")
 
-def extract_unicode_properties(f, props):
-    prog = re.compile(r"^(?P<first>\w{4})(..(?P<last>\w{4}))?\W+;\W+(?P<prop>\w+)\n$")
-    chars = {}
+def extract_unicode_properties(f, props, chars_out):
+    prog = re.compile(r"^(?P<first>\w{4})(..(?P<last>\w{4}))?\W+;\W+(?P<prop>\w+)")
     for line in f:
         result = prog.match(line)
         if result:
@@ -49,10 +48,12 @@
                 last_char = (int(last_char_str, 16) if last_char_str else start_char) + 1
                 prop_type = props[prop_type_str]
                 for char in range(start_char, last_char):
-                    if char not in chars:
-                        chars[char] = CharacterProperty(char, char, 0)
-                    chars[char].prop_type |= prop_type
+                    if char not in chars_out:
+                        chars_out[char] = CharacterProperty(char, char, 0)
+                    chars_out[char].prop_type |= prop_type
+    return chars_out
 
+def flatten_unicode_properties(chars):
     result = []
     for char_prop in sorted(chars.values(), key=CharacterProperty.key):
         if len(result) == 0:
@@ -82,17 +83,20 @@
 """
 
 if __name__ == "__main__":
-    if len(sys.argv) != 2:
+    if len(sys.argv) < 2:
         print("must specify path to icu DerivedCoreProperties file (e.g:" \
                 "external/icu/icu4c/source/data/unidata/DerivedCoreProperties.txt)")
         sys.exit(1)
 
-    with open(sys.argv[1]) as f:
-        props = {"XID_Start": 1, "XID_Continue": 2}
-        char_props = extract_unicode_properties(f, props)
-        print("{}\nconst static std::array<CharacterProperties, {}> sCharacterProperties = {}"
-                .format(license, len(char_props), "{{"))
-        for prop in char_props:
-            print("    {},".format(prop))
-        print("}};")
+    props = {"XID_Start": 1, "XID_Continue": 2}
+    char_props = {}
+    for file_path in sys.argv[1:]:
+        with open(file_path) as f:
+            extract_unicode_properties(f, props, char_props)
+    result = flatten_unicode_properties(char_props)
+    print("{}\nconst static std::array<CharacterProperties, {}> sCharacterProperties = {}"
+            .format(license, len(result), "{{"))
+    for prop in result:
+        print("    {},".format(prop))
+    print("}};")