[clangd] Support UTF-32 (i.e. codepoint) offsets.

Summary:
(Changes to UTF-8/UTF-16 here are NFC, moving things around to make the
cases more symmetrical)

Reviewers: ilya-biryukov

Subscribers: ioeric, MaskRay, jkorous, arphaman, kadircet, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D59927

llvm-svn: 357173
diff --git a/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp b/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp
index e09f42b..e38eaa3 100644
--- a/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp
+++ b/clang-tools-extra/unittests/clangd/SourceCodeTests.cpp
@@ -58,6 +58,15 @@
   EXPECT_EQ(lspLength("¥"), 2UL);
   // astral
   EXPECT_EQ(lspLength("😂"), 4UL);
+
+  WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32);
+  EXPECT_EQ(lspLength(""), 0UL);
+  EXPECT_EQ(lspLength("ascii"), 5UL);
+  // BMP
+  EXPECT_EQ(lspLength("↓"), 1UL);
+  EXPECT_EQ(lspLength("¥"), 1UL);
+  // astral
+  EXPECT_EQ(lspLength("😂"), 1UL);
 }
 
 // The = → 🡆 below are ASCII (1 byte), BMP (3 bytes), and astral (4 bytes).
@@ -131,6 +140,63 @@
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed());
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), llvm::Failed());
 
+  // Codepoints are similar, except near astral characters.
+  WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32);
+  // line out of bounds
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed());
+  // first line
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, -1)),
+                       llvm::Failed()); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 0)),
+                       llvm::HasValue(0)); // first character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 3)),
+                       llvm::HasValue(3)); // middle character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 6)),
+                       llvm::HasValue(6)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7)),
+                       llvm::HasValue(7)); // the newline itself
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 7), false),
+                       llvm::HasValue(7));
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8)),
+                       llvm::HasValue(7)); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(0, 8), false),
+                       llvm::Failed()); // out of range
+  // middle line
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, -1)),
+                       llvm::Failed()); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 0)),
+                       llvm::HasValue(8)); // first character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3)),
+                       llvm::HasValue(11)); // middle character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 3), false),
+                       llvm::HasValue(11));
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 6)),
+                       llvm::HasValue(16)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 7)),
+                       llvm::HasValue(17)); // the newline itself
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8)),
+                       llvm::HasValue(17)); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(1, 8), false),
+                       llvm::Failed()); // out of range
+  // last line
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, -1)),
+                       llvm::Failed()); // out of range
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 0)),
+                       llvm::HasValue(18)); // first character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 4)),
+                       llvm::HasValue(22)); // Before astral character.
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 5), false),
+                       llvm::HasValue(26)); // after astral character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 7)),
+                       llvm::HasValue(28)); // last character
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 8)),
+                       llvm::HasValue(29)); // EOF
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(2, 9), false),
+                       llvm::Failed()); // out of range
+  // line out of bounds
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 0)), llvm::Failed());
+  EXPECT_THAT_EXPECTED(positionToOffset(File, position(3, 1)), llvm::Failed());
+
   // Test UTF-8, where transformations are trivial.
   WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8);
   EXPECT_THAT_EXPECTED(positionToOffset(File, position(-1, 2)), llvm::Failed());
@@ -169,6 +235,27 @@
   EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 9)) << "EOF";
   EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 9)) << "out of bounds";
 
+  // Codepoints are similar, except near astral characters.
+  WithContextValue UTF32(kCurrentOffsetEncoding, OffsetEncoding::UTF32);
+  EXPECT_THAT(offsetToPosition(File, 0), Pos(0, 0)) << "start of file";
+  EXPECT_THAT(offsetToPosition(File, 3), Pos(0, 3)) << "in first line";
+  EXPECT_THAT(offsetToPosition(File, 6), Pos(0, 6)) << "end of first line";
+  EXPECT_THAT(offsetToPosition(File, 7), Pos(0, 7)) << "first newline";
+  EXPECT_THAT(offsetToPosition(File, 8), Pos(1, 0)) << "start of second line";
+  EXPECT_THAT(offsetToPosition(File, 12), Pos(1, 4)) << "before BMP char";
+  EXPECT_THAT(offsetToPosition(File, 13), Pos(1, 5)) << "in BMP char";
+  EXPECT_THAT(offsetToPosition(File, 15), Pos(1, 5)) << "after BMP char";
+  EXPECT_THAT(offsetToPosition(File, 16), Pos(1, 6)) << "end of second line";
+  EXPECT_THAT(offsetToPosition(File, 17), Pos(1, 7)) << "second newline";
+  EXPECT_THAT(offsetToPosition(File, 18), Pos(2, 0)) << "start of last line";
+  EXPECT_THAT(offsetToPosition(File, 21), Pos(2, 3)) << "in last line";
+  EXPECT_THAT(offsetToPosition(File, 22), Pos(2, 4)) << "before astral char";
+  EXPECT_THAT(offsetToPosition(File, 24), Pos(2, 5)) << "in astral char";
+  EXPECT_THAT(offsetToPosition(File, 26), Pos(2, 5)) << "after astral char";
+  EXPECT_THAT(offsetToPosition(File, 28), Pos(2, 7)) << "end of last line";
+  EXPECT_THAT(offsetToPosition(File, 29), Pos(2, 8)) << "EOF";
+  EXPECT_THAT(offsetToPosition(File, 30), Pos(2, 8)) << "out of bounds";
+
   WithContextValue UTF8(kCurrentOffsetEncoding, OffsetEncoding::UTF8);
   for (Line L : FileLines) {
     for (unsigned I = 0; I <= L.Length; ++I)