[clangd] Support utf-8 offsets (rather than utf-16) as a protocol extension

Summary:
Some pieces are still missing: unit tests for the new SourceCode functionality
and a command-line flag to force utf-8 mode. Posting now to get early feedback.

Reviewers: hokein

Subscribers: ilya-biryukov, ioeric, MaskRay, jkorous, arphaman, kadircet, jdoerfert, cfe-commits

Tags: #clang

Differential Revision: https://reviews.llvm.org/D58275

llvm-svn: 357102
diff --git a/clang-tools-extra/clangd/Protocol.cpp b/clang-tools-extra/clangd/Protocol.cpp
index dfd130a..e11c621 100644
--- a/clang-tools-extra/clangd/Protocol.cpp
+++ b/clang-tools-extra/clangd/Protocol.cpp
@@ -16,6 +16,7 @@
 #include "clang/Basic/LLVM.h"
 #include "llvm/ADT/Hashing.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/StringSwitch.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormatVariadic.h"
 #include "llvm/Support/JSON.h"
@@ -311,6 +312,11 @@
       }
     }
   }
+  if (auto *OffsetEncoding = O->get("offsetEncoding")) {
+    R.offsetEncoding.emplace();
+    if (!fromJSON(*OffsetEncoding, *R.offsetEncoding))
+      return false;
+  }
   return true;
 }
 
@@ -932,5 +938,38 @@
   return fromJSON(Params, Base);
 }
 
+/// Converts an OffsetEncoding to its JSON string form: "utf-8" for UTF8,
+/// "utf-16" for UTF16, and "unknown" for UnsupportedEncoding or any value
+/// that is not a named enumerator.
+llvm::json::Value toJSON(const OffsetEncoding &OE) {
+  switch (OE) {
+  case OffsetEncoding::UTF8:
+    return "utf-8";
+  case OffsetEncoding::UTF16:
+    return "utf-16";
+  case OffsetEncoding::UnsupportedEncoding:
+    break;
+  }
+  // Return after the switch instead of inside the last case: every
+  // enumerator stays listed explicitly, yet control can never run off the
+  // end of this value-returning function.
+  return "unknown";
+}
+
+/// Parses an OffsetEncoding from a JSON value.
+/// Returns false, leaving OE untouched, if V does not hold a string.
+/// Otherwise returns true; any string other than "utf-8" or "utf-16" sets OE
+/// to UnsupportedEncoding instead of failing the parse.
+bool fromJSON(const llvm::json::Value &V, OffsetEncoding &OE) {
+  auto Str = V.getAsString();
+  if (!Str)
+    return false;
+  OE = llvm::StringSwitch<OffsetEncoding>(*Str)
+           .Case("utf-8", OffsetEncoding::UTF8)
+           .Case("utf-16", OffsetEncoding::UTF16)
+           .Default(OffsetEncoding::UnsupportedEncoding);
+  return true;
+}
+
 } // namespace clangd
 } // namespace clang