When a bad UTF-8 encoding or bogus escape sequence is encountered in a
string literal, produce a diagnostic pointing at the erroneous character
range, not at the start of the literal.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@163459 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c
index 2e25e79..ec57be7 100644
--- a/lib/Basic/ConvertUTF.c
+++ b/lib/Basic/ConvertUTF.c
@@ -393,15 +393,25 @@
/* --------------------------------------------------------------------- */
/*
+ * Exported function to return the total number of bytes in a codepoint
+ * represented in UTF-8, given the value of the first byte.
+ */
+unsigned getNumBytesForUTF8(UTF8 first) {
+ return trailingBytesForUTF8[first] + 1;
+}
+
+/* --------------------------------------------------------------------- */
+
+/*
* Exported function to return whether a UTF-8 string is legal or not.
* This is not used here; it's just exported.
*/
-Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
- while (source != sourceEnd) {
- int length = trailingBytesForUTF8[*source] + 1;
- if (length > sourceEnd - source || !isLegalUTF8(source, length))
+Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd) {
+ while (*source != sourceEnd) {
+ int length = trailingBytesForUTF8[**source] + 1;
+ if (length > sourceEnd - *source || !isLegalUTF8(*source, length))
return false;
- source += length;
+ *source += length;
}
return true;
}