When checking the encoding of an 8-bit string literal, don't just check the
first codepoint! Also, don't reject empty raw string literals for spurious
"encoding" issues. Also, don't rely on undefined behavior in ConvertUTF.c.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@152344 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/Basic/ConvertUTF.c b/lib/Basic/ConvertUTF.c
index b3fa916..e197003 100644
--- a/lib/Basic/ConvertUTF.c
+++ b/lib/Basic/ConvertUTF.c
@@ -387,7 +387,7 @@
*/
Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd) {
int length = trailingBytesForUTF8[*source]+1;
- if (source+length > sourceEnd) {
+ if (length > sourceEnd - source) {
return false;
}
return isLegalUTF8(source, length);
@@ -395,6 +395,22 @@
/* --------------------------------------------------------------------- */
+/*
+ * Exported function to return whether a UTF-8 string is legal or not.
+ * This is not used here; it's just exported.
+ */
+Boolean isLegalUTF8String(const UTF8 *source, const UTF8 *sourceEnd) {
+ while (source != sourceEnd) {
+ int length = trailingBytesForUTF8[*source] + 1;
+ if (length > sourceEnd - source || !isLegalUTF8(source, length))
+ return false;
+ source += length;
+ }
+ return true;
+}
+
+/* --------------------------------------------------------------------- */
+
ConversionResult ConvertUTF8toUTF16 (
const UTF8** sourceStart, const UTF8* sourceEnd,
UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags) {
@@ -404,7 +420,7 @@
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
+ if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */
@@ -477,7 +493,7 @@
while (source < sourceEnd) {
UTF32 ch = 0;
unsigned short extraBytesToRead = trailingBytesForUTF8[*source];
- if (source + extraBytesToRead >= sourceEnd) {
+ if (extraBytesToRead >= sourceEnd - source) {
result = sourceExhausted; break;
}
/* Do this check whether lenient or strict */