[c++20] Implement P0482R6: enable -fchar8_t by default in C++20 mode.
This unfortunately results in a substantial breaking change when
switching to C++20, but it's not yet clear what / how much we should
do about that. We may want to add a compatibility conversion from
u8 string literals to const char*, similar to how C++98 provided a
compatibility conversion from string literals to non-const char*,
but that's not handled by this patch.
The feature can be disabled in C++20 mode with -fno-char8_t.
llvm-svn: 346892
diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp
index c549514..980dbf9 100644
--- a/clang/lib/Sema/SemaExpr.cpp
+++ b/clang/lib/Sema/SemaExpr.cpp
@@ -1559,6 +1559,32 @@
CharTy = Context.UnsignedCharTy;
}
+ // Warn on initializing an array of char from a u8 string literal; this
+ // becomes ill-formed in C++2a.
+ if (getLangOpts().CPlusPlus && !getLangOpts().CPlusPlus2a &&
+ !getLangOpts().Char8 && Kind == StringLiteral::UTF8) {
+ Diag(StringTokLocs.front(), diag::warn_cxx2a_compat_utf8_string);
+
+ // Create removals for all 'u8' prefixes in the string literal(s). This
+ // ensures C++2a compatibility (but may change the program behavior when
+ // built by non-Clang compilers for which the execution character set is
+ // not always UTF-8).
+ auto RemovalDiag = PDiag(diag::note_cxx2a_compat_utf8_string_remove_u8);
+ SourceLocation RemovalDiagLoc;
+ for (const Token &Tok : StringToks) {
+ if (Tok.getKind() == tok::utf8_string_literal) {
+ if (RemovalDiagLoc.isInvalid())
+ RemovalDiagLoc = Tok.getLocation();
+ RemovalDiag << FixItHint::CreateRemoval(CharSourceRange::getCharRange(
+ Tok.getLocation(),
+ Lexer::AdvanceToTokenCharacter(Tok.getLocation(), 2,
+ getSourceManager(), getLangOpts())));
+ }
+ }
+ Diag(RemovalDiagLoc, RemovalDiag);
+ }
+
+
QualType CharTyConst = CharTy;
// A C++ string literal has a const-qualified element type (C++ 2.13.4p1).
if (getLangOpts().CPlusPlus || getLangOpts().ConstStrings)
diff --git a/clang/lib/Sema/SemaInit.cpp b/clang/lib/Sema/SemaInit.cpp
index 438e904..5249986 100644
--- a/clang/lib/Sema/SemaInit.cpp
+++ b/clang/lib/Sema/SemaInit.cpp
@@ -8268,7 +8268,8 @@
break;
case FK_UTF8StringIntoPlainChar:
S.Diag(Kind.getLocation(),
- diag::err_array_init_utf8_string_into_char);
+ diag::err_array_init_utf8_string_into_char)
+ << S.getLangOpts().CPlusPlus2a;
break;
case FK_ArrayTypeMismatch:
case FK_NonConstantArrayInit: