Basic support for C++0x Unicode types.  Support for literals will follow in an incremental patch.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@75622 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/include/clang/AST/ASTContext.h b/include/clang/AST/ASTContext.h
index b13028d..56b3e69 100644
--- a/include/clang/AST/ASTContext.h
+++ b/include/clang/AST/ASTContext.h
@@ -198,7 +198,9 @@
   QualType VoidTy;
   QualType BoolTy;
   QualType CharTy;
-  QualType WCharTy; // [C++ 3.9.1p5], integer type in C99.
+  QualType WCharTy;  // [C++ 3.9.1p5], integer type in C99.
+  QualType Char16Ty; // [C++0x 3.9.1p5], target integer type in C.
+  QualType Char32Ty; // [C++0x 3.9.1p5], target integer type in C.
   QualType SignedCharTy, ShortTy, IntTy, LongTy, LongLongTy, Int128Ty;
   QualType UnsignedCharTy, UnsignedShortTy, UnsignedIntTy, UnsignedLongTy;
   QualType UnsignedLongLongTy, UnsignedInt128Ty;
diff --git a/include/clang/AST/Type.h b/include/clang/AST/Type.h
index 8634bb0..d3f7b5b 100644
--- a/include/clang/AST/Type.h
+++ b/include/clang/AST/Type.h
@@ -567,6 +567,8 @@
     Bool,     // This is bool and/or _Bool.
     Char_U,   // This is 'char' for targets where char is unsigned.
     UChar,    // This is explicitly qualified unsigned char.
+    Char16,   // This is 'char16_t' for C++0x.
+    Char32,   // This is 'char32_t' for C++0x.
     UShort,
     UInt,
     ULong,
diff --git a/include/clang/Basic/TargetInfo.h b/include/clang/Basic/TargetInfo.h
index 537d553..b1f9821 100644
--- a/include/clang/Basic/TargetInfo.h
+++ b/include/clang/Basic/TargetInfo.h
@@ -41,6 +41,8 @@
   bool TLSSupported;
   unsigned char PointerWidth, PointerAlign;
   unsigned char WCharWidth, WCharAlign;
+  unsigned char Char16Width, Char16Align;
+  unsigned char Char32Width, Char32Align;
   unsigned char IntWidth, IntAlign;
   unsigned char FloatWidth, FloatAlign;
   unsigned char DoubleWidth, DoubleAlign;
@@ -77,7 +79,7 @@
   };
 protected:
   IntType SizeType, IntMaxType, UIntMaxType, PtrDiffType, IntPtrType, WCharType,
-          Int64Type;
+          Char16Type, Char32Type, Int64Type;
 public:
   IntType getSizeType() const { return SizeType; }
   IntType getIntMaxType() const { return IntMaxType; }
@@ -87,6 +89,8 @@
   }
   IntType getIntPtrType() const { return IntPtrType; }
   IntType getWCharType() const { return WCharType; }
+  IntType getChar16Type() const { return Char16Type; }
+  IntType getChar32Type() const { return Char32Type; }
   IntType getInt64Type() const { return Int64Type; }
 
   /// getPointerWidth - Return the width of pointers on this target, for the
@@ -102,13 +106,9 @@
   /// target, in bits.
   unsigned getBoolWidth(bool isWide = false) const { return 8; }  // FIXME
   unsigned getBoolAlign(bool isWide = false) const { return 8; }  // FIXME
-  
-  unsigned getCharWidth(bool isWide = false) const {
-    return isWide ? getWCharWidth() : 8; // FIXME
-  }
-  unsigned getCharAlign(bool isWide = false) const {
-    return isWide ? getWCharAlign() : 8; // FIXME
-  }
+ 
+  unsigned getCharWidth() const { return 8; } // FIXME
+  unsigned getCharAlign() const { return 8; } // FIXME
   
   /// getShortWidth/Align - Return the size of 'signed short' and
   /// 'unsigned short' for this target, in bits.  
@@ -130,11 +130,21 @@
   unsigned getLongLongWidth() const { return LongLongWidth; }
   unsigned getLongLongAlign() const { return LongLongAlign; }
   
-  /// getWcharWidth/Align - Return the size of 'wchar_t' for this target, in
+  /// getWCharWidth/Align - Return the size of 'wchar_t' for this target, in
   /// bits.
   unsigned getWCharWidth() const { return WCharWidth; }
   unsigned getWCharAlign() const { return WCharAlign; }
 
+  /// getChar16Width/Align - Return the size and alignment of 'char16_t' for
+  /// this target, in bits.
+  unsigned getChar16Width() const { return Char16Width; }
+  unsigned getChar16Align() const { return Char16Align; }
+
+  /// getChar32Width/Align - Return the size and alignment of 'char32_t' for
+  /// this target, in bits.
+  unsigned getChar32Width() const { return Char32Width; }
+  unsigned getChar32Align() const { return Char32Align; }
+
   /// getFloatWidth/Align/Format - Return the size/align/format of 'float'.
   unsigned getFloatWidth() const { return FloatWidth; }
   unsigned getFloatAlign() const { return FloatAlign; }
diff --git a/include/clang/Frontend/PCHBitCodes.h b/include/clang/Frontend/PCHBitCodes.h
index b615543..a1feec3 100644
--- a/include/clang/Frontend/PCHBitCodes.h
+++ b/include/clang/Frontend/PCHBitCodes.h
@@ -329,7 +329,11 @@
       /// \brief The '__int128_t' type.
       PREDEF_TYPE_INT128_ID     = 22,
       /// \brief The type of 'nullptr'.
-      PREDEF_TYPE_NULLPTR_ID    = 23
+      PREDEF_TYPE_NULLPTR_ID    = 23,
+      /// \brief The C++ 'char16_t' type.
+      PREDEF_TYPE_CHAR16_ID     = 24,
+      /// \brief The C++ 'char32_t' type.
+      PREDEF_TYPE_CHAR32_ID     = 25
     };
 
     /// \brief The number of predefined type IDs that are reserved for
diff --git a/include/clang/Frontend/TypeXML.def b/include/clang/Frontend/TypeXML.def
index 2a78fd9..dd12738 100644
--- a/include/clang/Frontend/TypeXML.def
+++ b/include/clang/Frontend/TypeXML.def
@@ -104,6 +104,8 @@
 	  ENUM_XML(BuiltinType::Double, "double");
 	  ENUM_XML(BuiltinType::LongDouble, "long double");
 	  ENUM_XML(BuiltinType::WChar, "wchar_t");
+	  ENUM_XML(BuiltinType::Char16, "char16_t");
+	  ENUM_XML(BuiltinType::Char32, "char32_t");
 	  ENUM_XML(BuiltinType::NullPtr, "nullptr_t");        // This is the type of C++0x 'nullptr'.
 	  ENUM_XML(BuiltinType::Overload, "overloaded");
 	  ENUM_XML(BuiltinType::Dependent, "dependent");
diff --git a/include/clang/Parse/DeclSpec.h b/include/clang/Parse/DeclSpec.h
index 0c57915..b6044ff 100644
--- a/include/clang/Parse/DeclSpec.h
+++ b/include/clang/Parse/DeclSpec.h
@@ -68,6 +68,8 @@
     TST_void,
     TST_char,
     TST_wchar,        // C++ wchar_t
+    TST_char16,       // C++0x char16_t
+    TST_char32,       // C++0x char32_t
     TST_int,
     TST_float,
     TST_double,
diff --git a/lib/AST/ASTContext.cpp b/lib/AST/ASTContext.cpp
index 5de6659..9d1e126 100644
--- a/lib/AST/ASTContext.cpp
+++ b/lib/AST/ASTContext.cpp
@@ -170,6 +170,16 @@
   else // C99
     WCharTy = getFromTargetType(Target.getWCharType());
 
+  if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
+    InitBuiltinType(Char16Ty,           BuiltinType::Char16);
+  else // C99
+    Char16Ty = getFromTargetType(Target.getChar16Type());
+
+  if (LangOpts.CPlusPlus) // C++0x 3.9.1p5, extension for C++
+    InitBuiltinType(Char32Ty,           BuiltinType::Char32);
+  else // C99
+    Char32Ty = getFromTargetType(Target.getChar32Type());
+
   // Placeholder type for functions.
   InitBuiltinType(OverloadTy,          BuiltinType::Overload);
 
@@ -522,6 +532,14 @@
       Width = Target.getWCharWidth();
       Align = Target.getWCharAlign();
       break;
+    case BuiltinType::Char16:
+      Width = Target.getChar16Width();
+      Align = Target.getChar16Align();
+      break;
+    case BuiltinType::Char32:
+      Width = Target.getChar32Width();
+      Align = Target.getChar32Align();
+      break;
     case BuiltinType::UShort:
     case BuiltinType::Short:
       Width = Target.getShortWidth();
@@ -2326,6 +2344,12 @@
   if (T->isSpecificBuiltinType(BuiltinType::WChar))
     T = getFromTargetType(Target.getWCharType()).getTypePtr();
 
+  if (T->isSpecificBuiltinType(BuiltinType::Char16))
+    T = getFromTargetType(Target.getChar16Type()).getTypePtr();
+
+  if (T->isSpecificBuiltinType(BuiltinType::Char32))
+    T = getFromTargetType(Target.getChar32Type()).getTypePtr();
+
   // There are two things which impact the integer rank: the width, and
   // the ordering of builtins.  The builtin ordering is encoded in the
   // bottom three bits; the width is encoded in the bits above that.
diff --git a/lib/AST/Type.cpp b/lib/AST/Type.cpp
index 7a9faac..9edb9c0 100644
--- a/lib/AST/Type.cpp
+++ b/lib/AST/Type.cpp
@@ -1003,6 +1003,8 @@
   case Double:            return "double";
   case LongDouble:        return "long double";
   case WChar:             return "wchar_t";
+  case Char16:            return "char16_t";
+  case Char32:            return "char32_t";
   case NullPtr:           return "nullptr_t";
   case Overload:          return "<overloaded function type>";
   case Dependent:         return "<dependent type>";
diff --git a/lib/Basic/TargetInfo.cpp b/lib/Basic/TargetInfo.cpp
index ba7f190..5b2ffb7 100644
--- a/lib/Basic/TargetInfo.cpp
+++ b/lib/Basic/TargetInfo.cpp
@@ -25,6 +25,8 @@
   TLSSupported = true;
   PointerWidth = PointerAlign = 32;
   WCharWidth = WCharAlign = 32;
+  Char16Width = Char16Align = 16;
+  Char32Width = Char32Align = 32;
   IntWidth = IntAlign = 32;
   LongWidth = LongAlign = 32;
   LongLongWidth = LongLongAlign = 64;
@@ -41,6 +43,8 @@
   UIntMaxType = UnsignedLongLong;
   IntPtrType = SignedLong;
   WCharType = SignedInt;
+  Char16Type = UnsignedShort;
+  Char32Type = UnsignedInt;
   Int64Type = SignedLongLong;
   FloatFormat = &llvm::APFloat::IEEEsingle;
   DoubleFormat = &llvm::APFloat::IEEEdouble;
diff --git a/lib/CodeGen/CodeGenTypes.cpp b/lib/CodeGen/CodeGenTypes.cpp
index 6add2c2..5ed2b7c 100644
--- a/lib/CodeGen/CodeGenTypes.cpp
+++ b/lib/CodeGen/CodeGenTypes.cpp
@@ -249,6 +249,8 @@
     case BuiltinType::LongLong:
     case BuiltinType::ULongLong:
     case BuiltinType::WChar:
+    case BuiltinType::Char16:
+    case BuiltinType::Char32:
       return llvm::IntegerType::get(
         static_cast<unsigned>(Context.getTypeSize(T)));
       
diff --git a/lib/CodeGen/Mangle.cpp b/lib/CodeGen/Mangle.cpp
index 97c26f8..cd0e2ea 100644
--- a/lib/CodeGen/Mangle.cpp
+++ b/lib/CodeGen/Mangle.cpp
@@ -540,8 +540,8 @@
   // UNSUPPORTED:    ::= De # IEEE 754r decimal floating point (128 bits)
   // UNSUPPORTED:    ::= Df # IEEE 754r decimal floating point (32 bits)
   // UNSUPPORTED:    ::= Dh # IEEE 754r half-precision floating point (16 bits)
-  // UNSUPPORTED:    ::= Di # char32_t
-  // UNSUPPORTED:    ::= Ds # char16_t
+  //                 ::= Di # char32_t
+  //                 ::= Ds # char16_t
   //                 ::= u <source-name>    # vendor extended type
   // From our point of view, std::nullptr_t is a builtin, but as far as mangling
   // is concerned, it's a type called std::nullptr_t.
@@ -557,6 +557,8 @@
   case BuiltinType::UInt128: Out << 'o'; break;
   case BuiltinType::SChar: Out << 'a'; break;
   case BuiltinType::WChar: Out << 'w'; break;
+  case BuiltinType::Char16: Out << "Ds"; break;
+  case BuiltinType::Char32: Out << "Di"; break;
   case BuiltinType::Short: Out << 's'; break;
   case BuiltinType::Int: Out << 'i'; break;
   case BuiltinType::Long: Out << 'l'; break;
diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp
index c922b0d..012c6fe 100644
--- a/lib/Frontend/PCHReader.cpp
+++ b/lib/Frontend/PCHReader.cpp
@@ -1984,6 +1984,8 @@
     case pch::PREDEF_TYPE_OVERLOAD_ID:   T = Context->OverloadTy;         break;
     case pch::PREDEF_TYPE_DEPENDENT_ID:  T = Context->DependentTy;        break;
     case pch::PREDEF_TYPE_NULLPTR_ID:    T = Context->NullPtrTy;          break;
+    case pch::PREDEF_TYPE_CHAR16_ID:     T = Context->Char16Ty;           break;
+    case pch::PREDEF_TYPE_CHAR32_ID:     T = Context->Char32Ty;           break;
     }
 
     assert(!T.isNull() && "Unknown predefined type");
diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp
index 57577b7..333bcc9 100644
--- a/lib/Frontend/PCHWriter.cpp
+++ b/lib/Frontend/PCHWriter.cpp
@@ -2005,6 +2005,8 @@
     case BuiltinType::Double:     ID = pch::PREDEF_TYPE_DOUBLE_ID;     break;
     case BuiltinType::LongDouble: ID = pch::PREDEF_TYPE_LONGDOUBLE_ID; break;
     case BuiltinType::NullPtr:    ID = pch::PREDEF_TYPE_NULLPTR_ID;    break;
+    case BuiltinType::Char16:     ID = pch::PREDEF_TYPE_CHAR16_ID;     break;
+    case BuiltinType::Char32:     ID = pch::PREDEF_TYPE_CHAR32_ID;     break;
     case BuiltinType::Overload:   ID = pch::PREDEF_TYPE_OVERLOAD_ID;   break;
     case BuiltinType::Dependent:  ID = pch::PREDEF_TYPE_DEPENDENT_ID;  break;
     case BuiltinType::UndeducedAuto:
diff --git a/lib/Lex/PPExpressions.cpp b/lib/Lex/PPExpressions.cpp
index c98acc4..a7307c6 100644
--- a/lib/Lex/PPExpressions.cpp
+++ b/lib/Lex/PPExpressions.cpp
@@ -224,8 +224,10 @@
     unsigned NumBits;
     if (Literal.isMultiChar())
       NumBits = TI.getIntWidth();
+    else if (Literal.isWide())
+      NumBits = TI.getWCharWidth();
     else
-      NumBits = TI.getCharWidth(Literal.isWide());
+      NumBits = TI.getCharWidth();
 
     // Set the width.
     llvm::APSInt Val(NumBits);
diff --git a/lib/Parse/DeclSpec.cpp b/lib/Parse/DeclSpec.cpp
index 8b3b285..e58076e 100644
--- a/lib/Parse/DeclSpec.cpp
+++ b/lib/Parse/DeclSpec.cpp
@@ -159,6 +159,8 @@
   case DeclSpec::TST_void:        return "void";
   case DeclSpec::TST_char:        return "char";
   case DeclSpec::TST_wchar:       return "wchar_t";
+  case DeclSpec::TST_char16:      return "char16_t";
+  case DeclSpec::TST_char32:      return "char32_t";
   case DeclSpec::TST_int:         return "int";
   case DeclSpec::TST_float:       return "float";
   case DeclSpec::TST_double:      return "double";
diff --git a/lib/Parse/ParseDecl.cpp b/lib/Parse/ParseDecl.cpp
index 94855b2..cefd325 100644
--- a/lib/Parse/ParseDecl.cpp
+++ b/lib/Parse/ParseDecl.cpp
@@ -159,6 +159,8 @@
             break;
           case tok::kw_char:
           case tok::kw_wchar_t:
+          case tok::kw_char16_t:
+          case tok::kw_char32_t:
           case tok::kw_bool:
           case tok::kw_short:
           case tok::kw_int:
@@ -999,6 +1001,12 @@
     case tok::kw_wchar_t:
       isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec);
       break;
+    case tok::kw_char16_t:
+      isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec);
+      break;
+    case tok::kw_char32_t:
+      isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char32, Loc, PrevSpec);
+      break;
     case tok::kw_bool:
     case tok::kw__Bool:
       isInvalid = DS.SetTypeSpecType(DeclSpec::TST_bool, Loc, PrevSpec);
@@ -1226,6 +1234,12 @@
   case tok::kw_wchar_t:
     isInvalid = DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec);
     break;
+  case tok::kw_char16_t:
+    isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec);
+    break;
+  case tok::kw_char32_t:
+    isInvalid = DS.SetTypeSpecType(DeclSpec::TST_char32, Loc, PrevSpec);
+    break;
   case tok::kw_bool:
   case tok::kw__Bool:
     isInvalid = DS.SetTypeSpecType(DeclSpec::TST_bool, Loc, PrevSpec);
@@ -1714,6 +1728,8 @@
   case tok::kw_void:
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
   case tok::kw_int:
   case tok::kw_float:
   case tok::kw_double:
@@ -1802,6 +1818,9 @@
   case tok::kw_void:
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
+
   case tok::kw_int:
   case tok::kw_float:
   case tok::kw_double:
diff --git a/lib/Parse/ParseExpr.cpp b/lib/Parse/ParseExpr.cpp
index cd7618f..c2086b9 100644
--- a/lib/Parse/ParseExpr.cpp
+++ b/lib/Parse/ParseExpr.cpp
@@ -739,6 +739,8 @@
 
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
   case tok::kw_bool:
   case tok::kw_short:
   case tok::kw_int:
diff --git a/lib/Parse/ParseExprCXX.cpp b/lib/Parse/ParseExprCXX.cpp
index 1c00a8e..68101fc 100644
--- a/lib/Parse/ParseExprCXX.cpp
+++ b/lib/Parse/ParseExprCXX.cpp
@@ -654,6 +654,12 @@
   case tok::kw_wchar_t:
     DS.SetTypeSpecType(DeclSpec::TST_wchar, Loc, PrevSpec);
     break;
+  case tok::kw_char16_t:
+    DS.SetTypeSpecType(DeclSpec::TST_char16, Loc, PrevSpec);
+    break;
+  case tok::kw_char32_t:
+    DS.SetTypeSpecType(DeclSpec::TST_char32, Loc, PrevSpec);
+    break;
   case tok::kw_bool:
     DS.SetTypeSpecType(DeclSpec::TST_bool, Loc, PrevSpec);
     break;
diff --git a/lib/Parse/ParseTentative.cpp b/lib/Parse/ParseTentative.cpp
index 02687a2..97f6f52 100644
--- a/lib/Parse/ParseTentative.cpp
+++ b/lib/Parse/ParseTentative.cpp
@@ -681,6 +681,8 @@
 
   case tok::kw_char:
   case tok::kw_wchar_t:
+  case tok::kw_char16_t:
+  case tok::kw_char32_t:
   case tok::kw_bool:
   case tok::kw_short:
   case tok::kw_int:
diff --git a/lib/Sema/SemaOverload.cpp b/lib/Sema/SemaOverload.cpp
index b7f698e..bee3936 100644
--- a/lib/Sema/SemaOverload.cpp
+++ b/lib/Sema/SemaOverload.cpp
@@ -2792,7 +2792,8 @@
                  LastPromotedArithmeticType = 16;
   const unsigned NumArithmeticTypes = 16;
   QualType ArithmeticTypes[NumArithmeticTypes] = {
-    Context.BoolTy, Context.CharTy, Context.WCharTy,
+    Context.BoolTy, Context.CharTy, Context.WCharTy, 
+//    Context.Char16Ty, Context.Char32Ty, 
     Context.SignedCharTy, Context.ShortTy,
     Context.UnsignedCharTy, Context.UnsignedShortTy,
     Context.IntTy, Context.LongTy, Context.LongLongTy,
diff --git a/lib/Sema/SemaType.cpp b/lib/Sema/SemaType.cpp
index 7551d93..8195aba 100644
--- a/lib/Sema/SemaType.cpp
+++ b/lib/Sema/SemaType.cpp
@@ -88,6 +88,16 @@
       Result = Context.getUnsignedWCharType();
     }
     break;
+  case DeclSpec::TST_char16:
+      assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
+        "Unknown TSS value");
+      Result = Context.Char16Ty;
+    break;
+  case DeclSpec::TST_char32:
+      assert(DS.getTypeSpecSign() == DeclSpec::TSS_unspecified &&
+        "Unknown TSS value");
+      Result = Context.Char32Ty;
+    break;
   case DeclSpec::TST_unspecified:
     // "<proto1,proto2>" is an objc qualified ID with a missing id.
     if (DeclSpec::ProtocolQualifierListTy PQ = DS.getProtocolQualifiers()) {