Fix the representation of wide strings in the AST and IR so that it uses the native representation of integers for the elements. This fixes a bunch of nastiness involving
treating wide strings as a series of bytes.
Patch by Seth Cantrell.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@143417 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CGExprConstant.cpp b/lib/CodeGen/CGExprConstant.cpp
index 0622c10..889cdd8 100644
--- a/lib/CodeGen/CGExprConstant.cpp
+++ b/lib/CodeGen/CGExprConstant.cpp
@@ -817,13 +817,7 @@
}
llvm::Constant *VisitStringLiteral(StringLiteral *E) {
- assert(!E->getType()->isPointerType() && "Strings are always arrays");
-
- // This must be a string initializing an array in a static initializer.
- // Don't emit it as the address of the string, emit the string data itself
- // as an inline array.
- return llvm::ConstantArray::get(VMContext,
- CGM.GetStringForStringLiteral(E), false);
+ return CGM.GetConstantArrayFromStringLiteral(E);
}
llvm::Constant *VisitObjCEncodeExpr(ObjCEncodeExpr *E) {
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index c796e0d..0905c4b 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -2037,6 +2037,8 @@
/// GetStringForStringLiteral - Return the appropriate bytes for a
/// string literal, properly padded to match the literal type.
std::string CodeGenModule::GetStringForStringLiteral(const StringLiteral *E) {
+ assert((E->isAscii() || E->isUTF8())
+ && "Use GetConstantArrayFromStringLiteral for wide strings");
const ASTContext &Context = getContext();
const ConstantArrayType *CAT =
Context.getAsConstantArrayType(E->getType());
@@ -2045,27 +2047,44 @@
// Resize the string to the right size.
uint64_t RealLen = CAT->getSize().getZExtValue();
- switch (E->getKind()) {
- case StringLiteral::Ascii:
- case StringLiteral::UTF8:
- break;
- case StringLiteral::Wide:
- RealLen *= Context.getTargetInfo().getWCharWidth() / Context.getCharWidth();
- break;
- case StringLiteral::UTF16:
- RealLen *= Context.getTargetInfo().getChar16Width() / Context.getCharWidth();
- break;
- case StringLiteral::UTF32:
- RealLen *= Context.getTargetInfo().getChar32Width() / Context.getCharWidth();
- break;
- }
-
std::string Str = E->getString().str();
Str.resize(RealLen, '\0');
return Str;
}
+llvm::Constant *
+CodeGenModule::GetConstantArrayFromStringLiteral(const StringLiteral *E) {
+ assert(!E->getType()->isPointerType() && "Strings are always arrays");
+
+ // Don't emit it as the address of the string, emit the string data itself
+ // as an inline array.
+ if (E->getCharByteWidth()==1) {
+ return llvm::ConstantArray::get(VMContext,
+ GetStringForStringLiteral(E), false);
+ } else {
+ llvm::ArrayType *AType =
+ cast<llvm::ArrayType>(getTypes().ConvertType(E->getType()));
+ llvm::Type *ElemTy = AType->getElementType();
+ unsigned NumElements = AType->getNumElements();
+ std::vector<llvm::Constant*> Elts;
+ Elts.reserve(NumElements);
+
+ for(unsigned i=0;i<E->getLength();++i) {
+ unsigned value = E->getCodeUnit(i);
+ llvm::Constant *C = llvm::ConstantInt::get(ElemTy,value,false);
+ Elts.push_back(C);
+ }
+ for(unsigned i=E->getLength();i<NumElements;++i) {
+ llvm::Constant *C = llvm::ConstantInt::get(ElemTy,0,false);
+ Elts.push_back(C);
+ }
+
+ return llvm::ConstantArray::get(AType, Elts);
+ }
+
+}
+
/// GetAddrOfConstantStringFromLiteral - Return a pointer to a
/// constant array for the given string literal.
llvm::Constant *
@@ -2073,15 +2092,23 @@
// FIXME: This can be more efficient.
// FIXME: We shouldn't need to bitcast the constant in the wide string case.
CharUnits Align = getContext().getTypeAlignInChars(S->getType());
- llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S),
- /* GlobalName */ 0,
- Align.getQuantity());
- if (S->isWide() || S->isUTF16() || S->isUTF32()) {
- llvm::Type *DestTy =
- llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType()));
- C = llvm::ConstantExpr::getBitCast(C, DestTy);
+ if (S->isAscii() || S->isUTF8()) {
+ return GetAddrOfConstantString(GetStringForStringLiteral(S),
+ /* GlobalName */ 0,
+ Align.getQuantity());
}
- return C;
+
+ // FIXME: the following does not memoize wide strings
+ llvm::Constant *C = GetConstantArrayFromStringLiteral(S);
+ llvm::GlobalVariable *GV =
+ new llvm::GlobalVariable(getModule(),C->getType(),
+ !Features.WritableStrings,
+ llvm::GlobalValue::PrivateLinkage,
+ C,".str");
+ GV->setAlignment(Align.getQuantity());
+ GV->setUnnamedAddr(true);
+
+ return GV;
}
/// GetAddrOfConstantStringFromObjCEncode - Return a pointer to a constant
diff --git a/lib/CodeGen/CodeGenModule.h b/lib/CodeGen/CodeGenModule.h
index ea2e177..0ce698a 100644
--- a/lib/CodeGen/CodeGenModule.h
+++ b/lib/CodeGen/CodeGenModule.h
@@ -565,6 +565,10 @@
/// -fconstant-string-class=class_name option.
llvm::Constant *GetAddrOfConstantString(const StringLiteral *Literal);
+ /// GetConstantArrayFromStringLiteral - Return a constant array for the given
+ /// string.
+ llvm::Constant *GetConstantArrayFromStringLiteral(const StringLiteral *E);
+
/// GetAddrOfConstantStringFromLiteral - Return a pointer to a constant array
/// for the given string literal.
llvm::Constant *GetAddrOfConstantStringFromLiteral(const StringLiteral *S);