Output UTF-16 string literals independent of host byte order.
 - Steve, can you take a look at this? It seems like this code should live
   elsewhere, and there is a FIXME about having Sema validate the UTF-8 to
   UTF-16 conversion.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@76915 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/CodeGen/CodeGenModule.cpp b/lib/CodeGen/CodeGenModule.cpp
index 994f60b..803df31 100644
--- a/lib/CodeGen/CodeGenModule.cpp
+++ b/lib/CodeGen/CodeGenModule.cpp
@@ -1193,6 +1193,7 @@
 static llvm::StringMapEntry<llvm::Constant*> &
 GetConstantCFStringEntry(llvm::StringMap<llvm::Constant*> &Map,
                          const StringLiteral *Literal,
+                         bool TargetIsLSB,
                          bool &IsUTF16,
                          unsigned &StringLength) {
   unsigned NumBytes = Literal->getByteLength();
@@ -1223,15 +1224,28 @@
                                                 StringLength));
   }
 
-  // FIXME: Storing UTF-16 in a C string is a hack to test Unicode strings
-  // without doing more surgery to this routine. Since we aren't explicitly
-  // checking for endianness here, it's also a bug (when generating code for
-  // a target that doesn't match the host endianness). Modeling this as an
-  // i16 array is likely the cleanest solution.
+  // ConvertUTF8toUTF16 returns the length in ToPtr.
   StringLength = ToPtr - &ToBuf[0];
+
+  // Render the UTF-16 string into a byte array and convert to the target byte
+  // order.
+  //
+  // FIXME: This isn't something we should need to do here.
+  llvm::SmallString<128> AsBytes;
+  AsBytes.reserve(StringLength * 2);
+  for (unsigned i = 0; i != StringLength; ++i) {
+    unsigned short Val = ToBuf[i];
+    if (TargetIsLSB) {
+      AsBytes.push_back(Val & 0xFF);
+      AsBytes.push_back(Val >> 8);
+    } else {
+      AsBytes.push_back(Val >> 8);
+      AsBytes.push_back(Val & 0xFF);
+    }
+  }
+
   IsUTF16 = true;
-  return Map.GetOrCreateValue(llvm::StringRef((char *)&ToBuf[0], 
-                                              StringLength * 2));
+  return Map.GetOrCreateValue(llvm::StringRef(AsBytes.data(), AsBytes.size()));
 }
 
 llvm::Constant *
@@ -1239,8 +1253,9 @@
   unsigned StringLength = 0;
   bool isUTF16 = false;
   llvm::StringMapEntry<llvm::Constant*> &Entry =
-    GetConstantCFStringEntry(CFConstantStringMap, Literal, isUTF16, 
-                             StringLength);
+    GetConstantCFStringEntry(CFConstantStringMap, Literal, 
+                             getTargetData().isLittleEndian(),
+                             isUTF16, StringLength);
   
   if (llvm::Constant *C = Entry.getValue())
     return C;