Emit 4 byte UTF-sequences in place of encoded surrogate pairs.

Symmetric with a5afcfc73141e5e378d79a326d0 which converts 4 byte UTF-8
sequences to surrogate pairs.

bug: 18848397

Change-Id: I42adc275b7e0df0cbbd9d8a799e8b0447d8f5cae
diff --git a/runtime/jni_internal_test.cc b/runtime/jni_internal_test.cc
index 1048214..5516eab 100644
--- a/runtime/jni_internal_test.cc
+++ b/runtime/jni_internal_test.cc
@@ -1355,24 +1355,38 @@
   s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80");
   EXPECT_NE(s, nullptr);
   EXPECT_EQ(2, env_->GetStringLength(s));
-  // Note that this uses 2 x 3 byte UTF sequences, one
-  // for each half of the surrogate pair.
-  EXPECT_EQ(6, env_->GetStringUTFLength(s));
+
+  // The surrogate pair gets encoded into a 4 byte UTF sequence..
+  EXPECT_EQ(4, env_->GetStringUTFLength(s));
   const char* chars = env_->GetStringUTFChars(s, nullptr);
-  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80", chars);
+  EXPECT_STREQ("\xf0\x90\x90\x80", chars);
   env_->ReleaseStringUTFChars(s, chars);
 
+  // .. but is stored as is in the utf-16 representation.
+  const jchar* jchars = env_->GetStringChars(s, nullptr);
+  EXPECT_EQ(0xd801, jchars[0]);
+  EXPECT_EQ(0xdc00, jchars[1]);
+  env_->ReleaseStringChars(s, jchars);
+
   // 4 byte UTF sequence appended to an encoded surrogate pair.
   s = env_->NewStringUTF("\xed\xa0\x81\xed\xb0\x80 \xf0\x9f\x8f\xa0");
   EXPECT_NE(s, nullptr);
-  EXPECT_EQ(5, env_->GetStringLength(s));
-  EXPECT_EQ(13, env_->GetStringUTFLength(s));
-  chars = env_->GetStringUTFChars(s, nullptr);
+
   // The 4 byte sequence {0xf0, 0x9f, 0x8f, 0xa0} is converted into a surrogate
-  // pair {0xd83c, 0xdfe0} which is then converted into a two three byte
-  // sequences {0xed 0xa0, 0xbc} and {0xed, 0xbf, 0xa0}, one for each half of
-  // the surrogate pair.
-  EXPECT_STREQ("\xed\xa0\x81\xed\xb0\x80 \xed\xa0\xbc\xed\xbf\xa0", chars);
+  // pair {0xd83c, 0xdfe0}.
+  EXPECT_EQ(5, env_->GetStringLength(s));
+  jchars = env_->GetStringChars(s, nullptr);
+  // The first surrogate pair, encoded as such in the input.
+  EXPECT_EQ(0xd801, jchars[0]);
+  EXPECT_EQ(0xdc00, jchars[1]);
+  // The second surrogate pair, from the 4 byte UTF sequence in the input.
+  EXPECT_EQ(0xd83c, jchars[3]);
+  EXPECT_EQ(0xdfe0, jchars[4]);
+  env_->ReleaseStringChars(s, jchars);
+
+  EXPECT_EQ(9, env_->GetStringUTFLength(s));
+  chars = env_->GetStringUTFChars(s, nullptr);
+  EXPECT_STREQ("\xf0\x90\x90\x80 \xf0\x9f\x8f\xa0", chars);
   env_->ReleaseStringUTFChars(s, chars);
 
   // A string with 1, 2, 3 and 4 byte UTF sequences with spaces
@@ -1380,7 +1394,7 @@
   s = env_->NewStringUTF("\x24 \xc2\xa2 \xe2\x82\xac \xf0\x9f\x8f\xa0");
   EXPECT_NE(s, nullptr);
   EXPECT_EQ(8, env_->GetStringLength(s));
-  EXPECT_EQ(15, env_->GetStringUTFLength(s));
+  EXPECT_EQ(13, env_->GetStringUTFLength(s));
 }
 
 TEST_F(JniInternalTest, NewString) {