[locale.stdcvt]
git-svn-id: https://llvm.org/svn/llvm-project/libcxx/trunk@105174 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/src/locale.cpp b/src/locale.cpp
index a65d6ed..6c41abf 100644
--- a/src/locale.cpp
+++ b/src/locale.cpp
@@ -9,6 +9,7 @@
#include "string"
#include "locale"
+#include "codecvt"
#include "vector"
#include "algorithm"
#include "algorithm"
@@ -1304,6 +1305,1389 @@
// 040000 - 0FFFFF D8C0 - DBBF, DC00 - DFFF F1 - F3, 80 - BF, 80 - BF, 80 - BF 786432
// 100000 - 10FFFF DBC0 - DBFF, DC00 - DFFF F4 - F4, 80 - 8F, 80 - BF, 80 - BF 65536
+static
+codecvt_base::result
+utf16_to_utf8(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xEF);
+ *to_nxt++ = static_cast<uint8_t>(0xBB);
+ *to_nxt++ = static_cast<uint8_t>(0xBF);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint16_t wc1 = *frm_nxt;
+ if (wc1 > Maxcode)
+ return codecvt_base::error;
+ if (wc1 < 0x0080)
+ {
+ if (to_end-to_nxt < 1)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc1);
+ }
+ else if (wc1 < 0x0800)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xC0 | (wc1 >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x03F));
+ }
+ else if (wc1 < 0xD800)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc1 >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x003F));
+ }
+ else if (wc1 < 0xDC00)
+ {
+ if (frm_end-frm_nxt < 2)
+ return codecvt_base::partial;
+ uint16_t wc2 = frm_nxt[1];
+ if ((wc2 & 0xFC00) != 0xDC00)
+ return codecvt_base::error;
+ if (to_end-to_nxt < 4)
+ return codecvt_base::partial;
+ if ((((((unsigned long)wc1 & 0x03C0) >> 6) + 1) << 16) +
+ (((unsigned long)wc1 & 0x003F) << 10) + (wc2 & 0x03FF) > Maxcode)
+ return codecvt_base::error;
+ ++frm_nxt;
+ uint8_t z = ((wc1 & 0x03C0) >> 6) + 1;
+ *to_nxt++ = static_cast<uint8_t>(0xF0 | (z >> 2));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc2 & 0x003F));
+ }
+ else if (wc1 < 0xE000)
+ {
+ return codecvt_base::error;
+ }
+ else
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc1 >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x003F));
+ }
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf16_to_utf8(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xEF);
+ *to_nxt++ = static_cast<uint8_t>(0xBB);
+ *to_nxt++ = static_cast<uint8_t>(0xBF);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint16_t wc1 = static_cast<uint16_t>(*frm_nxt);
+ if (wc1 > Maxcode)
+ return codecvt_base::error;
+ if (wc1 < 0x0080)
+ {
+ if (to_end-to_nxt < 1)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc1);
+ }
+ else if (wc1 < 0x0800)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xC0 | (wc1 >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x03F));
+ }
+ else if (wc1 < 0xD800)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc1 >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x003F));
+ }
+ else if (wc1 < 0xDC00)
+ {
+ if (frm_end-frm_nxt < 2)
+ return codecvt_base::partial;
+ uint16_t wc2 = static_cast<uint16_t>(frm_nxt[1]);
+ if ((wc2 & 0xFC00) != 0xDC00)
+ return codecvt_base::error;
+ if (to_end-to_nxt < 4)
+ return codecvt_base::partial;
+ if ((((((unsigned long)wc1 & 0x03C0) >> 6) + 1) << 16) +
+ (((unsigned long)wc1 & 0x003F) << 10) + (wc2 & 0x03FF) > Maxcode)
+ return codecvt_base::error;
+ ++frm_nxt;
+ uint8_t z = ((wc1 & 0x03C0) >> 6) + 1;
+ *to_nxt++ = static_cast<uint8_t>(0xF0 | (z >> 2));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc2 & 0x003F));
+ }
+ else if (wc1 < 0xE000)
+ {
+ return codecvt_base::error;
+ }
+ else
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc1 >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc1 & 0x0FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc1 & 0x003F));
+ }
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
+ {
+ uint8_t c1 = *frm_nxt;
+ if (c1 > Maxcode)
+ return codecvt_base::error;
+ if (c1 < 0x80)
+ {
+ *to_nxt = static_cast<uint16_t>(c1);
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ return codecvt_base::error;
+ }
+ else if (c1 < 0xE0)
+ {
+ if (frm_end-frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x1F) << 6) | (c2 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x0F) << 12)
+ | ((c2 & 0x3F) << 6)
+ | (c3 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 3;
+ }
+ else if (c1 < 0xF5)
+ {
+ if (frm_end-frm_nxt < 4)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ uint8_t c4 = frm_nxt[3];
+ switch (c1)
+ {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return codecvt_base::error;
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ if (((((unsigned long)c1 & 7) << 18) +
+ (((unsigned long)c2 & 0x3F) << 12) +
+ (((unsigned long)c3 & 0x3F) << 6) + (c4 & 0x3F)) > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint16_t>(
+ 0xD800
+ | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6)
+ | ((c2 & 0x0F) << 2)
+ | ((c3 & 0x30) >> 4));
+ *++to_nxt = static_cast<uint16_t>(
+ 0xDC00
+ | ((c3 & 0x0F) << 6)
+ | (c4 & 0x3F));
+ frm_nxt += 4;
+ }
+ else
+ {
+ return codecvt_base::error;
+ }
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf8_to_utf16(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
+ {
+ uint8_t c1 = *frm_nxt;
+ if (c1 > Maxcode)
+ return codecvt_base::error;
+ if (c1 < 0x80)
+ {
+ *to_nxt = static_cast<uint32_t>(c1);
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ return codecvt_base::error;
+ }
+ else if (c1 < 0xE0)
+ {
+ if (frm_end-frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x1F) << 6) | (c2 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint32_t>(t);
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x0F) << 12)
+ | ((c2 & 0x3F) << 6)
+ | (c3 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint32_t>(t);
+ frm_nxt += 3;
+ }
+ else if (c1 < 0xF5)
+ {
+ if (frm_end-frm_nxt < 4)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ uint8_t c4 = frm_nxt[3];
+ switch (c1)
+ {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return codecvt_base::error;
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ if (((((unsigned long)c1 & 7) << 18) +
+ (((unsigned long)c2 & 0x3F) << 12) +
+ (((unsigned long)c3 & 0x3F) << 6) + (c4 & 0x3F)) > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint32_t>(
+ 0xD800
+ | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6)
+ | ((c2 & 0x0F) << 2)
+ | ((c3 & 0x30) >> 4));
+ *++to_nxt = static_cast<uint32_t>(
+ 0xDC00
+ | ((c3 & 0x0F) << 6)
+ | (c4 & 0x3F));
+ frm_nxt += 4;
+ }
+ else
+ {
+ return codecvt_base::error;
+ }
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf8_to_utf16_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (size_t nchar16_t = 0; frm_nxt < frm_end && nchar16_t < mx; ++nchar16_t)
+ {
+ uint8_t c1 = *frm_nxt;
+ if (c1 > Maxcode)
+ break;
+ if (c1 < 0x80)
+ {
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ break;
+ }
+ else if (c1 < 0xE0)
+ {
+ if ((frm_end-frm_nxt < 2) || (frm_nxt[1] & 0xC0) != 0x80)
+ break;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x1F) << 6) | (frm_nxt[1] & 0x3F));
+ if (t > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ break;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x0F) << 12)
+ | ((c2 & 0x3F) << 6)
+ | (c3 & 0x3F));
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ break;
+ if ((((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)) > Maxcode)
+ break;
+ frm_nxt += 3;
+ }
+ else if (c1 < 0xF5)
+ {
+ if (frm_end-frm_nxt < 4 || mx-nchar16_t < 2)
+ break;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ uint8_t c4 = frm_nxt[3];
+ switch (c1)
+ {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ break;
+ if (((((unsigned long)c1 & 7) << 18) +
+ (((unsigned long)c2 & 0x3F) << 12) +
+ (((unsigned long)c3 & 0x3F) << 6) + (c4 & 0x3F)) > Maxcode)
+ break;
+ ++nchar16_t;
+ frm_nxt += 4;
+ }
+ else
+ {
+ break;
+ }
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
+static
+codecvt_base::result
+ucs4_to_utf8(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xEF);
+ *to_nxt++ = static_cast<uint8_t>(0xBB);
+ *to_nxt++ = static_cast<uint8_t>(0xBF);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint32_t wc = *frm_nxt;
+ if ((wc & 0xFFFFF800) == 0x00D800 || wc > Maxcode)
+ return codecvt_base::error;
+ if (wc < 0x000080)
+ {
+ if (to_end-to_nxt < 1)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc);
+ }
+ else if (wc < 0x000800)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xC0 | (wc >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc & 0x03F));
+ }
+ else if (wc < 0x010000)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc & 0x0FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc & 0x003F));
+ }
+ else // if (wc < 0x110000)
+ {
+ if (to_end-to_nxt < 4)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xF0 | (wc >> 18));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc & 0x03F000) >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc & 0x000FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc & 0x00003F));
+ }
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf8_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
+ {
+ uint8_t c1 = static_cast<uint8_t>(*frm_nxt);
+ if (c1 < 0x80)
+ {
+ if (c1 > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint32_t>(c1);
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ return codecvt_base::error;
+ }
+ else if (c1 < 0xE0)
+ {
+ if (frm_end-frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint32_t t = static_cast<uint32_t>(((c1 & 0x1F) << 6)
+ | (c2 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint32_t t = static_cast<uint32_t>(((c1 & 0x0F) << 12)
+ | ((c2 & 0x3F) << 6)
+ | (c3 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 3;
+ }
+ else if (c1 < 0xF5)
+ {
+ if (frm_end-frm_nxt < 4)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ uint8_t c4 = frm_nxt[3];
+ switch (c1)
+ {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return codecvt_base::error;
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint32_t t = static_cast<uint32_t>(((c1 & 0x07) << 18)
+ | ((c2 & 0x3F) << 12)
+ | ((c3 & 0x3F) << 6)
+ | (c4 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 4;
+ }
+ else
+ {
+ return codecvt_base::error;
+ }
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf8_to_ucs4_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t)
+ {
+ uint8_t c1 = static_cast<uint8_t>(*frm_nxt);
+ if (c1 < 0x80)
+ {
+ if (c1 > Maxcode)
+ break;
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ break;
+ }
+ else if (c1 < 0xE0)
+ {
+ if ((frm_end-frm_nxt < 2) || ((frm_nxt[1] & 0xC0) != 0x80))
+ break;
+ if ((((c1 & 0x1F) << 6) | (frm_nxt[1] & 0x3F)) > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ break;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ break;
+ if ((((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)) > Maxcode)
+ break;
+ frm_nxt += 3;
+ }
+ else if (c1 < 0xF5)
+ {
+ if (frm_end-frm_nxt < 4)
+ break;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ uint8_t c4 = frm_nxt[3];
+ switch (c1)
+ {
+ case 0xF0:
+ if (!(0x90 <= c2 && c2 <= 0xBF))
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ case 0xF4:
+ if ((c2 & 0xF0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
+ break;
+ uint32_t t = static_cast<uint32_t>(((c1 & 0x07) << 18)
+ | ((c2 & 0x3F) << 12)
+ | ((c3 & 0x3F) << 6)
+ | (c4 & 0x3F));
+ if ((((c1 & 0x07) << 18) | ((c2 & 0x3F) << 12) |
+ ((c3 & 0x3F) << 6) | (c4 & 0x3F)) > Maxcode)
+ break;
+ frm_nxt += 4;
+ }
+ else
+ {
+ break;
+ }
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
+static
+codecvt_base::result
+ucs2_to_utf8(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xEF);
+ *to_nxt++ = static_cast<uint8_t>(0xBB);
+ *to_nxt++ = static_cast<uint8_t>(0xBF);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint16_t wc = *frm_nxt;
+ if ((wc & 0xF800) == 0xD800 || wc > Maxcode)
+ return codecvt_base::error;
+ if (wc < 0x0080)
+ {
+ if (to_end-to_nxt < 1)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc);
+ }
+ else if (wc < 0x0800)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xC0 | (wc >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc & 0x03F));
+ }
+ else // if (wc <= 0xFFFF)
+ {
+ if (to_end-to_nxt < 3)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xE0 | (wc >> 12));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | ((wc & 0x0FC0) >> 6));
+ *to_nxt++ = static_cast<uint8_t>(0x80 | (wc & 0x003F));
+ }
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf8_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
+ {
+ uint8_t c1 = static_cast<uint8_t>(*frm_nxt);
+ if (c1 < 0x80)
+ {
+ if (c1 > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint16_t>(c1);
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ return codecvt_base::error;
+ }
+ else if (c1 < 0xE0)
+ {
+ if (frm_end-frm_nxt < 2)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x1F) << 6)
+ | (c2 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ return codecvt_base::partial;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return codecvt_base::error;
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return codecvt_base::error;
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ return codecvt_base::error;
+ uint16_t t = static_cast<uint16_t>(((c1 & 0x0F) << 12)
+ | ((c2 & 0x3F) << 6)
+ | (c3 & 0x3F));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 3;
+ }
+ else
+ {
+ return codecvt_base::error;
+ }
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf8_to_ucs2_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 3 && frm_nxt[0] == 0xEF && frm_nxt[1] == 0xBB &&
+ frm_nxt[2] == 0xBF)
+ frm_nxt += 3;
+ }
+ for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t)
+ {
+ uint8_t c1 = static_cast<uint8_t>(*frm_nxt);
+ if (c1 < 0x80)
+ {
+ if (c1 > Maxcode)
+ break;
+ ++frm_nxt;
+ }
+ else if (c1 < 0xC2)
+ {
+ break;
+ }
+ else if (c1 < 0xE0)
+ {
+ if ((frm_end-frm_nxt < 2) || ((frm_nxt[1] & 0xC0) != 0x80))
+ break;
+ if ((((c1 & 0x1F) << 6) | (frm_nxt[1] & 0x3F)) > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ else if (c1 < 0xF0)
+ {
+ if (frm_end-frm_nxt < 3)
+ break;
+ uint8_t c2 = frm_nxt[1];
+ uint8_t c3 = frm_nxt[2];
+ switch (c1)
+ {
+ case 0xE0:
+ if ((c2 & 0xE0) != 0xA0)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ case 0xED:
+ if ((c2 & 0xE0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ default:
+ if ((c2 & 0xC0) != 0x80)
+ return static_cast<int>(frm_nxt - frm);
+ break;
+ }
+ if ((c3 & 0xC0) != 0x80)
+ break;
+ if ((((c1 & 0x0F) << 12) | ((c2 & 0x3F) << 6) | (c3 & 0x3F)) > Maxcode)
+ break;
+ frm_nxt += 3;
+ }
+ else
+ {
+ break;
+ }
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
+static
+codecvt_base::result
+ucs4_to_utf16be(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xFE);
+ *to_nxt++ = static_cast<uint8_t>(0xFF);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint32_t wc = *frm_nxt;
+ if ((wc & 0xFFFFF800) == 0x00D800 || wc > Maxcode)
+ return codecvt_base::error;
+ if (wc < 0x010000)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc >> 8);
+ *to_nxt++ = static_cast<uint8_t>(wc);
+ }
+ else
+ {
+ if (to_end-to_nxt < 4)
+ return codecvt_base::partial;
+ uint16_t t = static_cast<uint16_t>(
+ 0xD800
+ | ((((wc & 0x1F0000) >> 16) - 1) << 6)
+ | ((wc & 0x00FC00) >> 10));
+ *to_nxt++ = static_cast<uint8_t>(t >> 8);
+ *to_nxt++ = static_cast<uint8_t>(t);
+ t = static_cast<uint16_t>(0xDC00 | (wc & 0x03FF));
+ *to_nxt++ = static_cast<uint8_t>(t >> 8);
+ *to_nxt++ = static_cast<uint8_t>(t);
+ }
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf16be_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF)
+ frm_nxt += 2;
+ }
+ for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt)
+ {
+ uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1];
+ if ((c1 & 0xFC00) == 0xDC00)
+ return codecvt_base::error;
+ if ((c1 & 0xFC00) != 0xD800)
+ {
+ if (c1 > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint32_t>(c1);
+ frm_nxt += 2;
+ }
+ else
+ {
+ if (frm_end-frm_nxt < 4)
+ return codecvt_base::partial;
+ uint16_t c2 = frm_nxt[2] << 8 | frm_nxt[3];
+ if ((c2 & 0xFC00) != 0xDC00)
+ return codecvt_base::error;
+ uint32_t t = static_cast<uint32_t>(
+ ((((c1 & 0x03C0) >> 6) + 1) << 16)
+ | ((c1 & 0x003F) << 10)
+ | (c2 & 0x03FF));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 4;
+ }
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf16be_to_ucs4_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF)
+ frm_nxt += 2;
+ }
+ for (size_t nchar32_t = 0; frm_nxt < frm_end - 1 && nchar32_t < mx; ++nchar32_t)
+ {
+ uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1];
+ if ((c1 & 0xFC00) == 0xDC00)
+ break;
+ if ((c1 & 0xFC00) != 0xD800)
+ {
+ if (c1 > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ else
+ {
+ if (frm_end-frm_nxt < 4)
+ break;
+ uint16_t c2 = frm_nxt[2] << 8 | frm_nxt[3];
+ if ((c2 & 0xFC00) != 0xDC00)
+ break;
+ uint32_t t = static_cast<uint32_t>(
+ ((((c1 & 0x03C0) >> 6) + 1) << 16)
+ | ((c1 & 0x003F) << 10)
+ | (c2 & 0x03FF));
+ if (t > Maxcode)
+ break;
+ frm_nxt += 4;
+ }
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
+static
+codecvt_base::result
+ucs4_to_utf16le(const uint32_t* frm, const uint32_t* frm_end, const uint32_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xFF);
+ *to_nxt++ = static_cast<uint8_t>(0xFE);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint32_t wc = *frm_nxt;
+ if ((wc & 0xFFFFF800) == 0x00D800 || wc > Maxcode)
+ return codecvt_base::error;
+ if (wc < 0x010000)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc);
+ *to_nxt++ = static_cast<uint8_t>(wc >> 8);
+ }
+ else
+ {
+ if (to_end-to_nxt < 4)
+ return codecvt_base::partial;
+ uint16_t t = static_cast<uint16_t>(
+ 0xD800
+ | ((((wc & 0x1F0000) >> 16) - 1) << 6)
+ | ((wc & 0x00FC00) >> 10));
+ *to_nxt++ = static_cast<uint8_t>(t);
+ *to_nxt++ = static_cast<uint8_t>(t >> 8);
+ t = static_cast<uint16_t>(0xDC00 | (wc & 0x03FF));
+ *to_nxt++ = static_cast<uint8_t>(t);
+ *to_nxt++ = static_cast<uint8_t>(t >> 8);
+ }
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf16le_to_ucs4(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint32_t* to, uint32_t* to_end, uint32_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE)
+ frm_nxt += 2;
+ }
+ for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt)
+ {
+ uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0];
+ if ((c1 & 0xFC00) == 0xDC00)
+ return codecvt_base::error;
+ if ((c1 & 0xFC00) != 0xD800)
+ {
+ if (c1 > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = static_cast<uint32_t>(c1);
+ frm_nxt += 2;
+ }
+ else
+ {
+ if (frm_end-frm_nxt < 4)
+ return codecvt_base::partial;
+ uint16_t c2 = frm_nxt[3] << 8 | frm_nxt[2];
+ if ((c2 & 0xFC00) != 0xDC00)
+ return codecvt_base::error;
+ uint32_t t = static_cast<uint32_t>(
+ ((((c1 & 0x03C0) >> 6) + 1) << 16)
+ | ((c1 & 0x003F) << 10)
+ | (c2 & 0x03FF));
+ if (t > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = t;
+ frm_nxt += 4;
+ }
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf16le_to_ucs4_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE)
+ frm_nxt += 2;
+ }
+ for (size_t nchar32_t = 0; frm_nxt < frm_end - 1 && nchar32_t < mx; ++nchar32_t)
+ {
+ uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0];
+ if ((c1 & 0xFC00) == 0xDC00)
+ break;
+ if ((c1 & 0xFC00) != 0xD800)
+ {
+ if (c1 > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ else
+ {
+ if (frm_end-frm_nxt < 4)
+ break;
+ uint16_t c2 = frm_nxt[3] << 8 | frm_nxt[2];
+ if ((c2 & 0xFC00) != 0xDC00)
+ break;
+ uint32_t t = static_cast<uint32_t>(
+ ((((c1 & 0x03C0) >> 6) + 1) << 16)
+ | ((c1 & 0x003F) << 10)
+ | (c2 & 0x03FF));
+ if (t > Maxcode)
+ break;
+ frm_nxt += 4;
+ }
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
+static
+codecvt_base::result
+ucs2_to_utf16be(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xFE);
+ *to_nxt++ = static_cast<uint8_t>(0xFF);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint16_t wc = *frm_nxt;
+ if ((wc & 0xF800) == 0xD800 || wc > Maxcode)
+ return codecvt_base::error;
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc >> 8);
+ *to_nxt++ = static_cast<uint8_t>(wc);
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf16be_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF)
+ frm_nxt += 2;
+ }
+ for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt)
+ {
+ uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1];
+ if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = c1;
+ frm_nxt += 2;
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf16be_to_ucs2_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFE && frm_nxt[1] == 0xFF)
+ frm_nxt += 2;
+ }
+ for (size_t nchar16_t = 0; frm_nxt < frm_end - 1 && nchar16_t < mx; ++nchar16_t)
+ {
+ uint16_t c1 = frm_nxt[0] << 8 | frm_nxt[1];
+ if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
+static
+codecvt_base::result
+ucs2_to_utf16le(const uint16_t* frm, const uint16_t* frm_end, const uint16_t*& frm_nxt,
+ uint8_t* to, uint8_t* to_end, uint8_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & generate_header)
+ {
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(0xFF);
+ *to_nxt++ = static_cast<uint8_t>(0xFE);
+ }
+ for (; frm_nxt < frm_end; ++frm_nxt)
+ {
+ uint16_t wc = *frm_nxt;
+ if ((wc & 0xF800) == 0xD800 || wc > Maxcode)
+ return codecvt_base::error;
+ if (to_end-to_nxt < 2)
+ return codecvt_base::partial;
+ *to_nxt++ = static_cast<uint8_t>(wc);
+ *to_nxt++ = static_cast<uint8_t>(wc >> 8);
+ }
+ return codecvt_base::ok;
+}
+
+static
+codecvt_base::result
+utf16le_to_ucs2(const uint8_t* frm, const uint8_t* frm_end, const uint8_t*& frm_nxt,
+ uint16_t* to, uint16_t* to_end, uint16_t*& to_nxt,
+ unsigned long Maxcode = 0x10FFFF, codecvt_mode mode = codecvt_mode(0))
+{
+ frm_nxt = frm;
+ to_nxt = to;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE)
+ frm_nxt += 2;
+ }
+ for (; frm_nxt < frm_end - 1 && to_nxt < to_end; ++to_nxt)
+ {
+ uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0];
+ if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode)
+ return codecvt_base::error;
+ *to_nxt = c1;
+ frm_nxt += 2;
+ }
+ return frm_nxt < frm_end ? codecvt_base::partial : codecvt_base::ok;
+}
+
+static
+int
+utf16le_to_ucs2_length(const uint8_t* frm, const uint8_t* frm_end,
+ size_t mx, unsigned long Maxcode = 0x10FFFF,
+ codecvt_mode mode = codecvt_mode(0))
+{
+ const uint8_t* frm_nxt = frm;
+ frm_nxt = frm;
+ if (mode & consume_header)
+ {
+ if (frm_end-frm_nxt >= 2 && frm_nxt[0] == 0xFF && frm_nxt[1] == 0xFE)
+ frm_nxt += 2;
+ }
+ for (size_t nchar16_t = 0; frm_nxt < frm_end - 1 && nchar16_t < mx; ++nchar16_t)
+ {
+ uint16_t c1 = frm_nxt[1] << 8 | frm_nxt[0];
+ if ((c1 & 0xF800) == 0xD800 || c1 > Maxcode)
+ break;
+ frm_nxt += 2;
+ }
+ return static_cast<int>(frm_nxt - frm);
+}
+
// template <> class codecvt<char16_t, char, mbstate_t>
locale::id codecvt<char16_t, char, mbstate_t>::id;
@@ -1317,60 +2701,16 @@
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
- for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end; ++frm_nxt)
- {
- intern_type wc1 = *frm_nxt;
- if (wc1 < 0x0080)
- {
- if (to_end-to_nxt < 1)
- return partial;
- *to_nxt++ = static_cast<extern_type>(wc1);
- }
- else if (wc1 < 0x0800)
- {
- if (to_end-to_nxt < 2)
- return partial;
- *to_nxt++ = static_cast<extern_type>(0xC0 | (wc1 >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc1 & 0x03F));
- }
- else if (wc1 < 0xD800)
- {
- if (to_end-to_nxt < 3)
- return partial;
- *to_nxt++ = static_cast<extern_type>(0xE0 | (wc1 >> 12));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((wc1 & 0x0FC0) >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc1 & 0x003F));
- }
- else if (wc1 < 0xDC00)
- {
- if (frm_end-frm_nxt < 2)
- return partial;
- intern_type wc2 = frm_nxt[1];
- if ((wc2 & 0xFC00) != 0xDC00)
- return error;
- if (to_end-to_nxt < 4)
- return partial;
- ++frm_nxt;
- unsigned char z = ((wc1 & 0x03C0) >> 6) + 1;
- *to_nxt++ = static_cast<extern_type>(0xF0 | (z >> 2));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((z & 0x03) << 4) | ((wc1 & 0x003C) >> 2));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((wc1 & 0x0003) << 4) | ((wc2 & 0x03C0) >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc2 & 0x003F));
- }
- else if (wc1 < 0xE000)
- {
- return error;
- }
- else
- {
- if (to_end-to_nxt < 3)
- return partial;
- *to_nxt++ = static_cast<extern_type>(0xE0 | (wc1 >> 12));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((wc1 & 0x0FC0) >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc1 & 0x003F));
- }
- }
- return ok;
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
}
codecvt<char16_t, char, mbstate_t>::result
@@ -1378,100 +2718,16 @@
const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
{
- for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
- {
- unsigned char c1 = static_cast<unsigned char>(*frm_nxt);
- if (c1 < 0x80)
- {
- *to_nxt = static_cast<intern_type>(c1);
- ++frm_nxt;
- }
- else if (c1 < 0xC2)
- {
- return error;
- }
- else if (c1 < 0xE0)
- {
- if (frm_end-frm_nxt < 2)
- return partial;
- unsigned char c2 = frm_nxt[1];
- if ((c2 & 0xC0) != 0x80)
- return error;
- *to_nxt = static_cast<intern_type>(((c1 & 0x1F) << 6)
- | (c2 & 0x3F));
- frm_nxt += 2;
- }
- else if (c1 < 0xF0)
- {
- if (frm_end-frm_nxt < 3)
- return partial;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return error;
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return error;
- break;
- }
- if ((c3 & 0xC0) != 0x80)
- return error;
- *to_nxt = static_cast<intern_type>(((c1 & 0x0F) << 12)
- | ((c2 & 0x3F) << 6)
- | (c3 & 0x3F));
- frm_nxt += 3;
- }
- else if (c1 < 0xF5)
- {
- if (frm_end-frm_nxt < 4)
- return partial;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- unsigned char c4 = frm_nxt[3];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return error;
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return error;
- break;
- }
- if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
- return error;
- if (to_end-to_nxt < 2)
- return partial;
- *to_nxt = static_cast<intern_type>(
- 0xD800
- | (((((c1 & 0x07) << 2) | ((c2 & 0x30) >> 4)) - 1) << 6)
- | ((c2 & 0x0F) << 2)
- | ((c3 & 0x30) >> 4));
- *++to_nxt = static_cast<intern_type>(
- 0xDC00
- | ((c3 & 0x0F) << 6)
- | (c4 & 0x3F));
- frm_nxt += 4;
- }
- else
- {
- return error;
- }
- }
- return frm_nxt < frm_end ? partial : ok;
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
}
codecvt<char16_t, char, mbstate_t>::result
@@ -1498,82 +2754,9 @@
codecvt<char16_t, char, mbstate_t>::do_length(state_type&,
const extern_type* frm, const extern_type* frm_end, size_t mx) const
{
- const extern_type* frm_nxt = frm;
- for (size_t nchar16_t = 0; frm_nxt < frm_end && nchar16_t < mx; ++nchar16_t)
- {
- unsigned char c1 = static_cast<unsigned char>(*frm_nxt);
- if (c1 < 0x80)
- {
- ++frm_nxt;
- }
- else if (c1 < 0xC2)
- {
- break;
- }
- else if (c1 < 0xE0)
- {
- if ((frm_end-frm_nxt < 2) || (frm_nxt[1] & 0xC0) != 0x80)
- break;
- frm_nxt += 2;
- }
- else if (c1 < 0xF0)
- {
- if (frm_end-frm_nxt < 3)
- break;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return static_cast<int>(frm_nxt - frm);
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- }
- if ((c3 & 0xC0) != 0x80)
- break;
- frm_nxt += 3;
- }
- else if (c1 < 0xF5)
- {
- if (frm_end-frm_nxt < 4 || mx-nchar16_t < 2)
- break;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- unsigned char c4 = frm_nxt[3];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return static_cast<int>(frm_nxt - frm);
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- }
- if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
- break;
- ++nchar16_t;
- frm_nxt += 4;
- }
- else
- {
- break;
- }
- }
- return static_cast<int>(frm_nxt - frm);
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_utf16_length(_frm, _frm_end, mx);
}
int
@@ -1595,43 +2778,16 @@
const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
- for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end; ++frm_nxt)
- {
- intern_type wc = *frm_nxt;
- if ((wc & 0xFFFFF800) == 0x00D800 || wc >= 0x110000)
- return error;
- if (wc < 0x000080)
- {
- if (to_end-to_nxt < 1)
- return partial;
- *to_nxt++ = static_cast<extern_type>(wc);
- }
- else if (wc < 0x000800)
- {
- if (to_end-to_nxt < 2)
- return partial;
- *to_nxt++ = static_cast<extern_type>(0xC0 | (wc >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc & 0x03F));
- }
- else if (wc < 0x010000)
- {
- if (to_end-to_nxt < 3)
- return partial;
- *to_nxt++ = static_cast<extern_type>(0xE0 | (wc >> 12));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((wc & 0x0FC0) >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc & 0x003F));
- }
- else // if (wc < 0x110000)
- {
- if (to_end-to_nxt < 4)
- return partial;
- *to_nxt++ = static_cast<extern_type>(0xF0 | (wc >> 18));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((wc & 0x03F000) >> 12));
- *to_nxt++ = static_cast<extern_type>(0x80 | ((wc & 0x000FC0) >> 6));
- *to_nxt++ = static_cast<extern_type>(0x80 | (wc & 0x00003F));
- }
- }
- return ok;
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
}
codecvt<char32_t, char, mbstate_t>::result
@@ -1639,93 +2795,16 @@
const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
{
- for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
- {
- unsigned char c1 = static_cast<unsigned char>(*frm_nxt);
- if (c1 < 0x80)
- {
- *to_nxt = static_cast<intern_type>(c1);
- ++frm_nxt;
- }
- else if (c1 < 0xC2)
- {
- return error;
- }
- else if (c1 < 0xE0)
- {
- if (frm_end-frm_nxt < 2)
- return partial;
- unsigned char c2 = frm_nxt[1];
- if ((c2 & 0xC0) != 0x80)
- return error;
- *to_nxt = static_cast<intern_type>(((c1 & 0x1F) << 6)
- | (c2 & 0x3F));
- frm_nxt += 2;
- }
- else if (c1 < 0xF0)
- {
- if (frm_end-frm_nxt < 3)
- return partial;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return error;
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return error;
- break;
- }
- if ((c3 & 0xC0) != 0x80)
- return error;
- *to_nxt = static_cast<intern_type>(((c1 & 0x0F) << 12)
- | ((c2 & 0x3F) << 6)
- | (c3 & 0x3F));
- frm_nxt += 3;
- }
- else if (c1 < 0xF5)
- {
- if (frm_end-frm_nxt < 4)
- return partial;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- unsigned char c4 = frm_nxt[3];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return error;
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return error;
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return error;
- break;
- }
- if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
- return error;
- *to_nxt = static_cast<intern_type>(((c1 & 0x07) << 18)
- | ((c2 & 0x3F) << 12)
- | ((c3 & 0x3F) << 6)
- | (c4 & 0x3F));
- frm_nxt += 4;
- }
- else
- {
- return error;
- }
- }
- return frm_nxt < frm_end ? partial : ok;
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf8_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
}
codecvt<char32_t, char, mbstate_t>::result
@@ -1752,81 +2831,9 @@
codecvt<char32_t, char, mbstate_t>::do_length(state_type&,
const extern_type* frm, const extern_type* frm_end, size_t mx) const
{
- const extern_type* frm_nxt = frm;
- for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t)
- {
- unsigned char c1 = static_cast<unsigned char>(*frm_nxt);
- if (c1 < 0x80)
- {
- ++frm_nxt;
- }
- else if (c1 < 0xC2)
- {
- break;
- }
- else if (c1 < 0xE0)
- {
- if ((frm_end-frm_nxt < 2) || ((frm_nxt[1] & 0xC0) != 0x80))
- break;
- frm_nxt += 2;
- }
- else if (c1 < 0xF0)
- {
- if (frm_end-frm_nxt < 3)
- break;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- switch (c1)
- {
- case 0xE0:
- if ((c2 & 0xE0) != 0xA0)
- return static_cast<int>(frm_nxt - frm);
- break;
- case 0xED:
- if ((c2 & 0xE0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- }
- if ((c3 & 0xC0) != 0x80)
- break;
- frm_nxt += 3;
- }
- else if (c1 < 0xF5)
- {
- if (frm_end-frm_nxt < 4)
- break;
- unsigned char c2 = frm_nxt[1];
- unsigned char c3 = frm_nxt[2];
- unsigned char c4 = frm_nxt[3];
- switch (c1)
- {
- case 0xF0:
- if (!(0x90 <= c2 && c2 <= 0xBF))
- return static_cast<int>(frm_nxt - frm);
- break;
- case 0xF4:
- if ((c2 & 0xF0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- default:
- if ((c2 & 0xC0) != 0x80)
- return static_cast<int>(frm_nxt - frm);
- break;
- }
- if ((c3 & 0xC0) != 0x80 || (c4 & 0xC0) != 0x80)
- break;
- frm_nxt += 4;
- }
- else
- {
- break;
- }
- }
- return static_cast<int>(frm_nxt - frm);
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_ucs4_length(_frm, _frm_end, mx);
}
int
@@ -1835,80 +2842,46 @@
return 4;
}
-// template <> class codecvt<char32_t, char16_t, mbstate_t>
+// __codecvt_utf8<wchar_t>
-locale::id codecvt<char32_t, char16_t, mbstate_t>::id;
-
-codecvt<char32_t, char16_t, mbstate_t>::~codecvt()
-{
-}
-
-codecvt<char32_t, char16_t, mbstate_t>::result
-codecvt<char32_t, char16_t, mbstate_t>::do_out(state_type&,
- const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+__codecvt_utf8<wchar_t>::result
+__codecvt_utf8<wchar_t>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
{
- for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end; ++frm_nxt)
- {
- intern_type wc = *frm_nxt;
- if ((wc & 0xFFFFF800) == 0x00D800 || wc >= 0x110000)
- return error;
- if (wc < 0x010000)
- {
- if (to_end-to_nxt < 1)
- return partial;
- *to_nxt++ = static_cast<extern_type>(wc);
- }
- else
- {
- if (to_end-to_nxt < 2)
- return partial;
- *to_nxt++ = static_cast<extern_type>(
- 0xD800
- | ((((wc & 0x1F0000) >> 16) - 1) << 6)
- | ((wc & 0x00FC00) >> 10));
- *to_nxt++ = static_cast<extern_type>(
- 0xDC00
- | (wc & 0x03FF));
- }
- }
- return ok;
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
}
-codecvt<char32_t, char16_t, mbstate_t>::result
-codecvt<char32_t, char16_t, mbstate_t>::do_in(state_type&,
- const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+__codecvt_utf8<wchar_t>::result
+__codecvt_utf8<wchar_t>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
{
- for (frm_nxt = frm, to_nxt = to; frm_nxt < frm_end && to_nxt < to_end; ++to_nxt)
- {
- extern_type c1 = *frm_nxt;
- if ((c1 & 0xFC00) == 0xDC00)
- return error;
- if ((c1 & 0xFC00) != 0xD800)
- {
- *to_nxt = static_cast<intern_type>(c1);
- ++frm_nxt;
- }
- else
- {
- if (frm_end-frm_nxt < 2)
- return partial;
- extern_type c2 = frm_nxt[1];
- if ((c2 & 0xFC00) != 0xDC00)
- return error;
- *to_nxt = static_cast<intern_type>(
- ((((c1 & 0x03C0) >> 6) + 1) << 16)
- | ((c1 & 0x003F) << 10)
- | (c2 & 0x03FF));
- frm_nxt += 2;
- }
- }
- return frm_nxt < frm_end ? partial : ok;
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf8_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
}
-codecvt<char32_t, char16_t, mbstate_t>::result
-codecvt<char32_t, char16_t, mbstate_t>::do_unshift(state_type&,
+__codecvt_utf8<wchar_t>::result
+__codecvt_utf8<wchar_t>::do_unshift(state_type&,
extern_type* to, extern_type*, extern_type*& to_nxt) const
{
to_nxt = to;
@@ -1916,47 +2889,859 @@
}
int
-codecvt<char32_t, char16_t, mbstate_t>::do_encoding() const throw()
+__codecvt_utf8<wchar_t>::do_encoding() const throw()
{
return 0;
}
bool
-codecvt<char32_t, char16_t, mbstate_t>::do_always_noconv() const throw()
+__codecvt_utf8<wchar_t>::do_always_noconv() const throw()
{
return false;
}
int
-codecvt<char32_t, char16_t, mbstate_t>::do_length(state_type&,
+__codecvt_utf8<wchar_t>::do_length(state_type&,
const extern_type* frm, const extern_type* frm_end, size_t mx) const
{
- const extern_type* frm_nxt = frm;
- for (size_t nchar32_t = 0; frm_nxt < frm_end && nchar32_t < mx; ++nchar32_t)
- {
- extern_type c1 = *frm_nxt;
- if ((c1 & 0xFC00) == 0xDC00)
- break;
- if ((c1 & 0xFC00) != 0xD800)
- {
- ++frm_nxt;
- }
- else
- {
- if ((frm_end-frm_nxt < 2) || (frm_nxt[1] & 0xFC00) != 0xDC00)
- break;
- frm_nxt += 2;
- }
- }
- return static_cast<int>(frm_nxt - frm);
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
}
int
-codecvt<char32_t, char16_t, mbstate_t>::do_max_length() const throw()
+__codecvt_utf8<wchar_t>::do_max_length() const throw()
{
+ if (_Mode_ & consume_header)
+ return 7;
+ return 4;
+}
+
+// __codecvt_utf8<char16_t>
+
+__codecvt_utf8<char16_t>::result
+__codecvt_utf8<char16_t>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs2_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8<char16_t>::result
+__codecvt_utf8<char16_t>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf8_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8<char16_t>::result
+__codecvt_utf8<char16_t>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf8<char16_t>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf8<char16_t>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf8<char16_t>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf8<char16_t>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 6;
+ return 3;
+}
+
+// __codecvt_utf8<char32_t>
+
+__codecvt_utf8<char32_t>::result
+__codecvt_utf8<char32_t>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8<char32_t>::result
+__codecvt_utf8<char32_t>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf8_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8<char32_t>::result
+__codecvt_utf8<char32_t>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf8<char32_t>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf8<char32_t>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf8<char32_t>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf8<char32_t>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 7;
+ return 4;
+}
+
+// __codecvt_utf16<wchar_t, false>
+
+__codecvt_utf16<wchar_t, false>::result
+__codecvt_utf16<wchar_t, false>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<wchar_t, false>::result
+__codecvt_utf16<wchar_t, false>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<wchar_t, false>::result
+__codecvt_utf16<wchar_t, false>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf16<wchar_t, false>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf16<wchar_t, false>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf16<wchar_t, false>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf16<wchar_t, false>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 6;
+ return 4;
+}
+
+// __codecvt_utf16<wchar_t, true>
+
+__codecvt_utf16<wchar_t, true>::result
+__codecvt_utf16<wchar_t, true>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<wchar_t, true>::result
+__codecvt_utf16<wchar_t, true>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<wchar_t, true>::result
+__codecvt_utf16<wchar_t, true>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf16<wchar_t, true>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf16<wchar_t, true>::do_always_noconv() const throw()
+{
+ return true;
+}
+
+int
+__codecvt_utf16<wchar_t, true>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf16<wchar_t, true>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 6;
+ return 4;
+}
+
+// __codecvt_utf16<char16_t, false>
+
+__codecvt_utf16<char16_t, false>::result
+__codecvt_utf16<char16_t, false>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs2_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char16_t, false>::result
+__codecvt_utf16<char16_t, false>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf16be_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char16_t, false>::result
+__codecvt_utf16<char16_t, false>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf16<char16_t, false>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf16<char16_t, false>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf16<char16_t, false>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf16be_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf16<char16_t, false>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 4;
return 2;
}
+// __codecvt_utf16<char16_t, true>
+
+__codecvt_utf16<char16_t, true>::result
+__codecvt_utf16<char16_t, true>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs2_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char16_t, true>::result
+__codecvt_utf16<char16_t, true>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf16le_to_ucs2(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char16_t, true>::result
+__codecvt_utf16<char16_t, true>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf16<char16_t, true>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf16<char16_t, true>::do_always_noconv() const throw()
+{
+ return true;
+}
+
+int
+__codecvt_utf16<char16_t, true>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf16le_to_ucs2_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf16<char16_t, true>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 4;
+ return 2;
+}
+
+// __codecvt_utf16<char32_t, false>
+
+__codecvt_utf16<char32_t, false>::result
+__codecvt_utf16<char32_t, false>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf16be(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char32_t, false>::result
+__codecvt_utf16<char32_t, false>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf16be_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char32_t, false>::result
+__codecvt_utf16<char32_t, false>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf16<char32_t, false>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf16<char32_t, false>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf16<char32_t, false>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf16be_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf16<char32_t, false>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 6;
+ return 4;
+}
+
+// __codecvt_utf16<char32_t, true>
+
+__codecvt_utf16<char32_t, true>::result
+__codecvt_utf16<char32_t, true>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = ucs4_to_utf16le(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char32_t, true>::result
+__codecvt_utf16<char32_t, true>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf16le_to_ucs4(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf16<char32_t, true>::result
+__codecvt_utf16<char32_t, true>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf16<char32_t, true>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf16<char32_t, true>::do_always_noconv() const throw()
+{
+ return true;
+}
+
+int
+__codecvt_utf16<char32_t, true>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf16le_to_ucs4_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf16<char32_t, true>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 6;
+ return 4;
+}
+
+// __codecvt_utf8_utf16<wchar_t>
+
+__codecvt_utf8_utf16<wchar_t>::result
+__codecvt_utf8_utf16<wchar_t>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8_utf16<wchar_t>::result
+__codecvt_utf8_utf16<wchar_t>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8_utf16<wchar_t>::result
+__codecvt_utf8_utf16<wchar_t>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf8_utf16<wchar_t>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf8_utf16<wchar_t>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf8_utf16<wchar_t>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_utf16_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf8_utf16<wchar_t>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 7;
+ return 4;
+}
+
+// __codecvt_utf8_utf16<char16_t>
+
+__codecvt_utf8_utf16<char16_t>::result
+__codecvt_utf8_utf16<char16_t>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint16_t* _frm = reinterpret_cast<const uint16_t*>(frm);
+ const uint16_t* _frm_end = reinterpret_cast<const uint16_t*>(frm_end);
+ const uint16_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8_utf16<char16_t>::result
+__codecvt_utf8_utf16<char16_t>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint16_t* _to = reinterpret_cast<uint16_t*>(to);
+ uint16_t* _to_end = reinterpret_cast<uint16_t*>(to_end);
+ uint16_t* _to_nxt = _to;
+ result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8_utf16<char16_t>::result
+__codecvt_utf8_utf16<char16_t>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf8_utf16<char16_t>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf8_utf16<char16_t>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf8_utf16<char16_t>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_utf16_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf8_utf16<char16_t>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 7;
+ return 4;
+}
+
+// __codecvt_utf8_utf16<char32_t>
+
+__codecvt_utf8_utf16<char32_t>::result
+__codecvt_utf8_utf16<char32_t>::do_out(state_type&,
+ const intern_type* frm, const intern_type* frm_end, const intern_type*& frm_nxt,
+ extern_type* to, extern_type* to_end, extern_type*& to_nxt) const
+{
+ const uint32_t* _frm = reinterpret_cast<const uint32_t*>(frm);
+ const uint32_t* _frm_end = reinterpret_cast<const uint32_t*>(frm_end);
+ const uint32_t* _frm_nxt = _frm;
+ uint8_t* _to = reinterpret_cast<uint8_t*>(to);
+ uint8_t* _to_end = reinterpret_cast<uint8_t*>(to_end);
+ uint8_t* _to_nxt = _to;
+ result r = utf16_to_utf8(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8_utf16<char32_t>::result
+__codecvt_utf8_utf16<char32_t>::do_in(state_type&,
+ const extern_type* frm, const extern_type* frm_end, const extern_type*& frm_nxt,
+ intern_type* to, intern_type* to_end, intern_type*& to_nxt) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ const uint8_t* _frm_nxt = _frm;
+ uint32_t* _to = reinterpret_cast<uint32_t*>(to);
+ uint32_t* _to_end = reinterpret_cast<uint32_t*>(to_end);
+ uint32_t* _to_nxt = _to;
+ result r = utf8_to_utf16(_frm, _frm_end, _frm_nxt, _to, _to_end, _to_nxt,
+ _Maxcode_, _Mode_);
+ frm_nxt = frm + (_frm_nxt - _frm);
+ to_nxt = to + (_to_nxt - _to);
+ return r;
+}
+
+__codecvt_utf8_utf16<char32_t>::result
+__codecvt_utf8_utf16<char32_t>::do_unshift(state_type&,
+ extern_type* to, extern_type*, extern_type*& to_nxt) const
+{
+ to_nxt = to;
+ return noconv;
+}
+
+int
+__codecvt_utf8_utf16<char32_t>::do_encoding() const throw()
+{
+ return 0;
+}
+
+bool
+__codecvt_utf8_utf16<char32_t>::do_always_noconv() const throw()
+{
+ return false;
+}
+
+int
+__codecvt_utf8_utf16<char32_t>::do_length(state_type&,
+ const extern_type* frm, const extern_type* frm_end, size_t mx) const
+{
+ const uint8_t* _frm = reinterpret_cast<const uint8_t*>(frm);
+ const uint8_t* _frm_end = reinterpret_cast<const uint8_t*>(frm_end);
+ return utf8_to_utf16_length(_frm, _frm_end, mx, _Maxcode_, _Mode_);
+}
+
+int
+__codecvt_utf8_utf16<char32_t>::do_max_length() const throw()
+{
+ if (_Mode_ & consume_header)
+ return 7;
+ return 4;
+}
+
// __narrow_to_utf8<16>
__narrow_to_utf8<16>::~__narrow_to_utf8()