| |
| /* |
| * Copyright 2006 The Android Open Source Project |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| |
| #include "SkUtils.h" |
| |
| /* 0xxxxxxx 1 total |
| 10xxxxxx // never a leading byte |
| 110xxxxx 2 total |
| 1110xxxx 3 total |
| 11110xxx 4 total |
| |
| 11 10 01 01 xx xx xx xx 0... |
| 0xE5XX0000 |
| 0xE5 << 24 |
| */ |
| |
#ifdef SK_DEBUG
// Debug-only sanity check for a byte that is about to be treated as the
// first byte of a UTF-8 sequence.
static void assert_utf8_leadingbyte(unsigned c) {
    SkASSERT(c <= 0xF7);            // anything larger would start a sequence longer than 4 bytes
    SkASSERT((c & 0xC0) != 0x80);   // 10xxxxxx is a continuation byte, never a lead byte
}

// Returns the total number of bytes (1..4) in the UTF-8 sequence introduced
// by lead byte c.
//
// 0xE5000000 is a packed table of 2-bit entries, each holding (length - 1)
// and indexed by the high nibble of c: shifting right by twice the nibble
// moves the matching entry into the low two bits.
int SkUTF8_LeadByteToCount(unsigned c) {
    assert_utf8_leadingbyte(c);
    // Build the table as an unsigned value. 0xE5 << 24 does not fit in a
    // signed int, so the signed form depends on overflow and on how a
    // negative value is shifted right. The low two bits extracted below are
    // the same either way.
    const unsigned table = 0xE5u << 24;
    return (int)((table >> (c >> 4 << 1)) & 3) + 1;
}
#else
    #define assert_utf8_leadingbyte(c)
#endif
| |
| int SkUTF8_CountUnichars(const char utf8[]) { |
| SkASSERT(utf8); |
| |
| int count = 0; |
| |
| for (;;) { |
| int c = *(const uint8_t*)utf8; |
| if (c == 0) { |
| break; |
| } |
| utf8 += SkUTF8_LeadByteToCount(c); |
| count += 1; |
| } |
| return count; |
| } |
| |
| int SkUTF8_CountUnichars(const char utf8[], size_t byteLength) { |
| SkASSERT(utf8 || 0 == byteLength); |
| |
| int count = 0; |
| const char* stop = utf8 + byteLength; |
| |
| while (utf8 < stop) { |
| utf8 += SkUTF8_LeadByteToCount(*(const uint8_t*)utf8); |
| count += 1; |
| } |
| return count; |
| } |
| |
| SkUnichar SkUTF8_ToUnichar(const char utf8[]) { |
| SkASSERT(utf8); |
| |
| const uint8_t* p = (const uint8_t*)utf8; |
| int c = *p; |
| int hic = c << 24; |
| |
| assert_utf8_leadingbyte(c); |
| |
| if (hic < 0) { |
| uint32_t mask = (uint32_t)~0x3F; |
| hic = SkLeftShift(hic, 1); |
| do { |
| c = (c << 6) | (*++p & 0x3F); |
| mask <<= 5; |
| } while ((hic = SkLeftShift(hic, 1)) < 0); |
| c &= ~mask; |
| } |
| return c; |
| } |
| |
| SkUnichar SkUTF8_NextUnichar(const char** ptr) { |
| SkASSERT(ptr && *ptr); |
| |
| const uint8_t* p = (const uint8_t*)*ptr; |
| int c = *p; |
| int hic = c << 24; |
| |
| assert_utf8_leadingbyte(c); |
| |
| if (hic < 0) { |
| uint32_t mask = (uint32_t)~0x3F; |
| hic = SkLeftShift(hic, 1); |
| do { |
| c = (c << 6) | (*++p & 0x3F); |
| mask <<= 5; |
| } while ((hic = SkLeftShift(hic, 1)) < 0); |
| c &= ~mask; |
| } |
| *ptr = (char*)p + 1; |
| return c; |
| } |
| |
| SkUnichar SkUTF8_PrevUnichar(const char** ptr) { |
| SkASSERT(ptr && *ptr); |
| |
| const char* p = *ptr; |
| |
| if (*--p & 0x80) { |
| while (*--p & 0x40) { |
| ; |
| } |
| } |
| |
| *ptr = (char*)p; |
| return SkUTF8_NextUnichar(&p); |
| } |
| |
| size_t SkUTF8_FromUnichar(SkUnichar uni, char utf8[]) { |
| if ((uint32_t)uni > 0x10FFFF) { |
| SkDEBUGFAIL("bad unichar"); |
| return 0; |
| } |
| |
| if (uni <= 127) { |
| if (utf8) { |
| *utf8 = (char)uni; |
| } |
| return 1; |
| } |
| |
| char tmp[4]; |
| char* p = tmp; |
| size_t count = 1; |
| |
| SkDEBUGCODE(SkUnichar orig = uni;) |
| |
| while (uni > 0x7F >> count) { |
| *p++ = (char)(0x80 | (uni & 0x3F)); |
| uni >>= 6; |
| count += 1; |
| } |
| |
| if (utf8) { |
| p = tmp; |
| utf8 += count; |
| while (p < tmp + count - 1) { |
| *--utf8 = *p++; |
| } |
| *--utf8 = (char)(~(0xFF >> count) | uni); |
| } |
| |
| SkASSERT(utf8 == nullptr || orig == SkUTF8_ToUnichar(utf8)); |
| return count; |
| } |
| |
| /////////////////////////////////////////////////////////////////////////////// |
| |
| int SkUTF16_CountUnichars(const uint16_t src[]) { |
| SkASSERT(src); |
| |
| int count = 0; |
| unsigned c; |
| while ((c = *src++) != 0) { |
| SkASSERT(!SkUTF16_IsLowSurrogate(c)); |
| if (SkUTF16_IsHighSurrogate(c)) { |
| c = *src++; |
| SkASSERT(SkUTF16_IsLowSurrogate(c)); |
| } |
| count += 1; |
| } |
| return count; |
| } |
| |
| int SkUTF16_CountUnichars(const uint16_t src[], int numberOf16BitValues) { |
| SkASSERT(src); |
| |
| const uint16_t* stop = src + numberOf16BitValues; |
| int count = 0; |
| while (src < stop) { |
| unsigned c = *src++; |
| SkASSERT(!SkUTF16_IsLowSurrogate(c)); |
| if (SkUTF16_IsHighSurrogate(c)) { |
| SkASSERT(src < stop); |
| c = *src++; |
| SkASSERT(SkUTF16_IsLowSurrogate(c)); |
| } |
| count += 1; |
| } |
| return count; |
| } |
| |
| SkUnichar SkUTF16_NextUnichar(const uint16_t** srcPtr) { |
| SkASSERT(srcPtr && *srcPtr); |
| |
| const uint16_t* src = *srcPtr; |
| SkUnichar c = *src++; |
| |
| SkASSERT(!SkUTF16_IsLowSurrogate(c)); |
| if (SkUTF16_IsHighSurrogate(c)) { |
| unsigned c2 = *src++; |
| SkASSERT(SkUTF16_IsLowSurrogate(c2)); |
| |
| // c = ((c & 0x3FF) << 10) + (c2 & 0x3FF) + 0x10000 |
| // c = (((c & 0x3FF) + 64) << 10) + (c2 & 0x3FF) |
| c = (c << 10) + c2 + (0x10000 - (0xD800 << 10) - 0xDC00); |
| } |
| *srcPtr = src; |
| return c; |
| } |
| |
| SkUnichar SkUTF16_PrevUnichar(const uint16_t** srcPtr) { |
| SkASSERT(srcPtr && *srcPtr); |
| |
| const uint16_t* src = *srcPtr; |
| SkUnichar c = *--src; |
| |
| SkASSERT(!SkUTF16_IsHighSurrogate(c)); |
| if (SkUTF16_IsLowSurrogate(c)) { |
| unsigned c2 = *--src; |
| SkASSERT(SkUTF16_IsHighSurrogate(c2)); |
| c = (c2 << 10) + c + (0x10000 - (0xD800 << 10) - 0xDC00); |
| } |
| *srcPtr = src; |
| return c; |
| } |
| |
| size_t SkUTF16_FromUnichar(SkUnichar uni, uint16_t dst[]) { |
| SkASSERT((unsigned)uni <= 0x10FFFF); |
| |
| int extra = (uni > 0xFFFF); |
| |
| if (dst) { |
| if (extra) { |
| // dst[0] = SkToU16(0xD800 | ((uni - 0x10000) >> 10)); |
| // dst[0] = SkToU16(0xD800 | ((uni >> 10) - 64)); |
| dst[0] = SkToU16((0xD800 - 64) + (uni >> 10)); |
| dst[1] = SkToU16(0xDC00 | (uni & 0x3FF)); |
| |
| SkASSERT(SkUTF16_IsHighSurrogate(dst[0])); |
| SkASSERT(SkUTF16_IsLowSurrogate(dst[1])); |
| } else { |
| dst[0] = SkToU16(uni); |
| SkASSERT(!SkUTF16_IsHighSurrogate(dst[0])); |
| SkASSERT(!SkUTF16_IsLowSurrogate(dst[0])); |
| } |
| } |
| return 1 + extra; |
| } |
| |
| size_t SkUTF16_ToUTF8(const uint16_t utf16[], int numberOf16BitValues, |
| char utf8[]) { |
| SkASSERT(numberOf16BitValues >= 0); |
| if (numberOf16BitValues <= 0) { |
| return 0; |
| } |
| |
| SkASSERT(utf16 != nullptr); |
| |
| const uint16_t* stop = utf16 + numberOf16BitValues; |
| size_t size = 0; |
| |
| if (utf8 == nullptr) { // just count |
| while (utf16 < stop) { |
| size += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), nullptr); |
| } |
| } else { |
| char* start = utf8; |
| while (utf16 < stop) { |
| utf8 += SkUTF8_FromUnichar(SkUTF16_NextUnichar(&utf16), utf8); |
| } |
| size = utf8 - start; |
| } |
| return size; |
| } |