blob: 81c93ca2aec47b74477d9841b8108d8c7b34dafe [file] [log] [blame]
Elliott Hughes814e4032011-08-23 12:07:56 -07001// Copyright 2011 Google Inc. All Rights Reserved.
2
3#include "utf.h"
4
5namespace art {
6
// Returns the number of encoded characters in the NUL-terminated modified
// UTF-8 string `utf8`. Each one-, two- or three-byte sequence counts as one
// character.
//
// The sequence length is taken from the lead byte alone; trailing bytes are
// skipped without being inspected, so a sequence that is cut short by the
// terminator makes the scan step past the end of the string.
size_t CountModifiedUtf8Chars(const char* utf8) {
  size_t char_count = 0;
  for (const char* cursor = utf8; *cursor != '\0'; ++char_count) {
    const int lead = *cursor;
    if ((lead & 0x80) == 0) {
      // High bit clear: one-byte encoding.
      cursor += 1;
    } else if ((lead & 0x20) == 0) {
      // High bit set, 0x20 clear: two-byte encoding.
      cursor += 2;
    } else {
      // High bit and 0x20 both set: three-byte encoding.
      cursor += 3;
    }
  }
  return char_count;
}
27
28void ConvertModifiedUtf8ToUtf16(uint16_t* utf16_data_out, const char* utf8_data_in) {
29 while (*utf8_data_in != '\0') {
30 *utf16_data_out++ = GetUtf16FromUtf8(&utf8_data_in);
31 }
32}
33
// Computes the base-31 polynomial hash of `char_count` UTF-16 code units
// starting at `chars`:
//   chars[0]*31^(n-1) + chars[1]*31^(n-2) + ... + chars[n-1]
// (the same recurrence documented for java.lang.String.hashCode), truncated
// to 32 bits. Returns 0 when `char_count` is 0; `chars` is not dereferenced
// in that case.
int32_t ComputeUtf16Hash(const uint16_t* chars, size_t char_count) {
  // Accumulate in an unsigned type: the product overflows 32 bits for all
  // but very short inputs, and signed overflow is undefined behaviour,
  // whereas unsigned arithmetic wraps modulo 2^32 as the hash requires.
  uint32_t hash = 0;
  while (char_count--) {
    hash = hash * 31u + static_cast<uint32_t>(*chars++);
  }
  return static_cast<int32_t>(hash);
}
41
// Decodes one modified UTF-8 sequence (one, two or three bytes) starting at
// `*utf8_data_in`, returns it as a UTF-16 code unit, and advances
// `*utf8_data_in` past the bytes consumed.
//
// The sequence length is decided from the lead byte only; continuation bytes
// are masked to their low six bits without being validated.
uint16_t GetUtf16FromUtf8(const char** utf8_data_in) {
  const char*& cursor = *utf8_data_in;

  const uint8_t lead = static_cast<uint8_t>(*cursor++);
  if (lead < 0x80) {
    // High bit clear: the byte is the character itself.
    return lead;
  }

  // Every multi-byte form has at least one trailing byte.
  const uint8_t second = static_cast<uint8_t>(*cursor++);
  const bool is_three_byte = (lead & 0x20) != 0;
  if (!is_three_byte) {
    // Two bytes: 5 payload bits from the lead, 6 from the trailing byte.
    return static_cast<uint16_t>(((lead & 0x1f) << 6) | (second & 0x3f));
  }

  // Three bytes: 4 payload bits from the lead, 6 from each trailing byte.
  const uint8_t third = static_cast<uint8_t>(*cursor++);
  return static_cast<uint16_t>(((lead & 0x0f) << 12) | ((second & 0x3f) << 6) | (third & 0x3f));
}
58
// Returns the number of bytes needed to encode `char_count` UTF-16 code
// units starting at `chars`, using the size rules applied below:
//   0x0001..0x007f -> 1 byte
//   0x0000 and 0x0080..0x07ff -> 2 bytes (zero is never a single byte)
//   0x0800..0xffff -> 3 bytes
// The total does not include any terminator.
size_t CountUtf8Bytes(const uint16_t* chars, size_t char_count) {
  size_t byte_total = 0;
  for (size_t i = 0; i < char_count; ++i) {
    const uint16_t unit = chars[i];
    if (unit > 0x7ff) {
      byte_total += 3;
    } else if (unit == 0 || unit > 0x7f) {
      byte_total += 2;
    } else {
      byte_total += 1;
    }
  }
  return byte_total;
}
75
76} // namespace art