blob: e3fb97b615e37c872641a7e2262bc94cebd97269 [file] [log] [blame]
kumarashishg826308d2023-06-23 13:21:22 +00001// Copyright 2014 The PDFium Authors
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -07002// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5// Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com
6
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -07007#include "core/fxcrt/fx_unicode.h"
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -07008
kumarashishg826308d2023-06-23 13:21:22 +00009#include <stddef.h>
10
11#include <iterator>
12
13#include "third_party/base/check.h"
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070014
15namespace {
16
// Bit layout of each uint16_t entry in kTextLayoutCodeProperties[]:
//   bits [0, 5)  : bidi class (an FX_BIDICLASS value).
//   bits [5, 14) : mirror index, where kMirrorMax means "no mirror".
constexpr uint16_t kBidiClassBitCount = 5;
constexpr uint16_t kBidiClassBitPos = 0;
constexpr uint16_t kBidiClassBitMask =
    ((1u << kBidiClassBitCount) - 1u) << kBidiClassBitPos;

constexpr uint16_t kMirrorBitCount = 9;
constexpr uint16_t kMirrorBitPos = 5;
// Largest value that fits in the mirror field.
constexpr uint16_t kMirrorMax = (1 << kMirrorBitCount) - 1;
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070026
Haibo Huang49cc9302020-04-27 16:14:24 -070027#undef CHARPROP____
28#define CHARPROP____(mirror, ct, bd, bt) \
29 ((mirror << kMirrorBitPos) | \
30 (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos)),
31constexpr uint16_t kTextLayoutCodeProperties[] = {
kumarashishg826308d2023-06-23 13:21:22 +000032#include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include)
Haibo Huang49cc9302020-04-27 16:14:24 -070033};
34#undef CHARPROP____
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070035
Haibo Huang49cc9302020-04-27 16:14:24 -070036constexpr size_t kTextLayoutCodePropertiesSize =
kumarashishg826308d2023-06-23 13:21:22 +000037 std::size(kTextLayoutCodeProperties);
Haibo Huang49cc9302020-04-27 16:14:24 -070038
39static_assert(kTextLayoutCodePropertiesSize == 65536, "missing characters");
40
41uint16_t GetUnicodeProperties(wchar_t wch) {
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070042 size_t idx = static_cast<size_t>(wch);
43 if (idx < kTextLayoutCodePropertiesSize)
Philip P. Moltmannd904c1e2018-03-19 09:26:45 -070044 return kTextLayoutCodeProperties[idx];
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -070045 return 0;
46}
47
Haibo Huang49cc9302020-04-27 16:14:24 -070048#ifdef PDF_ENABLE_XFA
// Bit layout of each uint16_t entry in kExtendedTextLayoutCodeProperties[]:
//   bits [0, 6)  : break property (an FX_BREAKPROPERTY value).
//   bits [6, 10) : character type (an FX_CHARTYPE value).
constexpr uint16_t kBreakTypeBitCount = 6;
constexpr uint16_t kBreakTypeBitPos = 0;
constexpr uint16_t kBreakTypeBitMask =
    ((1u << kBreakTypeBitCount) - 1u) << kBreakTypeBitPos;

constexpr uint16_t kCharTypeBitCount = 4;
constexpr uint16_t kCharTypeBitPos = 6;
constexpr uint16_t kCharTypeBitMask =
    ((1u << kCharTypeBitCount) - 1u) << kCharTypeBitPos;
59
60#undef CHARPROP____
61#define CHARPROP____(mirror, ct, bd, bt) \
62 ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) | \
63 (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos)),
64constexpr uint16_t kExtendedTextLayoutCodeProperties[] = {
kumarashishg826308d2023-06-23 13:21:22 +000065#include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include)
Haibo Huang49cc9302020-04-27 16:14:24 -070066};
67#undef CHARPROP____
68
69constexpr size_t kExtendedTextLayoutCodePropertiesSize =
kumarashishg826308d2023-06-23 13:21:22 +000070 std::size(kExtendedTextLayoutCodeProperties);
Haibo Huang49cc9302020-04-27 16:14:24 -070071
72static_assert(kExtendedTextLayoutCodePropertiesSize == 65536,
73 "missing characters");
74
75uint16_t GetExtendedUnicodeProperties(wchar_t wch) {
76 size_t idx = static_cast<size_t>(wch);
77 if (idx < kExtendedTextLayoutCodePropertiesSize)
78 return kExtendedTextLayoutCodeProperties[idx];
79 return 0;
80}
81
82#endif // PDF_ENABLE_XFA
83
// Table of mirrored counterpart characters. GetMirrorChar() selects an entry
// using the mirror index stored in the property word from
// kTextLayoutCodeProperties[]; the index value kMirrorMax is reserved to mean
// "no mirror" and never indexes this table.
84constexpr uint16_t kFXTextLayoutBidiMirror[] = {
85 0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
86 0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018,
87 0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E,
88 0x208D, 0x220B, 0x220C, 0x220D, 0x2208, 0x2209, 0x220A, 0x29F5, 0x223D,
89 0x223C, 0x22CD, 0x2253, 0x2252, 0x2255, 0x2254, 0x2265, 0x2264, 0x2267,
90 0x2266, 0x2269, 0x2268, 0x226B, 0x226A, 0x226F, 0x226E, 0x2271, 0x2270,
91 0x2273, 0x2272, 0x2275, 0x2274, 0x2277, 0x2276, 0x2279, 0x2278, 0x227B,
92 0x227A, 0x227D, 0x227C, 0x227F, 0x227E, 0x2281, 0x2280, 0x2283, 0x2282,
93 0x2285, 0x2284, 0x2287, 0x2286, 0x2289, 0x2288, 0x228B, 0x228A, 0x2290,
94 0x228F, 0x2292, 0x2291, 0x29B8, 0x22A3, 0x22A2, 0x2ADE, 0x2AE4, 0x2AE3,
95 0x2AE5, 0x22B1, 0x22B0, 0x22B3, 0x22B2, 0x22B5, 0x22B4, 0x22B7, 0x22B6,
96 0x22CA, 0x22C9, 0x22CC, 0x22CB, 0x2243, 0x22D1, 0x22D0, 0x22D7, 0x22D6,
97 0x22D9, 0x22D8, 0x22DB, 0x22DA, 0x22DD, 0x22DC, 0x22DF, 0x22DE, 0x22E1,
98 0x22E0, 0x22E3, 0x22E2, 0x22E5, 0x22E4, 0x22E7, 0x22E6, 0x22E9, 0x22E8,
99 0x22EB, 0x22EA, 0x22ED, 0x22EC, 0x22F1, 0x22F0, 0x22FA, 0x22FB, 0x22FC,
100 0x22FD, 0x22FE, 0x22F2, 0x22F3, 0x22F4, 0x22F6, 0x22F7, 0x2309, 0x2308,
101 0x230B, 0x230A, 0x232A, 0x2329, 0x2769, 0x2768, 0x276B, 0x276A, 0x276D,
102 0x276C, 0x276F, 0x276E, 0x2771, 0x2770, 0x2773, 0x2772, 0x2775, 0x2774,
103 0x27C4, 0x27C3, 0x27C6, 0x27C5, 0x27C9, 0x27C8, 0x27D6, 0x27D5, 0x27DE,
104 0x27DD, 0x27E3, 0x27E2, 0x27E5, 0x27E4, 0x27E7, 0x27E6, 0x27E9, 0x27E8,
105 0x27EB, 0x27EA, 0x27ED, 0x27EC, 0x27EF, 0x27EE, 0x2984, 0x2983, 0x2986,
106 0x2985, 0x2988, 0x2987, 0x298A, 0x2989, 0x298C, 0x298B, 0x2990, 0x298F,
107 0x298E, 0x298D, 0x2992, 0x2991, 0x2994, 0x2993, 0x2996, 0x2995, 0x2998,
108 0x2997, 0x2298, 0x29C1, 0x29C0, 0x29C5, 0x29C4, 0x29D0, 0x29CF, 0x29D2,
109 0x29D1, 0x29D5, 0x29D4, 0x29D9, 0x29D8, 0x29DB, 0x29DA, 0x2215, 0x29F9,
110 0x29F8, 0x29FD, 0x29FC, 0x2A2C, 0x2A2B, 0x2A2E, 0x2A2D, 0x2A35, 0x2A34,
111 0x2A3D, 0x2A3C, 0x2A65, 0x2A64, 0x2A7A, 0x2A79, 0x2A7E, 0x2A7D, 0x2A80,
112 0x2A7F, 0x2A82, 0x2A81, 0x2A84, 0x2A83, 0x2A8C, 0x2A8B, 0x2A92, 0x2A91,
113 0x2A94, 0x2A93, 0x2A96, 0x2A95, 0x2A98, 0x2A97, 0x2A9A, 0x2A99, 0x2A9C,
114 0x2A9B, 0x2AA2, 0x2AA1, 0x2AA7, 0x2AA6, 0x2AA9, 0x2AA8, 0x2AAB, 0x2AAA,
115 0x2AAD, 0x2AAC, 0x2AB0, 0x2AAF, 0x2AB4, 0x2AB3, 0x2ABC, 0x2ABB, 0x2ABE,
116 0x2ABD, 0x2AC0, 0x2ABF, 0x2AC2, 0x2AC1, 0x2AC4, 0x2AC3, 0x2AC6, 0x2AC5,
117 0x2ACE, 0x2ACD, 0x2AD0, 0x2ACF, 0x2AD2, 0x2AD1, 0x2AD4, 0x2AD3, 0x2AD6,
118 0x2AD5, 0x22A6, 0x22A9, 0x22A8, 0x22AB, 0x2AED, 0x2AEC, 0x2AF8, 0x2AF7,
119 0x2AFA, 0x2AF9, 0x2E03, 0x2E02, 0x2E05, 0x2E04, 0x2E0A, 0x2E09, 0x2E0D,
120 0x2E0C, 0x2E1D, 0x2E1C, 0x2E21, 0x2E20, 0x2E23, 0x2E22, 0x2E25, 0x2E24,
121 0x2E27, 0x2E26, 0x2E29, 0x2E28, 0x3009, 0x3008, 0x300B, 0x300A, 0x300D,
122 0x300C, 0x300F, 0x300E, 0x3011, 0x3010, 0x3015, 0x3014, 0x3017, 0x3016,
123 0x3019, 0x3018, 0x301B, 0x301A, 0xFE5A, 0xFE59, 0xFE5C, 0xFE5B, 0xFE5E,
124 0xFE5D, 0xFE65, 0xFE64, 0xFF09, 0xFF08, 0xFF1E, 0xFF1C, 0xFF3D, 0xFF3B,
125 0xFF5D, 0xFF5B, 0xFF60, 0xFF5F, 0xFF63, 0xFF62,
126};
127
128constexpr size_t kFXTextLayoutBidiMirrorSize =
kumarashishg826308d2023-06-23 13:21:22 +0000129 std::size(kFXTextLayoutBidiMirror);
Haibo Huang49cc9302020-04-27 16:14:24 -0700130
131// Check that the mirror indicies in the fx_ucddata.inc table are in bounds.
132#undef CHARPROP____
133#define CHARPROP____(mirror, ct, bd, bt) \
134 static_assert(mirror == kMirrorMax || mirror < kFXTextLayoutBidiMirrorSize, \
135 "Bad mirror index");
kumarashishg826308d2023-06-23 13:21:22 +0000136#include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include)
Haibo Huang49cc9302020-04-27 16:14:24 -0700137#undef CHARPROP____
138
139} // namespace
140
kumarashishg826308d2023-06-23 13:21:22 +0000141namespace pdfium {
142namespace unicode {
143
144wchar_t GetMirrorChar(wchar_t wch) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700145 uint16_t prop = GetUnicodeProperties(wch);
146 size_t idx = prop >> kMirrorBitPos;
147 if (idx == kMirrorMax)
148 return wch;
kumarashishg826308d2023-06-23 13:21:22 +0000149 DCHECK(idx < kFXTextLayoutBidiMirrorSize);
Haibo Huang49cc9302020-04-27 16:14:24 -0700150 return kFXTextLayoutBidiMirror[idx];
151}
152
kumarashishg826308d2023-06-23 13:21:22 +0000153FX_BIDICLASS GetBidiClass(wchar_t wch) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700154 uint16_t prop = GetUnicodeProperties(wch);
155 uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos;
kumarashishg826308d2023-06-23 13:21:22 +0000156 DCHECK(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF));
Haibo Huang49cc9302020-04-27 16:14:24 -0700157 return static_cast<FX_BIDICLASS>(result);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700158}
159
160#ifdef PDF_ENABLE_XFA
kumarashishg826308d2023-06-23 13:21:22 +0000161FX_CHARTYPE GetCharType(wchar_t wch) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700162 uint16_t prop = GetExtendedUnicodeProperties(wch);
163 uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos;
kumarashishg826308d2023-06-23 13:21:22 +0000164 DCHECK(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic));
Haibo Huang49cc9302020-04-27 16:14:24 -0700165 return static_cast<FX_CHARTYPE>(result);
166}
167
kumarashishg826308d2023-06-23 13:21:22 +0000168FX_BREAKPROPERTY GetBreakProperty(wchar_t wch) {
Haibo Huang49cc9302020-04-27 16:14:24 -0700169 uint16_t prop = GetExtendedUnicodeProperties(wch);
170 uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos;
kumarashishg826308d2023-06-23 13:21:22 +0000171 DCHECK(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB));
Haibo Huang49cc9302020-04-27 16:14:24 -0700172 return static_cast<FX_BREAKPROPERTY>(result);
Philip P. Moltmann4d3acf42017-03-20 11:05:52 -0700173}
174#endif // PDF_ENABLE_XFA
kumarashishg826308d2023-06-23 13:21:22 +0000175
176} // namespace unicode
177} // namespace pdfium