kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 1 | // Copyright 2014 The PDFium Authors |
Philip P. Moltmann | 4d3acf4 | 2017-03-20 11:05:52 -0700 | [diff] [blame] | 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | // Original code copyright 2014 Foxit Software Inc. http://www.foxitsoftware.com |
| 6 | |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 7 | #include "core/fxcrt/fx_unicode.h" |
Philip P. Moltmann | 4d3acf4 | 2017-03-20 11:05:52 -0700 | [diff] [blame] | 8 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 9 | #include <stddef.h> |
| 10 | |
| 11 | #include <iterator> |
| 12 | |
| 13 | #include "third_party/base/check.h" |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 14 | |
| 15 | namespace { |
| 16 | |
// Bit layout of each uint16_t entry in kTextLayoutCodeProperties[]:
//   - kBidiClassBitCount bits starting at kBidiClassBitPos hold the bidi
//     class (an FX_BIDICLASS value).
//   - kMirrorBitCount bits starting at kMirrorBitPos hold an index into the
//     mirror table.
constexpr uint16_t kBidiClassBitPos = 0;
constexpr uint16_t kBidiClassBitCount = 5;
constexpr uint16_t kBidiClassBitMask =
    ((1u << kBidiClassBitCount) - 1u) << kBidiClassBitPos;

constexpr uint16_t kMirrorBitPos = 5;
constexpr uint16_t kMirrorBitCount = 9;
// Largest value the mirror field can hold. GetMirrorChar() treats it as the
// "character has no mirror" sentinel.
constexpr uint16_t kMirrorMax = (1 << kMirrorBitCount) - 1;
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 26 | |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 27 | #undef CHARPROP____ |
| 28 | #define CHARPROP____(mirror, ct, bd, bt) \ |
| 29 | ((mirror << kMirrorBitPos) | \ |
| 30 | (static_cast<uint16_t>(FX_BIDICLASS::bd) << kBidiClassBitPos)), |
| 31 | constexpr uint16_t kTextLayoutCodeProperties[] = { |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 32 | #include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include) |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 33 | }; |
| 34 | #undef CHARPROP____ |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 35 | |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 36 | constexpr size_t kTextLayoutCodePropertiesSize = |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 37 | std::size(kTextLayoutCodeProperties); |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 38 | |
| 39 | static_assert(kTextLayoutCodePropertiesSize == 65536, "missing characters"); |
| 40 | |
| 41 | uint16_t GetUnicodeProperties(wchar_t wch) { |
Philip P. Moltmann | 4d3acf4 | 2017-03-20 11:05:52 -0700 | [diff] [blame] | 42 | size_t idx = static_cast<size_t>(wch); |
| 43 | if (idx < kTextLayoutCodePropertiesSize) |
Philip P. Moltmann | d904c1e | 2018-03-19 09:26:45 -0700 | [diff] [blame] | 44 | return kTextLayoutCodeProperties[idx]; |
Philip P. Moltmann | 4d3acf4 | 2017-03-20 11:05:52 -0700 | [diff] [blame] | 45 | return 0; |
| 46 | } |
| 47 | |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 48 | #ifdef PDF_ENABLE_XFA |
// Bit layout of each uint16_t entry in kExtendedTextLayoutCodeProperties[]:
//   - kBreakTypeBitCount bits starting at kBreakTypeBitPos hold the break
//     property (an FX_BREAKPROPERTY value).
//   - kCharTypeBitCount bits starting at kCharTypeBitPos hold the character
//     type (an FX_CHARTYPE value).
constexpr uint16_t kBreakTypeBitPos = 0;
constexpr uint16_t kBreakTypeBitCount = 6;
constexpr uint16_t kBreakTypeBitMask =
    ((1u << kBreakTypeBitCount) - 1u) << kBreakTypeBitPos;

constexpr uint16_t kCharTypeBitPos = 6;
constexpr uint16_t kCharTypeBitCount = 4;
constexpr uint16_t kCharTypeBitMask =
    ((1u << kCharTypeBitCount) - 1u) << kCharTypeBitPos;
| 59 | |
| 60 | #undef CHARPROP____ |
| 61 | #define CHARPROP____(mirror, ct, bd, bt) \ |
| 62 | ((static_cast<uint16_t>(FX_CHARTYPE::ct) << kCharTypeBitPos) | \ |
| 63 | (static_cast<uint16_t>(FX_BREAKPROPERTY::bt) << kBreakTypeBitPos)), |
| 64 | constexpr uint16_t kExtendedTextLayoutCodeProperties[] = { |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 65 | #include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include) |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 66 | }; |
| 67 | #undef CHARPROP____ |
| 68 | |
| 69 | constexpr size_t kExtendedTextLayoutCodePropertiesSize = |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 70 | std::size(kExtendedTextLayoutCodeProperties); |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 71 | |
| 72 | static_assert(kExtendedTextLayoutCodePropertiesSize == 65536, |
| 73 | "missing characters"); |
| 74 | |
| 75 | uint16_t GetExtendedUnicodeProperties(wchar_t wch) { |
| 76 | size_t idx = static_cast<size_t>(wch); |
| 77 | if (idx < kExtendedTextLayoutCodePropertiesSize) |
| 78 | return kExtendedTextLayoutCodeProperties[idx]; |
| 79 | return 0; |
| 80 | } |
| 81 | |
| 82 | #endif // PDF_ENABLE_XFA |
| 83 | |
// Mirror lookup table. The mirror field of a kTextLayoutCodeProperties[] entry
// is an index into this array; the element stored there is the character that
// GetMirrorChar() returns.
constexpr uint16_t kFXTextLayoutBidiMirror[] = {
    0x0029, 0x0028, 0x003E, 0x003C, 0x005D, 0x005B, 0x007D, 0x007B, 0x00BB,
    0x00AB, 0x0F3B, 0x0F3A, 0x0F3D, 0x0F3C, 0x169C, 0x169B, 0x2019, 0x2018,
    0x201D, 0x201C, 0x203A, 0x2039, 0x2046, 0x2045, 0x207E, 0x207D, 0x208E,
    0x208D, 0x220B, 0x220C, 0x220D, 0x2208, 0x2209, 0x220A, 0x29F5, 0x223D,
    0x223C, 0x22CD, 0x2253, 0x2252, 0x2255, 0x2254, 0x2265, 0x2264, 0x2267,
    0x2266, 0x2269, 0x2268, 0x226B, 0x226A, 0x226F, 0x226E, 0x2271, 0x2270,
    0x2273, 0x2272, 0x2275, 0x2274, 0x2277, 0x2276, 0x2279, 0x2278, 0x227B,
    0x227A, 0x227D, 0x227C, 0x227F, 0x227E, 0x2281, 0x2280, 0x2283, 0x2282,
    0x2285, 0x2284, 0x2287, 0x2286, 0x2289, 0x2288, 0x228B, 0x228A, 0x2290,
    0x228F, 0x2292, 0x2291, 0x29B8, 0x22A3, 0x22A2, 0x2ADE, 0x2AE4, 0x2AE3,
    0x2AE5, 0x22B1, 0x22B0, 0x22B3, 0x22B2, 0x22B5, 0x22B4, 0x22B7, 0x22B6,
    0x22CA, 0x22C9, 0x22CC, 0x22CB, 0x2243, 0x22D1, 0x22D0, 0x22D7, 0x22D6,
    0x22D9, 0x22D8, 0x22DB, 0x22DA, 0x22DD, 0x22DC, 0x22DF, 0x22DE, 0x22E1,
    0x22E0, 0x22E3, 0x22E2, 0x22E5, 0x22E4, 0x22E7, 0x22E6, 0x22E9, 0x22E8,
    0x22EB, 0x22EA, 0x22ED, 0x22EC, 0x22F1, 0x22F0, 0x22FA, 0x22FB, 0x22FC,
    0x22FD, 0x22FE, 0x22F2, 0x22F3, 0x22F4, 0x22F6, 0x22F7, 0x2309, 0x2308,
    0x230B, 0x230A, 0x232A, 0x2329, 0x2769, 0x2768, 0x276B, 0x276A, 0x276D,
    0x276C, 0x276F, 0x276E, 0x2771, 0x2770, 0x2773, 0x2772, 0x2775, 0x2774,
    0x27C4, 0x27C3, 0x27C6, 0x27C5, 0x27C9, 0x27C8, 0x27D6, 0x27D5, 0x27DE,
    0x27DD, 0x27E3, 0x27E2, 0x27E5, 0x27E4, 0x27E7, 0x27E6, 0x27E9, 0x27E8,
    0x27EB, 0x27EA, 0x27ED, 0x27EC, 0x27EF, 0x27EE, 0x2984, 0x2983, 0x2986,
    0x2985, 0x2988, 0x2987, 0x298A, 0x2989, 0x298C, 0x298B, 0x2990, 0x298F,
    0x298E, 0x298D, 0x2992, 0x2991, 0x2994, 0x2993, 0x2996, 0x2995, 0x2998,
    0x2997, 0x2298, 0x29C1, 0x29C0, 0x29C5, 0x29C4, 0x29D0, 0x29CF, 0x29D2,
    0x29D1, 0x29D5, 0x29D4, 0x29D9, 0x29D8, 0x29DB, 0x29DA, 0x2215, 0x29F9,
    0x29F8, 0x29FD, 0x29FC, 0x2A2C, 0x2A2B, 0x2A2E, 0x2A2D, 0x2A35, 0x2A34,
    0x2A3D, 0x2A3C, 0x2A65, 0x2A64, 0x2A7A, 0x2A79, 0x2A7E, 0x2A7D, 0x2A80,
    0x2A7F, 0x2A82, 0x2A81, 0x2A84, 0x2A83, 0x2A8C, 0x2A8B, 0x2A92, 0x2A91,
    0x2A94, 0x2A93, 0x2A96, 0x2A95, 0x2A98, 0x2A97, 0x2A9A, 0x2A99, 0x2A9C,
    0x2A9B, 0x2AA2, 0x2AA1, 0x2AA7, 0x2AA6, 0x2AA9, 0x2AA8, 0x2AAB, 0x2AAA,
    0x2AAD, 0x2AAC, 0x2AB0, 0x2AAF, 0x2AB4, 0x2AB3, 0x2ABC, 0x2ABB, 0x2ABE,
    0x2ABD, 0x2AC0, 0x2ABF, 0x2AC2, 0x2AC1, 0x2AC4, 0x2AC3, 0x2AC6, 0x2AC5,
    0x2ACE, 0x2ACD, 0x2AD0, 0x2ACF, 0x2AD2, 0x2AD1, 0x2AD4, 0x2AD3, 0x2AD6,
    0x2AD5, 0x22A6, 0x22A9, 0x22A8, 0x22AB, 0x2AED, 0x2AEC, 0x2AF8, 0x2AF7,
    0x2AFA, 0x2AF9, 0x2E03, 0x2E02, 0x2E05, 0x2E04, 0x2E0A, 0x2E09, 0x2E0D,
    0x2E0C, 0x2E1D, 0x2E1C, 0x2E21, 0x2E20, 0x2E23, 0x2E22, 0x2E25, 0x2E24,
    0x2E27, 0x2E26, 0x2E29, 0x2E28, 0x3009, 0x3008, 0x300B, 0x300A, 0x300D,
    0x300C, 0x300F, 0x300E, 0x3011, 0x3010, 0x3015, 0x3014, 0x3017, 0x3016,
    0x3019, 0x3018, 0x301B, 0x301A, 0xFE5A, 0xFE59, 0xFE5C, 0xFE5B, 0xFE5E,
    0xFE5D, 0xFE65, 0xFE64, 0xFF09, 0xFF08, 0xFF1E, 0xFF1C, 0xFF3D, 0xFF3B,
    0xFF5D, 0xFF5B, 0xFF60, 0xFF5F, 0xFF63, 0xFF62,
};

// Number of entries in kFXTextLayoutBidiMirror[]; valid mirror indices are
// strictly below this value.
constexpr size_t kFXTextLayoutBidiMirrorSize =
    std::size(kFXTextLayoutBidiMirror);
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 130 | |
| 131 | // Check that the mirror indicies in the fx_ucddata.inc table are in bounds. |
| 132 | #undef CHARPROP____ |
| 133 | #define CHARPROP____(mirror, ct, bd, bt) \ |
| 134 | static_assert(mirror == kMirrorMax || mirror < kFXTextLayoutBidiMirrorSize, \ |
| 135 | "Bad mirror index"); |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 136 | #include "core/fxcrt/fx_ucddata.inc" // NOLINT(build/include) |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 137 | #undef CHARPROP____ |
| 138 | |
| 139 | } // namespace |
| 140 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 141 | namespace pdfium { |
| 142 | namespace unicode { |
| 143 | |
| 144 | wchar_t GetMirrorChar(wchar_t wch) { |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 145 | uint16_t prop = GetUnicodeProperties(wch); |
| 146 | size_t idx = prop >> kMirrorBitPos; |
| 147 | if (idx == kMirrorMax) |
| 148 | return wch; |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 149 | DCHECK(idx < kFXTextLayoutBidiMirrorSize); |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 150 | return kFXTextLayoutBidiMirror[idx]; |
| 151 | } |
| 152 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 153 | FX_BIDICLASS GetBidiClass(wchar_t wch) { |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 154 | uint16_t prop = GetUnicodeProperties(wch); |
| 155 | uint16_t result = (prop & kBidiClassBitMask) >> kBidiClassBitPos; |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 156 | DCHECK(result <= static_cast<uint16_t>(FX_BIDICLASS::kPDF)); |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 157 | return static_cast<FX_BIDICLASS>(result); |
Philip P. Moltmann | 4d3acf4 | 2017-03-20 11:05:52 -0700 | [diff] [blame] | 158 | } |
| 159 | |
| 160 | #ifdef PDF_ENABLE_XFA |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 161 | FX_CHARTYPE GetCharType(wchar_t wch) { |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 162 | uint16_t prop = GetExtendedUnicodeProperties(wch); |
| 163 | uint16_t result = (prop & kCharTypeBitMask) >> kCharTypeBitPos; |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 164 | DCHECK(result <= static_cast<uint16_t>(FX_CHARTYPE::kArabic)); |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 165 | return static_cast<FX_CHARTYPE>(result); |
| 166 | } |
| 167 | |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 168 | FX_BREAKPROPERTY GetBreakProperty(wchar_t wch) { |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 169 | uint16_t prop = GetExtendedUnicodeProperties(wch); |
| 170 | uint16_t result = (prop & kBreakTypeBitMask) >> kBreakTypeBitPos; |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 171 | DCHECK(result <= static_cast<uint16_t>(FX_BREAKPROPERTY::kTB)); |
Haibo Huang | 49cc930 | 2020-04-27 16:14:24 -0700 | [diff] [blame] | 172 | return static_cast<FX_BREAKPROPERTY>(result); |
Philip P. Moltmann | 4d3acf4 | 2017-03-20 11:05:52 -0700 | [diff] [blame] | 173 | } |
| 174 | #endif // PDF_ENABLE_XFA |
kumarashishg | 826308d | 2023-06-23 13:21:22 +0000 | [diff] [blame] | 175 | |
| 176 | } // namespace unicode |
| 177 | } // namespace pdfium |