/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2022 University of Cambridge

This module is auto-generated from Unicode data files. DO NOT EDIT MANUALLY!
Instead, modify the maint/GenerateUcpHeader.py script and run it to generate
a new version of this code.

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/

#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
#define PCRE2_UCP_H_IDEMPOTENT_GUARD

/* This file contains definitions of the Unicode property values that are
returned by the UCD access macros and used throughout PCRE2.

IMPORTANT: The specific values of the first two enums (general and particular
character categories) are assumed by the table called catposstab in the file
pcre2_auto_possess.c. They are unlikely to change, but should be checked after
an update. */

/* These are the general character categories. The enumerators are numbered
consecutively from zero in the order listed; that order must be preserved. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z,     /* Separator */
};

/* These are the particular character categories. The enumerators are
numbered consecutively from zero in the order listed; that order must be
preserved. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs,    /* Space separator */
};

/* These are Boolean properties. The enumerators are numbered consecutively
from zero, so the final entry, ucp_Bprop_Count, equals the number of
properties listed before it. */

enum {
  ucp_ASCII,
  ucp_ASCII_Hex_Digit,
  ucp_Alphabetic,
  ucp_Bidi_Control,
  ucp_Bidi_Mirrored,
  ucp_Case_Ignorable,
  ucp_Cased,
  ucp_Changes_When_Casefolded,
  ucp_Changes_When_Casemapped,
  ucp_Changes_When_Lowercased,
  ucp_Changes_When_Titlecased,
  ucp_Changes_When_Uppercased,
  ucp_Dash,
  ucp_Default_Ignorable_Code_Point,
  ucp_Deprecated,
  ucp_Diacritic,
  ucp_Emoji,
  ucp_Emoji_Component,
  ucp_Emoji_Modifier,
  ucp_Emoji_Modifier_Base,
  ucp_Emoji_Presentation,
  ucp_Extended_Pictographic,
  ucp_Extender,
  ucp_Grapheme_Base,
  ucp_Grapheme_Extend,
  ucp_Grapheme_Link,
  ucp_Hex_Digit,
  ucp_IDS_Binary_Operator,
  ucp_IDS_Trinary_Operator,
  ucp_ID_Continue,
  ucp_ID_Start,
  ucp_Ideographic,
  ucp_Join_Control,
  ucp_Logical_Order_Exception,
  ucp_Lowercase,
  ucp_Math,
  ucp_Noncharacter_Code_Point,
  ucp_Pattern_Syntax,
  ucp_Pattern_White_Space,
  ucp_Prepended_Concatenation_Mark,
  ucp_Quotation_Mark,
  ucp_Radical,
  ucp_Regional_Indicator,
  ucp_Sentence_Terminal,
  ucp_Soft_Dotted,
  ucp_Terminal_Punctuation,
  ucp_Unified_Ideograph,
  ucp_Uppercase,
  ucp_Variation_Selector,
  ucp_White_Space,
  ucp_XID_Continue,
  ucp_XID_Start,
  /* This must be last */
  ucp_Bprop_Count
};

/* Size of entries in ucd_boolprop_sets[] */

#define ucd_boolprop_sets_item_size 2

/* These are the bidi class values. The enumerators are numbered
consecutively from zero in the order listed. */

enum {
  ucp_bidiAL,   /* Arabic letter */
  ucp_bidiAN,   /* Arabic number */
  ucp_bidiB,    /* Paragraph separator */
  ucp_bidiBN,   /* Boundary neutral */
  ucp_bidiCS,   /* Common separator */
  ucp_bidiEN,   /* European number */
  ucp_bidiES,   /* European separator */
  ucp_bidiET,   /* European terminator */
  ucp_bidiFSI,  /* First strong isolate */
  ucp_bidiL,    /* Left to right */
  ucp_bidiLRE,  /* Left to right embedding */
  ucp_bidiLRI,  /* Left to right isolate */
  ucp_bidiLRO,  /* Left to right override */
  ucp_bidiNSM,  /* Non-spacing mark */
  ucp_bidiON,   /* Other neutral */
  ucp_bidiPDF,  /* Pop directional format */
  ucp_bidiPDI,  /* Pop directional isolate */
  ucp_bidiR,    /* Right to left */
  ucp_bidiRLE,  /* Right to left embedding */
  ucp_bidiRLI,  /* Right to left isolate */
  ucp_bidiRLO,  /* Right to left override */
  ucp_bidiS,    /* Segment separator */
  ucp_bidiWS,   /* White space */
};

/* These are grapheme break properties. The Extended Pictographic property
comes from the emoji-data.txt file. The number in each trailing comment is
the enumerator's value. */

enum {
  ucp_gbCR,                    /*  0 */
  ucp_gbLF,                    /*  1 */
  ucp_gbControl,               /*  2 */
  ucp_gbExtend,                /*  3 */
  ucp_gbPrepend,               /*  4 */
  ucp_gbSpacingMark,           /*  5 */
  ucp_gbL,                     /*  6 Hangul syllable type L */
  ucp_gbV,                     /*  7 Hangul syllable type V */
  ucp_gbT,                     /*  8 Hangul syllable type T */
  ucp_gbLV,                    /*  9 Hangul syllable type LV */
  ucp_gbLVT,                   /* 10 Hangul syllable type LVT */
  ucp_gbRegional_Indicator,    /* 11 */
  ucp_gbOther,                 /* 12 */
  ucp_gbZWJ,                   /* 13 */
  ucp_gbExtended_Pictographic, /* 14 */
};

/* These are the script identifications. The list is split into two groups,
and the enumerators are numbered consecutively from zero across both, so the
final entry, ucp_Script_Count, equals the number of scripts listed. */

enum {
  /* Scripts which have characters in other scripts. */
  ucp_Latin,
  ucp_Greek,
  ucp_Cyrillic,
  ucp_Arabic,
  ucp_Syriac,
  ucp_Thaana,
  ucp_Devanagari,
  ucp_Bengali,
  ucp_Gurmukhi,
  ucp_Gujarati,
  ucp_Oriya,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Kannada,
  ucp_Malayalam,
  ucp_Sinhala,
  ucp_Myanmar,
  ucp_Georgian,
  ucp_Hangul,
  ucp_Mongolian,
  ucp_Hiragana,
  ucp_Katakana,
  ucp_Bopomofo,
  ucp_Han,
  ucp_Yi,
  ucp_Tagalog,
  ucp_Hanunoo,
  ucp_Buhid,
  ucp_Tagbanwa,
  ucp_Limbu,
  ucp_Tai_Le,
  ucp_Linear_B,
  ucp_Cypriot,
  ucp_Buginese,
  ucp_Coptic,
  ucp_Glagolitic,
  ucp_Syloti_Nagri,
  ucp_Phags_Pa,
  ucp_Nko,
  ucp_Kayah_Li,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Mandaic,
  ucp_Chakma,
  ucp_Sharada,
  ucp_Takri,
  ucp_Duployan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Modi,
  ucp_Old_Permic,
  ucp_Psalter_Pahlavi,
  ucp_Khudawadi,
  ucp_Tirhuta,
  ucp_Multani,
  ucp_Adlam,
  ucp_Masaram_Gondi,
  ucp_Dogra,
  ucp_Gunjala_Gondi,
  ucp_Hanifi_Rohingya,
  ucp_Sogdian,
  ucp_Nandinagari,
  ucp_Yezidi,
  ucp_Cypro_Minoan,
  ucp_Old_Uyghur,

  /* Scripts which have no characters in other scripts. */
  ucp_Unknown,
  ucp_Common,
  ucp_Armenian,
  ucp_Hebrew,
  ucp_Thai,
  ucp_Lao,
  ucp_Tibetan,
  ucp_Ethiopic,
  ucp_Cherokee,
  ucp_Canadian_Aboriginal,
  ucp_Ogham,
  ucp_Runic,
  ucp_Khmer,
  ucp_Old_Italic,
  ucp_Gothic,
  ucp_Deseret,
  ucp_Inherited,
  ucp_Ugaritic,
  ucp_Shavian,
  ucp_Osmanya,
  ucp_Braille,
  ucp_New_Tai_Lue,
  ucp_Tifinagh,
  ucp_Old_Persian,
  ucp_Kharoshthi,
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Phoenician,
  ucp_Sundanese,
  ucp_Lepcha,
  ucp_Ol_Chiki,
  ucp_Vai,
  ucp_Saurashtra,
  ucp_Rejang,
  ucp_Lycian,
  ucp_Carian,
  ucp_Lydian,
  ucp_Cham,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  ucp_Avestan,
  ucp_Egyptian_Hieroglyphs,
  ucp_Samaritan,
  ucp_Lisu,
  ucp_Bamum,
  ucp_Meetei_Mayek,
  ucp_Imperial_Aramaic,
  ucp_Old_South_Arabian,
  ucp_Inscriptional_Parthian,
  ucp_Inscriptional_Pahlavi,
  ucp_Old_Turkic,
  ucp_Batak,
  ucp_Brahmi,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sora_Sompeng,
  ucp_Caucasian_Albanian,
  ucp_Bassa_Vah,
  ucp_Elbasan,
  ucp_Pahawh_Hmong,
  ucp_Mende_Kikakui,
  ucp_Mro,
  ucp_Old_North_Arabian,
  ucp_Nabataean,
  ucp_Palmyrene,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Warang_Citi,
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square,
  ucp_Makasar,
  ucp_Medefaidrin,
  ucp_Old_Sogdian,
  ucp_Elymaic,
  ucp_Nyiakeng_Puachue_Hmong,
  ucp_Wancho,
  ucp_Chorasmian,
  ucp_Dives_Akuru,
  ucp_Khitan_Small_Script,
  ucp_Tangsa,
  ucp_Toto,
  ucp_Vithkuqi,

  /* This must be last */
  ucp_Script_Count
};

/* Size of entries in ucd_script_sets[] */

#define ucd_script_sets_item_size 3

#endif  /* PCRE2_UCP_H_IDEMPOTENT_GUARD */

/* End of pcre2_ucp.h */