blob: defba4c10e6d2a2b1b2fec17ea3cb52614508488 [file] [log] [blame]
/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
40
41
Elliott Hughes9bc971b2018-07-27 13:23:14 -070042#ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD
43#define PCRE2_UCP_H_IDEMPOTENT_GUARD
Nick Kralevichf73ff172014-09-27 12:41:49 -070044
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */
Nick Kralevichf73ff172014-09-27 12:41:49 -070055
/* These are the general character categories.

The enumerators are numbered implicitly in declaration order, starting at 0
(ucp_C == 0 ... ucp_Z == 6). Do not reorder them or insert new entries in the
middle: other parts of the library rely on these exact numeric values. */

enum {
  ucp_C,     /* Other */
  ucp_L,     /* Letter */
  ucp_M,     /* Mark */
  ucp_N,     /* Number */
  ucp_P,     /* Punctuation */
  ucp_S,     /* Symbol */
  ucp_Z      /* Separator */
};
67
/* These are the particular character categories.

The two-letter suffix of each name is the category abbreviation; its first
letter matches one of the general categories (C, L, M, N, P, S, Z). The
enumerators are numbered implicitly in declaration order, starting at 0
(ucp_Cc == 0 ... ucp_Zs == 29). Do not reorder them or insert new entries in
the middle: other parts of the library rely on these exact numeric values. */

enum {
  ucp_Cc,    /* Control */
  ucp_Cf,    /* Format */
  ucp_Cn,    /* Unassigned */
  ucp_Co,    /* Private use */
  ucp_Cs,    /* Surrogate */
  ucp_Ll,    /* Lower case letter */
  ucp_Lm,    /* Modifier letter */
  ucp_Lo,    /* Other letter */
  ucp_Lt,    /* Title case letter */
  ucp_Lu,    /* Upper case letter */
  ucp_Mc,    /* Spacing mark */
  ucp_Me,    /* Enclosing mark */
  ucp_Mn,    /* Non-spacing mark */
  ucp_Nd,    /* Decimal number */
  ucp_Nl,    /* Letter number */
  ucp_No,    /* Other number */
  ucp_Pc,    /* Connector punctuation */
  ucp_Pd,    /* Dash punctuation */
  ucp_Pe,    /* Close punctuation */
  ucp_Pf,    /* Final punctuation */
  ucp_Pi,    /* Initial punctuation */
  ucp_Po,    /* Other punctuation */
  ucp_Ps,    /* Open punctuation */
  ucp_Sc,    /* Currency symbol */
  ucp_Sk,    /* Modifier symbol */
  ucp_Sm,    /* Mathematical symbol */
  ucp_So,    /* Other symbol */
  ucp_Zl,    /* Line separator */
  ucp_Zp,    /* Paragraph separator */
  ucp_Zs     /* Space separator */
};
102
/* These are grapheme break properties.

The number in each trailing comment is the enumerator's value, which follows
implicitly from its position in the list (numbering starts at 0). Keep the
list order and the comments in step with each other; new values belong at
the end. */

enum {
  ucp_gbCR,                /*  0 */
  ucp_gbLF,                /*  1 */
  ucp_gbControl,           /*  2 */
  ucp_gbExtend,            /*  3 */
  ucp_gbPrepend,           /*  4 */
  ucp_gbSpacingMark,       /*  5 */
  ucp_gbL,                 /*  6 Hangul syllable type L */
  ucp_gbV,                 /*  7 Hangul syllable type V */
  ucp_gbT,                 /*  8 Hangul syllable type T */
  ucp_gbLV,                /*  9 Hangul syllable type LV */
  ucp_gbLVT,               /* 10 Hangul syllable type LVT */
  ucp_gbRegionalIndicator, /* 11 */
  ucp_gbOther,             /* 12 */
  ucp_gbE_Base,            /* 13 */
  ucp_gbE_Modifier,        /* 14 */
  ucp_gbE_Base_GAZ,        /* 15 */
  ucp_gbZWJ,               /* 16 */
  ucp_gbGlue_After_Zwj     /* 17 */
};
125
/* These are the script identifications.

The enumerators are numbered implicitly in declaration order, starting at 0
(ucp_Arabic == 0 ... ucp_Zanabazar_Square == 140). The first group is in
alphabetical order; scripts added later are appended in groups, each labelled
with the Unicode release that introduced it, so that existing values never
change. Add any further scripts at the end. */

enum {
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0 */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1 */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2 */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0 */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0 */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0 */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0 */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  /* New for Unicode 10.0.0 (no update since 8.0.0) */
  ucp_Adlam,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square
};
279
Elliott Hughes9bc971b2018-07-27 13:23:14 -0700280#endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */
Nick Kralevichf73ff172014-09-27 12:41:49 -0700281
/* End of pcre2_ucp.h */