/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/
| 40 | |
| 41 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame^] | 42 | #ifndef PCRE2_UCP_H_IDEMPOTENT_GUARD |
| 43 | #define PCRE2_UCP_H_IDEMPOTENT_GUARD |
Nick Kralevich | f73ff17 | 2014-09-27 12:41:49 -0700 | [diff] [blame] | 44 | |
/* This file contains definitions of the property values that are returned by
the UCD access macros. New values that are added for new releases of Unicode
should always be at the end of each enum, for backwards compatibility.

IMPORTANT: Note also that the specific numeric values of the enums have to be
the same as the values that are generated by the maint/MultiStage2.py script,
where the equivalent property descriptive names are listed in vectors.

ALSO: The specific values of the first two enums are assumed for the table
called catposstab in pcre2_compile.c. */
Nick Kralevich | f73ff17 | 2014-09-27 12:41:49 -0700 | [diff] [blame] | 55 | |
/* These are the general character categories.

The numeric values are written out explicitly because they are load-bearing:
they have to equal the values generated by the maint/MultiStage2.py script,
and they are assumed by the table called catposstab in pcre2_compile.c. Any
value added for a new Unicode release must be appended at the end. */

enum {
  ucp_C = 0,     /* Other */
  ucp_L = 1,     /* Letter */
  ucp_M = 2,     /* Mark */
  ucp_N = 3,     /* Number */
  ucp_P = 4,     /* Punctuation */
  ucp_S = 5,     /* Symbol */
  ucp_Z = 6      /* Separator */
};
| 67 | |
/* These are the particular character categories.

The numeric values are written out explicitly because they are load-bearing:
they have to equal the values generated by the maint/MultiStage2.py script,
and they are assumed by the table called catposstab in pcre2_compile.c. Any
value added for a new Unicode release must be appended at the end. */

enum {
  ucp_Cc = 0,    /* Control */
  ucp_Cf = 1,    /* Format */
  ucp_Cn = 2,    /* Unassigned */
  ucp_Co = 3,    /* Private use */
  ucp_Cs = 4,    /* Surrogate */
  ucp_Ll = 5,    /* Lower case letter */
  ucp_Lm = 6,    /* Modifier letter */
  ucp_Lo = 7,    /* Other letter */
  ucp_Lt = 8,    /* Title case letter */
  ucp_Lu = 9,    /* Upper case letter */
  ucp_Mc = 10,   /* Spacing mark */
  ucp_Me = 11,   /* Enclosing mark */
  ucp_Mn = 12,   /* Non-spacing mark */
  ucp_Nd = 13,   /* Decimal number */
  ucp_Nl = 14,   /* Letter number */
  ucp_No = 15,   /* Other number */
  ucp_Pc = 16,   /* Connector punctuation */
  ucp_Pd = 17,   /* Dash punctuation */
  ucp_Pe = 18,   /* Close punctuation */
  ucp_Pf = 19,   /* Final punctuation */
  ucp_Pi = 20,   /* Initial punctuation */
  ucp_Po = 21,   /* Other punctuation */
  ucp_Ps = 22,   /* Open punctuation */
  ucp_Sc = 23,   /* Currency symbol */
  ucp_Sk = 24,   /* Modifier symbol */
  ucp_Sm = 25,   /* Mathematical symbol */
  ucp_So = 26,   /* Other symbol */
  ucp_Zl = 27,   /* Line separator */
  ucp_Zp = 28,   /* Paragraph separator */
  ucp_Zs = 29    /* Space separator */
};
| 102 | |
/* These are grapheme break properties.

The numeric values are written out explicitly (they were previously recorded
only in comments) because they have to equal the values generated by the
maint/MultiStage2.py script. Any value added for a new Unicode release must be
appended at the end. */

enum {
  ucp_gbCR                = 0,
  ucp_gbLF                = 1,
  ucp_gbControl           = 2,
  ucp_gbExtend            = 3,
  ucp_gbPrepend           = 4,
  ucp_gbSpacingMark       = 5,
  ucp_gbL                 = 6,   /* Hangul syllable type L */
  ucp_gbV                 = 7,   /* Hangul syllable type V */
  ucp_gbT                 = 8,   /* Hangul syllable type T */
  ucp_gbLV                = 9,   /* Hangul syllable type LV */
  ucp_gbLVT               = 10,  /* Hangul syllable type LVT */
  ucp_gbRegionalIndicator = 11,
  ucp_gbOther             = 12,
  ucp_gbE_Base            = 13,
  ucp_gbE_Modifier        = 14,
  ucp_gbE_Base_GAZ        = 15,
  ucp_gbZWJ               = 16,
  ucp_gbGlue_After_Zwj    = 17
};
| 125 | |
/* These are the script identifications.

Values are assigned implicitly, starting at zero, in the order listed, and
have to equal the values generated by the maint/MultiStage2.py script. Never
reorder or insert entries: scripts added for a new Unicode release must be
appended at the end. The value range covered by each group is noted on the
group's heading comment. */

enum {
  /* Original set (values 0-60) */
  ucp_Arabic,
  ucp_Armenian,
  ucp_Bengali,
  ucp_Bopomofo,
  ucp_Braille,
  ucp_Buginese,
  ucp_Buhid,
  ucp_Canadian_Aboriginal,
  ucp_Cherokee,
  ucp_Common,
  ucp_Coptic,
  ucp_Cypriot,
  ucp_Cyrillic,
  ucp_Deseret,
  ucp_Devanagari,
  ucp_Ethiopic,
  ucp_Georgian,
  ucp_Glagolitic,
  ucp_Gothic,
  ucp_Greek,
  ucp_Gujarati,
  ucp_Gurmukhi,
  ucp_Han,
  ucp_Hangul,
  ucp_Hanunoo,
  ucp_Hebrew,
  ucp_Hiragana,
  ucp_Inherited,
  ucp_Kannada,
  ucp_Katakana,
  ucp_Kharoshthi,
  ucp_Khmer,
  ucp_Lao,
  ucp_Latin,
  ucp_Limbu,
  ucp_Linear_B,
  ucp_Malayalam,
  ucp_Mongolian,
  ucp_Myanmar,
  ucp_New_Tai_Lue,
  ucp_Ogham,
  ucp_Old_Italic,
  ucp_Old_Persian,
  ucp_Oriya,
  ucp_Osmanya,
  ucp_Runic,
  ucp_Shavian,
  ucp_Sinhala,
  ucp_Syloti_Nagri,
  ucp_Syriac,
  ucp_Tagalog,
  ucp_Tagbanwa,
  ucp_Tai_Le,
  ucp_Tamil,
  ucp_Telugu,
  ucp_Thaana,
  ucp_Thai,
  ucp_Tibetan,
  ucp_Tifinagh,
  ucp_Ugaritic,
  ucp_Yi,
  /* New for Unicode 5.0 (values 61-65) */
  ucp_Balinese,
  ucp_Cuneiform,
  ucp_Nko,
  ucp_Phags_Pa,
  ucp_Phoenician,
  /* New for Unicode 5.1 (values 66-76) */
  ucp_Carian,
  ucp_Cham,
  ucp_Kayah_Li,
  ucp_Lepcha,
  ucp_Lycian,
  ucp_Lydian,
  ucp_Ol_Chiki,
  ucp_Rejang,
  ucp_Saurashtra,
  ucp_Sundanese,
  ucp_Vai,
  /* New for Unicode 5.2 (values 77-91) */
  ucp_Avestan,
  ucp_Bamum,
  ucp_Egyptian_Hieroglyphs,
  ucp_Imperial_Aramaic,
  ucp_Inscriptional_Pahlavi,
  ucp_Inscriptional_Parthian,
  ucp_Javanese,
  ucp_Kaithi,
  ucp_Lisu,
  ucp_Meetei_Mayek,
  ucp_Old_South_Arabian,
  ucp_Old_Turkic,
  ucp_Samaritan,
  ucp_Tai_Tham,
  ucp_Tai_Viet,
  /* New for Unicode 6.0.0 (values 92-94) */
  ucp_Batak,
  ucp_Brahmi,
  ucp_Mandaic,
  /* New for Unicode 6.1.0 (values 95-101) */
  ucp_Chakma,
  ucp_Meroitic_Cursive,
  ucp_Meroitic_Hieroglyphs,
  ucp_Miao,
  ucp_Sharada,
  ucp_Sora_Sompeng,
  ucp_Takri,
  /* New for Unicode 7.0.0 (values 102-124) */
  ucp_Bassa_Vah,
  ucp_Caucasian_Albanian,
  ucp_Duployan,
  ucp_Elbasan,
  ucp_Grantha,
  ucp_Khojki,
  ucp_Khudawadi,
  ucp_Linear_A,
  ucp_Mahajani,
  ucp_Manichaean,
  ucp_Mende_Kikakui,
  ucp_Modi,
  ucp_Mro,
  ucp_Nabataean,
  ucp_Old_North_Arabian,
  ucp_Old_Permic,
  ucp_Pahawh_Hmong,
  ucp_Palmyrene,
  ucp_Psalter_Pahlavi,
  ucp_Pau_Cin_Hau,
  ucp_Siddham,
  ucp_Tirhuta,
  ucp_Warang_Citi,
  /* New for Unicode 8.0.0 (values 125-130) */
  ucp_Ahom,
  ucp_Anatolian_Hieroglyphs,
  ucp_Hatran,
  ucp_Multani,
  ucp_Old_Hungarian,
  ucp_SignWriting,
  /* New for Unicode 10.0.0 (no update since 8.0.0) (values 131-140) */
  ucp_Adlam,
  ucp_Bhaiksuki,
  ucp_Marchen,
  ucp_Newa,
  ucp_Osage,
  ucp_Tangut,
  ucp_Masaram_Gondi,
  ucp_Nushu,
  ucp_Soyombo,
  ucp_Zanabazar_Square
};
| 279 | |
Elliott Hughes | 9bc971b | 2018-07-27 13:23:14 -0700 | [diff] [blame^] | 280 | #endif /* PCRE2_UCP_H_IDEMPOTENT_GUARD */ |
Nick Kralevich | f73ff17 | 2014-09-27 12:41:49 -0700 | [diff] [blame] | 281 | |
/* End of pcre2_ucp.h */