Elliott Hughes | 5b80804 | 2021-10-01 10:56:10 -0700 | [diff] [blame] | 1 | /************************************************* |
| 2 | * Perl-Compatible Regular Expressions * |
| 3 | *************************************************/ |
| 4 | |
| 5 | /* PCRE is a library of functions to support regular expressions whose syntax |
| 6 | and semantics are as close as possible to those of the Perl 5 language. |
| 7 | |
| 8 | Written by Philip Hazel |
| 9 | Original API code Copyright (c) 1997-2012 University of Cambridge |
| 10 | New API code Copyright (c) 2016-2021 University of Cambridge |
| 11 | |
| 12 | ----------------------------------------------------------------------------- |
| 13 | Redistribution and use in source and binary forms, with or without |
| 14 | modification, are permitted provided that the following conditions are met: |
| 15 | |
| 16 | * Redistributions of source code must retain the above copyright notice, |
| 17 | this list of conditions and the following disclaimer. |
| 18 | |
| 19 | * Redistributions in binary form must reproduce the above copyright |
| 20 | notice, this list of conditions and the following disclaimer in the |
| 21 | documentation and/or other materials provided with the distribution. |
| 22 | |
| 23 | * Neither the name of the University of Cambridge nor the names of its |
| 24 | contributors may be used to endorse or promote products derived from |
| 25 | this software without specific prior written permission. |
| 26 | |
| 27 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 28 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 29 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 30 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE |
| 31 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR |
| 32 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF |
| 33 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS |
| 34 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN |
| 35 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) |
| 36 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE |
| 37 | POSSIBILITY OF SUCH DAMAGE. |
| 38 | ----------------------------------------------------------------------------- |
| 39 | */ |
| 40 | |
| 41 | |
| 42 | #ifdef HAVE_CONFIG_H |
| 43 | #include "config.h" |
| 44 | #endif |
| 45 | |
| 46 | #include "pcre2_internal.h" |
| 47 | |
/* Two-level stringification. STRING() turns its argument into a string literal
exactly as written; XSTRING() passes its argument through one extra level of
macro call so that it is macro-expanded before STRING() stringifies it. This is
what lets a numeric macro's value, rather than its name, be pasted into a
message text. */

#define STRING(x) #x
#define XSTRING(x) STRING(x)
| 50 | |
| 51 | /* The texts of compile-time error messages. Compile-time error numbers start |
| 52 | at COMPILE_ERROR_BASE (100). |
| 53 | |
| 54 | This used to be a table of strings, but in order to reduce the number of |
| 55 | relocations needed when a shared library is loaded dynamically, it is now one |
| 56 | long string. We cannot use a table of offsets, because the lengths of inserts |
| 57 | such as XSTRING(MAX_NAME_SIZE) are not known. Instead, |
| 58 | pcre2_get_error_message() counts through to the one it wants - this isn't a |
| 59 | performance issue because these strings are used only when there is an error. |
| 60 | |
| 61 | Each substring ends with \0 to insert a null character. This includes the final |
| 62 | substring, so that the whole string ends with \0\0, which can be detected when |
| 63 | counting through. */ |
| 64 | |
| 65 | static const unsigned char compile_error_texts[] = |
| 66 | "no error\0" |
| 67 | "\\ at end of pattern\0" |
| 68 | "\\c at end of pattern\0" |
| 69 | "unrecognized character follows \\\0" |
| 70 | "numbers out of order in {} quantifier\0" |
| 71 | /* 5 */ |
| 72 | "number too big in {} quantifier\0" |
| 73 | "missing terminating ] for character class\0" |
| 74 | "escape sequence is invalid in character class\0" |
| 75 | "range out of order in character class\0" |
| 76 | "quantifier does not follow a repeatable item\0" |
| 77 | /* 10 */ |
| 78 | "internal error: unexpected repeat\0" |
| 79 | "unrecognized character after (? or (?-\0" |
| 80 | "POSIX named classes are supported only within a class\0" |
| 81 | "POSIX collating elements are not supported\0" |
| 82 | "missing closing parenthesis\0" |
| 83 | /* 15 */ |
| 84 | "reference to non-existent subpattern\0" |
| 85 | "pattern passed as NULL\0" |
| 86 | "unrecognised compile-time option bit(s)\0" |
| 87 | "missing ) after (?# comment\0" |
| 88 | "parentheses are too deeply nested\0" |
| 89 | /* 20 */ |
| 90 | "regular expression is too large\0" |
| 91 | "failed to allocate heap memory\0" |
| 92 | "unmatched closing parenthesis\0" |
| 93 | "internal error: code overflow\0" |
| 94 | "missing closing parenthesis for condition\0" |
| 95 | /* 25 */ |
| 96 | "lookbehind assertion is not fixed length\0" |
| 97 | "a relative value of zero is not allowed\0" |
| 98 | "conditional subpattern contains more than two branches\0" |
| 99 | "assertion expected after (?( or (?(?C)\0" |
| 100 | "digit expected after (?+ or (?-\0" |
| 101 | /* 30 */ |
| 102 | "unknown POSIX class name\0" |
| 103 | "internal error in pcre2_study(): should not occur\0" |
| 104 | "this version of PCRE2 does not have Unicode support\0" |
| 105 | "parentheses are too deeply nested (stack check)\0" |
| 106 | "character code point value in \\x{} or \\o{} is too large\0" |
| 107 | /* 35 */ |
| 108 | "lookbehind is too complicated\0" |
| 109 | "\\C is not allowed in a lookbehind assertion in UTF-" XSTRING(PCRE2_CODE_UNIT_WIDTH) " mode\0" |
| 110 | "PCRE2 does not support \\F, \\L, \\l, \\N{name}, \\U, or \\u\0" |
| 111 | "number after (?C is greater than 255\0" |
| 112 | "closing parenthesis for (?C expected\0" |
| 113 | /* 40 */ |
| 114 | "invalid escape sequence in (*VERB) name\0" |
| 115 | "unrecognized character after (?P\0" |
| 116 | "syntax error in subpattern name (missing terminator?)\0" |
| 117 | "two named subpatterns have the same name (PCRE2_DUPNAMES not set)\0" |
| 118 | "subpattern name must start with a non-digit\0" |
| 119 | /* 45 */ |
| 120 | "this version of PCRE2 does not have support for \\P, \\p, or \\X\0" |
| 121 | "malformed \\P or \\p sequence\0" |
Elliott Hughes | 4e19c8e | 2022-04-15 15:11:02 -0700 | [diff] [blame] | 122 | "unknown property after \\P or \\p\0" |
Elliott Hughes | 5b80804 | 2021-10-01 10:56:10 -0700 | [diff] [blame] | 123 | "subpattern name is too long (maximum " XSTRING(MAX_NAME_SIZE) " code units)\0" |
| 124 | "too many named subpatterns (maximum " XSTRING(MAX_NAME_COUNT) ")\0" |
| 125 | /* 50 */ |
| 126 | "invalid range in character class\0" |
| 127 | "octal value is greater than \\377 in 8-bit non-UTF-8 mode\0" |
| 128 | "internal error: overran compiling workspace\0" |
| 129 | "internal error: previously-checked referenced subpattern not found\0" |
| 130 | "DEFINE subpattern contains more than one branch\0" |
| 131 | /* 55 */ |
| 132 | "missing opening brace after \\o\0" |
| 133 | "internal error: unknown newline setting\0" |
| 134 | "\\g is not followed by a braced, angle-bracketed, or quoted name/number or by a plain number\0" |
| 135 | "(?R (recursive pattern call) must be followed by a closing parenthesis\0" |
| 136 | /* "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0" */ |
| 137 | "obsolete error (should not occur)\0" /* Was the above */ |
| 138 | /* 60 */ |
| 139 | "(*VERB) not recognized or malformed\0" |
| 140 | "subpattern number is too big\0" |
| 141 | "subpattern name expected\0" |
| 142 | "internal error: parsed pattern overflow\0" |
| 143 | "non-octal character in \\o{} (closing brace missing?)\0" |
| 144 | /* 65 */ |
| 145 | "different names for subpatterns of the same number are not allowed\0" |
| 146 | "(*MARK) must have an argument\0" |
| 147 | "non-hex character in \\x{} (closing brace missing?)\0" |
| 148 | #ifndef EBCDIC |
| 149 | "\\c must be followed by a printable ASCII character\0" |
| 150 | #else |
| 151 | "\\c must be followed by a letter or one of [\\]^_?\0" |
| 152 | #endif |
| 153 | "\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
| 154 | /* 70 */ |
| 155 | "internal error: unknown meta code in check_lookbehinds()\0" |
| 156 | "\\N is not supported in a class\0" |
| 157 | "callout string is too long\0" |
| 158 | "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0" |
| 159 | "using UTF is disabled by the application\0" |
| 160 | /* 75 */ |
| 161 | "using UCP is disabled by the application\0" |
| 162 | "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0" |
| 163 | "character code point value in \\u.... sequence is too large\0" |
| 164 | "digits missing in \\x{} or \\o{} or \\N{U+}\0" |
| 165 | "syntax error or number too big in (?(VERSION condition\0" |
| 166 | /* 80 */ |
| 167 | "internal error: unknown opcode in auto_possessify()\0" |
| 168 | "missing terminating delimiter for callout with string argument\0" |
| 169 | "unrecognized string delimiter follows (?C\0" |
| 170 | "using \\C is disabled by the application\0" |
| 171 | "(?| and/or (?J: or (?x: parentheses are too deeply nested\0" |
| 172 | /* 85 */ |
| 173 | "using \\C is disabled in this PCRE2 library\0" |
| 174 | "regular expression is too complicated\0" |
| 175 | "lookbehind assertion is too long\0" |
| 176 | "pattern string is longer than the limit set by the application\0" |
| 177 | "internal error: unknown code in parsed pattern\0" |
| 178 | /* 90 */ |
| 179 | "internal error: bad code value in parsed_skip()\0" |
| 180 | "PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES is not allowed in UTF-16 mode\0" |
| 181 | "invalid option bits with PCRE2_LITERAL\0" |
| 182 | "\\N{U+dddd} is supported only in Unicode (UTF) mode\0" |
| 183 | "invalid hyphen in option setting\0" |
| 184 | /* 95 */ |
| 185 | "(*alpha_assertion) not recognized\0" |
| 186 | "script runs require Unicode support, which this version of PCRE2 does not have\0" |
| 187 | "too many capturing groups (maximum 65535)\0" |
| 188 | "atomic assertion expected after (?( or (?(?C)\0" |
| 189 | "\\K is not allowed in lookarounds (but see PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)\0" |
| 190 | ; |
| 191 | |
/* Match-time and UTF error message texts. They are held as one long string in
which every entry, including the last, finishes with an explicit \0, so that
the whole string ends with \0\0. pcre2_get_error_message() selects an entry by
counting through the string, using the negated (negative) error number as the
entry number. The comments below give the entry numbers of each group of
five. */

static const unsigned char match_error_texts[] =
  /* 0-4 */
  "no error\0"
  "no match\0"
  "partial match\0"
  "UTF-8 error: 1 byte missing at end\0"
  "UTF-8 error: 2 bytes missing at end\0"
  /* 5-9 */
  "UTF-8 error: 3 bytes missing at end\0"
  "UTF-8 error: 4 bytes missing at end\0"
  "UTF-8 error: 5 bytes missing at end\0"
  "UTF-8 error: byte 2 top bits not 0x80\0"
  "UTF-8 error: byte 3 top bits not 0x80\0"
  /* 10-14 */
  "UTF-8 error: byte 4 top bits not 0x80\0"
  "UTF-8 error: byte 5 top bits not 0x80\0"
  "UTF-8 error: byte 6 top bits not 0x80\0"
  "UTF-8 error: 5-byte character is not allowed (RFC 3629)\0"
  "UTF-8 error: 6-byte character is not allowed (RFC 3629)\0"
  /* 15-19 */
  "UTF-8 error: code points greater than 0x10ffff are not defined\0"
  "UTF-8 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-8 error: overlong 2-byte sequence\0"
  "UTF-8 error: overlong 3-byte sequence\0"
  "UTF-8 error: overlong 4-byte sequence\0"
  /* 20-24 */
  "UTF-8 error: overlong 5-byte sequence\0"
  "UTF-8 error: overlong 6-byte sequence\0"
  "UTF-8 error: isolated byte with 0x80 bit set\0"
  "UTF-8 error: illegal byte (0xfe or 0xff)\0"
  "UTF-16 error: missing low surrogate at end\0"
  /* 25-29 */
  "UTF-16 error: invalid low surrogate\0"
  "UTF-16 error: isolated low surrogate\0"
  "UTF-32 error: code points 0xd800-0xdfff are not defined\0"
  "UTF-32 error: code points greater than 0x10ffff are not defined\0"
  "bad data value\0"
  /* 30-34 */
  "patterns do not all use the same character tables\0"
  "magic number missing\0"
  "pattern compiled in wrong mode: 8/16/32-bit error\0"
  "bad offset value\0"
  "bad option value\0"
  /* 35-39 (entry 37 is never returned by PCRE2 itself) */
  "invalid replacement string\0"
  "bad offset into UTF string\0"
  "callout error code\0"
  "invalid data in workspace for DFA restart\0"
  "too much recursion for DFA matching\0"
  /* 40-44 */
  "backreference condition or recursion test is not supported for DFA matching\0"
  "function is not supported for DFA matching\0"
  "pattern contains an item that is not supported for DFA matching\0"
  "workspace size exceeded in DFA matching\0"
  "internal error - pattern overwritten?\0"
  /* 45-49 */
  "bad JIT option\0"
  "JIT stack limit reached\0"
  "match limit exceeded\0"
  "no more memory\0"
  "unknown substring\0"
  /* 50-54 */
  "non-unique substring name\0"
  "NULL argument passed with non-zero length\0"
  "nested recursion at the same subject position\0"
  "matching depth limit exceeded\0"
  "requested value is not available\0"
  /* 55-59 */
  "requested value is not set\0"
  "offset limit set without PCRE2_USE_OFFSET_LIMIT\0"
  "bad escape sequence in replacement string\0"
  "expected closing curly bracket in replacement string\0"
  "bad substitution in replacement string\0"
  /* 60-64 */
  "match with end before start or start moved backwards is not supported\0"
  "too many replacements (more than INT_MAX)\0"
  "bad serialized data\0"
  "heap limit exceeded\0"
  "invalid syntax\0"
  /* 65-66 */
  "internal error - duplicate substitution match\0"
  "PCRE2_MATCH_INVALID_UTF is not supported for DFA matching\0"
  ;
| 276 | |
| 277 | |
| 278 | /************************************************* |
| 279 | * Return error message * |
| 280 | *************************************************/ |
| 281 | |
| 282 | /* This function copies an error message into a buffer whose units are of an |
| 283 | appropriate width. Error numbers are positive for compile-time errors, and |
| 284 | negative for match-time errors (except for UTF errors), but the numbers are all |
| 285 | distinct. |
| 286 | |
| 287 | Arguments: |
| 288 | enumber error number |
| 289 | buffer where to put the message (zero terminated) |
| 290 | size size of the buffer in code units |
| 291 | |
| 292 | Returns: length of message if all is well |
| 293 | negative on error |
| 294 | */ |
| 295 | |
| 296 | PCRE2_EXP_DEFN int PCRE2_CALL_CONVENTION |
| 297 | pcre2_get_error_message(int enumber, PCRE2_UCHAR *buffer, PCRE2_SIZE size) |
| 298 | { |
| 299 | const unsigned char *message; |
| 300 | PCRE2_SIZE i; |
| 301 | int n; |
| 302 | |
| 303 | if (size == 0) return PCRE2_ERROR_NOMEMORY; |
| 304 | |
| 305 | if (enumber >= COMPILE_ERROR_BASE) /* Compile error */ |
| 306 | { |
| 307 | message = compile_error_texts; |
| 308 | n = enumber - COMPILE_ERROR_BASE; |
| 309 | } |
| 310 | else if (enumber < 0) /* Match or UTF error */ |
| 311 | { |
| 312 | message = match_error_texts; |
| 313 | n = -enumber; |
| 314 | } |
| 315 | else /* Invalid error number */ |
| 316 | { |
| 317 | message = (unsigned char *)"\0"; /* Empty message list */ |
| 318 | n = 1; |
| 319 | } |
| 320 | |
| 321 | for (; n > 0; n--) |
| 322 | { |
| 323 | while (*message++ != CHAR_NUL) {}; |
| 324 | if (*message == CHAR_NUL) return PCRE2_ERROR_BADDATA; |
| 325 | } |
| 326 | |
| 327 | for (i = 0; *message != 0; i++) |
| 328 | { |
| 329 | if (i >= size - 1) |
| 330 | { |
| 331 | buffer[i] = 0; /* Terminate partial message */ |
| 332 | return PCRE2_ERROR_NOMEMORY; |
| 333 | } |
| 334 | buffer[i] = *message++; |
| 335 | } |
| 336 | |
| 337 | buffer[i] = 0; |
| 338 | return (int)i; |
| 339 | } |
| 340 | |
| 341 | /* End of pcre2_error.c */ |