/*************************************************
*      Perl-Compatible Regular Expressions       *
*************************************************/

/* PCRE2 is a library of functions to support regular expressions whose syntax
and semantics are as close as possible to those of the Perl 5 language. This is
the public header file to be #included by applications that call PCRE2 via the
POSIX wrapper interface.

                       Written by Philip Hazel
     Original API code Copyright (c) 1997-2012 University of Cambridge
          New API code Copyright (c) 2016-2019 University of Cambridge

-----------------------------------------------------------------------------
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
      this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright
      notice, this list of conditions and the following disclaimer in the
      documentation and/or other materials provided with the distribution.

    * Neither the name of the University of Cambridge nor the names of its
      contributors may be used to endorse or promote products derived from
      this software without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.
-----------------------------------------------------------------------------
*/


/* Guard against multiple inclusion. This header defines an anonymous enum and
anonymous-struct typedefs, so processing it twice in one translation unit would
otherwise produce redefinition errors. */

#ifndef PCRE2POSIX_H_IDEMPOTENT_GUARD
#define PCRE2POSIX_H_IDEMPOTENT_GUARD

/* Have to include stdlib.h in order to ensure that size_t is defined. */

#include <stdlib.h>

/* Allow for C++ users */

#ifdef __cplusplus
extern "C" {
#endif

/* Options, mostly defined by POSIX, but with some extras. Each option is a
distinct single bit, so options can be combined with bitwise OR. */

#define REG_ICASE     0x0001  /* Maps to PCRE2_CASELESS */
#define REG_NEWLINE   0x0002  /* Maps to PCRE2_MULTILINE */
#define REG_NOTBOL    0x0004  /* Maps to PCRE2_NOTBOL */
#define REG_NOTEOL    0x0008  /* Maps to PCRE2_NOTEOL */
#define REG_DOTALL    0x0010  /* NOT defined by POSIX; maps to PCRE2_DOTALL */
#define REG_NOSUB     0x0020  /* Do not report what was matched */
#define REG_UTF       0x0040  /* NOT defined by POSIX; maps to PCRE2_UTF */
#define REG_STARTEND  0x0080  /* BSD feature: pass subject string by so,eo */
#define REG_NOTEMPTY  0x0100  /* NOT defined by POSIX; maps to PCRE2_NOTEMPTY */
#define REG_UNGREEDY  0x0200  /* NOT defined by POSIX; maps to PCRE2_UNGREEDY */
#define REG_UCP       0x0400  /* NOT defined by POSIX; maps to PCRE2_UCP */
#define REG_PEND      0x0800  /* GNU feature: pass end pattern by re_endp */
#define REG_NOSPEC    0x1000  /* Maps to PCRE2_LITERAL */

/* This is not used by PCRE2, but by defining it we make it easier
to slot PCRE2 into existing programs that make POSIX calls. */

#define REG_EXTENDED  0

/* Error values. Not all these are relevant or used by the wrapper. The values
run consecutively from REG_ASSERT (1) to REG_NOMATCH (17). */

enum {
  REG_ASSERT = 1,  /* internal error ? */
  REG_BADBR,       /* invalid repeat counts in {} */
  REG_BADPAT,      /* pattern error */
  REG_BADRPT,      /* ? * + invalid */
  REG_EBRACE,      /* unbalanced {} */
  REG_EBRACK,      /* unbalanced [] */
  REG_ECOLLATE,    /* collation error - not relevant */
  REG_ECTYPE,      /* bad class */
  REG_EESCAPE,     /* bad escape sequence */
  REG_EMPTY,       /* empty expression */
  REG_EPAREN,      /* unbalanced () */
  REG_ERANGE,      /* bad range inside [] */
  REG_ESIZE,       /* expression too big */
  REG_ESPACE,      /* failed to get memory */
  REG_ESUBREG,     /* bad back reference */
  REG_INVARG,      /* bad argument */
  REG_NOMATCH      /* match failed */
};


/* The structure representing a compiled regular expression. It is also used
for passing the pattern end pointer when REG_PEND is set. */

typedef struct {
  void *re_pcre2_code;
  void *re_match_data;
  const char *re_endp;   /* pattern end pointer; see REG_PEND */
  size_t re_nsub;
  size_t re_erroffset;
  int re_cflags;
} regex_t;

/* The structure in which a captured offset is returned. */

/* NOTE(review): regoff_t is a plain int, so offsets larger than INT_MAX cannot
be represented. Widening it would change the layout of regmatch_t and hence the
ABI, so it is deliberately left unchanged here. */

typedef int regoff_t;

typedef struct {
  regoff_t rm_so;
  regoff_t rm_eo;
} regmatch_t;

/* When an application links to a PCRE2 DLL in Windows, the symbols that are
imported have to be identified as such. When building PCRE2, the appropriate
export settings are needed, and are set in pcre2posix.c before including this
file. */

#if defined(_WIN32) && !defined(PCRE2_STATIC) && !defined(PCRE2POSIX_EXP_DECL)
#  define PCRE2POSIX_EXP_DECL  extern __declspec(dllimport)
#  define PCRE2POSIX_EXP_DEFN  __declspec(dllimport)
#endif

/* By default, we use the standard "extern" declarations. */

#ifndef PCRE2POSIX_EXP_DECL
#  ifdef __cplusplus
#    define PCRE2POSIX_EXP_DECL  extern "C"
#    define PCRE2POSIX_EXP_DEFN  extern "C"
#  else
#    define PCRE2POSIX_EXP_DECL  extern
#    define PCRE2POSIX_EXP_DEFN  extern
#  endif
#endif

/* The functions. The actual code is in functions with pcre2_xxx names for
uniqueness. POSIX names are provided as macros for API compatibility with POSIX
regex functions. It's done this way to ensure that they are always linked from
the PCRE2 library and not by accident from elsewhere (regex_t differs in size
elsewhere). */

PCRE2POSIX_EXP_DECL int    pcre2_regcomp(regex_t *preg, const char *pattern,
                             int cflags);
PCRE2POSIX_EXP_DECL int    pcre2_regexec(const regex_t *preg,
                             const char *string, size_t nmatch,
                             regmatch_t *pmatch, int eflags);
PCRE2POSIX_EXP_DECL size_t pcre2_regerror(int errcode, const regex_t *preg,
                             char *errbuf, size_t errbuf_size);
PCRE2POSIX_EXP_DECL void   pcre2_regfree(regex_t *preg);

#define regcomp  pcre2_regcomp
#define regexec  pcre2_regexec
#define regerror pcre2_regerror
#define regfree  pcre2_regfree

/* Debian had a patch that used different names. These are now here to save
them having to maintain their own patch, but are not documented by PCRE2. */

#define PCRE2regcomp  pcre2_regcomp
#define PCRE2regexec  pcre2_regexec
#define PCRE2regerror pcre2_regerror
#define PCRE2regfree  pcre2_regfree

#ifdef __cplusplus
}   /* extern "C" */
#endif

#endif /* PCRE2POSIX_H_IDEMPOTENT_GUARD */

/* End of pcre2posix.h */