Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 1 | /* |
| 2 | * -*- mode: c-mode; c-file-style: python -*- |
| 3 | */ |
| 4 | |
Guido van Rossum | a330996 | 1993-07-28 09:05:47 +0000 | [diff] [blame] | 5 | #ifndef Py_REGEXPR_H |
| 6 | #define Py_REGEXPR_H |
| 7 | #ifdef __cplusplus |
| 8 | extern "C" { |
| 9 | #endif |
| 10 | |
Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 11 | /* |
Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 12 | * regexpr.h |
| 13 | * |
| 14 | * Author: Tatu Ylonen <ylo@ngs.fi> |
| 15 | * |
| 16 | * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland |
| 17 | * |
| 18 | * Permission to use, copy, modify, distribute, and sell this software |
| 19 | * and its documentation for any purpose is hereby granted without fee, |
| 20 | * provided that the above copyright notice appear in all copies. This |
| 21 | * software is provided "as is" without express or implied warranty. |
| 22 | * |
| 23 | * Created: Thu Sep 26 17:15:36 1991 ylo |
| 24 | * Last modified: Mon Nov 4 15:49:46 1991 ylo |
| 25 | */ |
Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 26 | |
Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 27 | /* $Id$ */ |
Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 28 | |
| 29 | #ifndef REGEXPR_H |
| 30 | #define REGEXPR_H |
| 31 | |
/* Number of match registers available.  This is the length of the
 * start[] and end[] arrays in struct re_registers below. */
#define RE_NREGS	100

/*
 * State of one compiled regular expression.
 *
 * Note that regexp_t is a POINTER to this structure, not the structure
 * itself.  Before compiling, the caller must set `buffer` to memory
 * obtained from malloc (or to NULL), `allocated` to the size of that
 * memory (or 0), and `translate` to a translation table (or NULL).  If
 * a fastmap is wanted, `fastmap` must point to an array of 256
 * characters.
 */
typedef struct re_pattern_buffer
{
	unsigned char *buffer;		/* compiled pattern */
	int allocated;			/* allocated size of compiled pattern */
	int used;			/* actual length of compiled pattern */
	unsigned char *fastmap;		/* fastmap[ch] is true if ch can start pattern */
	unsigned char *translate;	/* translation to apply during compilation/matching */
	unsigned char fastmap_accurate;	/* true if fastmap is valid */
	unsigned char can_be_null;	/* true if can match empty string */
	unsigned char uses_registers;	/* registers are used and need to be initialized */
	int num_registers;		/* number of registers used */
	unsigned char anchor;		/* anchor: 0=none 1=begline 2=begbuf */
} *regexp_t;

/*
 * Match registers: for each of the RE_NREGS registers, the start and
 * end offset of the region it recorded.
 *
 * Note that regexp_registers_t is a POINTER to this structure.
 */
typedef struct re_registers
{
	int start[RE_NREGS];		/* start offset of region */
	int end[RE_NREGS];		/* end offset of region */
} *regexp_registers_t;
| 53 | |
/* Bit definitions for the syntax mask.  Each constant is a distinct
 * bit; a syntax is formed by OR-ing the wanted bits together. */
#define RE_NO_BK_PARENS		1	/* no quoting for parentheses */
#define RE_NO_BK_VBAR		2	/* no quoting for vertical bar */
#define RE_BK_PLUS_QM		4	/* quoting needed for + and ? */
#define RE_TIGHT_VBAR		8	/* | binds tighter than ^ and $ */
#define RE_NEWLINE_OR		16	/* treat newline as or */
#define RE_CONTEXT_INDEP_OPS	32	/* ^$?*+ are special in all contexts */
#define RE_ANSI_HEX		64	/* ansi sequences (\n etc) and \xhh */
#define RE_NO_GNU_EXTENSIONS	128	/* no gnu extensions */

/* Definitions for some common regexp styles, built from the bits
 * above.  Resulting masks: AWK = 35, EGREP = 51, GREP = 20, EMACS = 0. */
#define RE_SYNTAX_AWK	(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP	(RE_SYNTAX_AWK|RE_NEWLINE_OR)
#define RE_SYNTAX_GREP	(RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS	0
| 69 | |
/* Five distinct single-bit flags, so they can be OR-ed together in one
 * byte.  NOTE(review): presumably these are the character-class bits
 * stored in the entries of re_syntax_table[] -- confirm against the
 * implementation file. */
#define Sword		1
#define Swhitespace	2
#define Sdigit		4
#define Soctaldigit	8
#define Shexdigit	16
Guido van Rossum | 74fb303 | 1997-07-17 22:41:38 +0000 | [diff] [blame] | 75 | |
/* Rename all exported symbols to avoid conflicts with similarly named
 * symbols in some systems' standard C libraries.  Every name below is
 * mapped to the same name with a _Py_ prefix.
 *
 * NOTE(review): re_comp and re_exec are renamed here, but no
 * declaration for either appears in the visible part of this header. */

#define re_syntax		_Py_re_syntax
#define re_syntax_table		_Py_re_syntax_table
#define re_compile_initialize	_Py_re_compile_initialize
#define re_set_syntax		_Py_re_set_syntax
#define re_compile_pattern	_Py_re_compile_pattern
#define re_match		_Py_re_match
#define re_search		_Py_re_search
#define re_compile_fastmap	_Py_re_compile_fastmap
#define re_comp			_Py_re_comp
#define re_exec			_Py_re_exec
| 89 | |
#ifdef HAVE_PROTOTYPES

extern int re_syntax;
/* This is the actual syntax mask.  It was added so that Python could do
 * syntax-dependent munging of patterns before compilation. */

extern unsigned char re_syntax_table[256];
/* Table of 256 unsigned char entries.  NOTE(review): presumably indexed
 * by character code and holding the S* class bits -- confirm against
 * the implementation file. */

void re_compile_initialize(void);
/* NOTE(review): presumably performs the one-time set-up of the tables
 * used by the compiler -- confirm against the implementation file. */

int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax.  The
 * syntax is specified by a bit mask of the above defined bits. */

char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);
/* This compiles the regexp (given in regex and length in regex_size).
 * This returns NULL if the regexp compiled successfully, and an error
 * message if an error was encountered.  The buffer field must be
 * initialized to a memory area allocated by malloc (or to NULL) before
 * use, and the allocated field must be set to its length (or 0 if
 * buffer is NULL).  Also, the translate field must be set to point to a
 * valid translation table, or NULL if it is not used. */

int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
	     regexp_registers_t old_regs);
/* This tries to match the regexp against the string.  This returns the
 * length of the matched portion, or -1 if the pattern could not be
 * matched and -2 if an error (such as failure stack overflow) is
 * encountered. */

int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
	      int range, regexp_registers_t regs);
/* This searches for a substring matching the regexp.  This returns the
 * first index at which a match is found.  range specifies at how many
 * positions to try matching; positive values indicate searching
 * forwards, and negative values indicate searching backwards.  This
 * returns -1 if no match is found, and -2 if an error (such as failure
 * stack overflow) is encountered.
 *
 * NOTE(review): this comment used to say "mstop specifies the offset
 * beyond which a match must not go", but the declaration above has no
 * mstop parameter; the sentence looks like a leftover from an earlier
 * signature.  Confirm against the implementation file. */

void re_compile_fastmap(regexp_t compiled);
/* This computes the fastmap for the regexp.  For this to have any effect,
 * the calling program must have initialized the fastmap field to point
 * to an array of 256 characters. */

#else /* HAVE_PROTOTYPES */

/* Compilers without prototype support: the same declarations as above,
 * written without parameter lists. */
extern int re_syntax;
extern unsigned char re_syntax_table[256];
void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
int re_search();
void re_compile_fastmap();

#endif /* HAVE_PROTOTYPES */
Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 147 | |
| 148 | #endif /* REGEXPR_H */ |
| 149 | |
| 150 | |
Guido van Rossum | a330996 | 1993-07-28 09:05:47 +0000 | [diff] [blame] | 151 | |
| 152 | #ifdef __cplusplus |
| 153 | } |
| 154 | #endif |
| 155 | #endif /* !Py_REGEXPR_H */ |