| Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 1 | /* | 
 | 2 |  * -*- mode: c-mode; c-file-style: python -*- | 
 | 3 |  */ | 
 | 4 |  | 
| Guido van Rossum | a330996 | 1993-07-28 09:05:47 +0000 | [diff] [blame] | 5 | #ifndef Py_REGEXPR_H | 
 | 6 | #define Py_REGEXPR_H | 
 | 7 | #ifdef __cplusplus | 
 | 8 | extern "C" { | 
 | 9 | #endif | 
 | 10 |  | 
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 11 | /* | 
| Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 12 |  * regexpr.h | 
 | 13 |  * | 
 | 14 |  * Author: Tatu Ylonen <ylo@ngs.fi> | 
 | 15 |  * | 
 | 16 |  * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland | 
 | 17 |  * | 
 | 18 |  * Permission to use, copy, modify, distribute, and sell this software | 
 | 19 |  * and its documentation for any purpose is hereby granted without fee, | 
 | 20 |  * provided that the above copyright notice appear in all copies.  This | 
 | 21 |  * software is provided "as is" without express or implied warranty. | 
 | 22 |  * | 
 | 23 |  * Created: Thu Sep 26 17:15:36 1991 ylo | 
 | 24 |  * Last modified: Mon Nov  4 15:49:46 1991 ylo | 
 | 25 |  */ | 
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 26 |  | 
| Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 27 | /* $Id$ */ | 
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 28 |  | 
 | 29 | #ifndef REGEXPR_H | 
 | 30 | #define REGEXPR_H | 
 | 31 |  | 
/* Number of registers available; this is the length of the start[] and
 * end[] arrays in struct re_registers. */
#define RE_NREGS 100
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 33 |  | 
 | 34 | typedef struct re_pattern_buffer | 
 | 35 | { | 
| Guido van Rossum | 95e8053 | 1997-08-13 22:34:14 +0000 | [diff] [blame] | 36 | 	unsigned char *buffer;          /* compiled pattern */ | 
| Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 37 | 	int allocated;         /* allocated size of compiled pattern */ | 
 | 38 | 	int used;              /* actual length of compiled pattern */ | 
| Guido van Rossum | 95e8053 | 1997-08-13 22:34:14 +0000 | [diff] [blame] | 39 | 	unsigned char *fastmap;         /* fastmap[ch] is true if ch can start pattern */ | 
 | 40 | 	unsigned char *translate;       /* translation to apply during compilation/matching */ | 
 | 41 | 	unsigned char fastmap_accurate; /* true if fastmap is valid */ | 
 | 42 | 	unsigned char can_be_null;      /* true if can match empty string */ | 
 | 43 | 	unsigned char uses_registers;   /* registers are used and need to be initialized */ | 
| Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 44 | 	int num_registers;     /* number of registers used */ | 
| Guido van Rossum | 95e8053 | 1997-08-13 22:34:14 +0000 | [diff] [blame] | 45 | 	unsigned char anchor;           /* anchor: 0=none 1=begline 2=begbuf */ | 
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 46 | } *regexp_t; | 
 | 47 |  | 
 | 48 | typedef struct re_registers | 
 | 49 | { | 
| Guido van Rossum | db25f32 | 1997-07-10 14:31:32 +0000 | [diff] [blame] | 50 | 	int start[RE_NREGS];  /* start offset of region */ | 
 | 51 | 	int end[RE_NREGS];    /* end offset of region */ | 
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 52 | } *regexp_registers_t; | 
 | 53 |  | 
 | 54 | /* bit definitions for syntax */ | 
 | 55 | #define RE_NO_BK_PARENS		1    /* no quoting for parentheses */ | 
 | 56 | #define RE_NO_BK_VBAR		2    /* no quoting for vertical bar */ | 
 | 57 | #define RE_BK_PLUS_QM		4    /* quoting needed for + and ? */ | 
 | 58 | #define RE_TIGHT_VBAR		8    /* | binds tighter than ^ and $ */ | 
 | 59 | #define RE_NEWLINE_OR		16   /* treat newline as or */ | 
 | 60 | #define RE_CONTEXT_INDEP_OPS	32   /* ^$?*+ are special in all contexts */ | 
 | 61 | #define RE_ANSI_HEX		64   /* ansi sequences (\n etc) and \xhh */ | 
 | 62 | #define RE_NO_GNU_EXTENSIONS   128   /* no gnu extensions */ | 
 | 63 |  | 
 | 64 | /* definitions for some common regexp styles */ | 
 | 65 | #define RE_SYNTAX_AWK	(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS) | 
 | 66 | #define RE_SYNTAX_EGREP	(RE_SYNTAX_AWK|RE_NEWLINE_OR) | 
 | 67 | #define RE_SYNTAX_GREP	(RE_BK_PLUS_QM|RE_NEWLINE_OR) | 
 | 68 | #define RE_SYNTAX_EMACS	0 | 
 | 69 |  | 
/*
 * Character-class bits; each constant is a distinct power of two so
 * they can be combined in one byte.
 * NOTE(review): presumably these are the flags stored in the
 * re_syntax_table[] entries -- confirm against the implementation.
 */
#define Sword		0x01
#define Swhitespace	0x02
#define Sdigit		0x04
#define Soctaldigit	0x08
#define Shexdigit	0x10
| Guido van Rossum | 74fb303 | 1997-07-17 22:41:38 +0000 | [diff] [blame] | 75 |  | 
/*
 * Every exported symbol is given a _Py_ prefix so that it cannot clash
 * with a similarly named symbol in some systems' standard C libraries.
 * Code keeps using the short re_* names; the preprocessor substitutes
 * the prefixed ones.
 */

/* data */
#define re_syntax		_Py_re_syntax
#define re_syntax_table		_Py_re_syntax_table

/* functions */
#define re_compile_initialize	_Py_re_compile_initialize
#define re_set_syntax		_Py_re_set_syntax
#define re_compile_pattern	_Py_re_compile_pattern
#define re_compile_fastmap	_Py_re_compile_fastmap
#define re_match		_Py_re_match
#define re_search		_Py_re_search

/* NOTE(review): re_comp and re_exec are renamed here but no declaration
 * for them is visible in this header -- confirm whether they are still
 * defined anywhere before relying on them. */
#define re_comp			_Py_re_comp
#define re_exec			_Py_re_exec
 | 89 |  | 
/*
 * Exported data.  These two declarations are the same whether or not
 * the compiler supports prototypes, so they are stated once here.
 */

/* This is the actual syntax mask.  It is exposed so that Python can do
 * syntax-dependent munging of patterns before compilation. */
extern int re_syntax;

extern unsigned char re_syntax_table[256];

#ifdef HAVE_PROTOTYPES

void re_compile_initialize(void);

/* Sets the syntax to use and returns the previous syntax.  The syntax
 * is given as a bit mask of the RE_* syntax bits. */
int re_set_syntax(int syntax);

/* Compiles the regexp given in regex (whose length is regex_size) into
 * compiled.  Returns NULL on success, or an error message if an error
 * was encountered.
 *
 * Before the call, compiled->buffer must point to a memory area
 * allocated by malloc (or be NULL), compiled->allocated must hold that
 * area's length (0 if buffer is NULL), and compiled->translate must
 * point to a valid translation table, or be NULL if none is used. */
char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);

/* Tries to match the regexp against the string.  Returns the length of
 * the matched portion, -1 if the pattern could not be matched, or -2 if
 * an error (such as failure stack overflow) is encountered. */
int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
	     regexp_registers_t old_regs);

/* Searches for a substring matching the regexp and returns the first
 * index at which a match is found.  range specifies at how many
 * positions to try matching; positive values indicate searching
 * forwards, and negative values indicate searching backwards.  Returns
 * -1 if no match is found, and -2 if an error (such as failure stack
 * overflow) is encountered.
 *
 * NOTE(review): earlier documentation for this function described an
 * `mstop` argument (an offset beyond which a match must not go); this
 * signature has no such parameter -- confirm how the limit is applied. */
int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
	      int range, regexp_registers_t regs);

/* Computes the fastmap for the regexp.  For this to have any effect,
 * the calling program must have initialized compiled->fastmap to point
 * to an array of 256 characters. */
void re_compile_fastmap(regexp_t compiled);

#else /* HAVE_PROTOTYPES */

/* No prototype support: the same functions, declared without parameter
 * lists.  See the prototyped branch above for what each one does. */
void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
int re_search();
void re_compile_fastmap();

#endif /* HAVE_PROTOTYPES */
| Guido van Rossum | b674c3b | 1992-01-19 16:32:47 +0000 | [diff] [blame] | 147 |  | 
 | 148 | #endif /* REGEXPR_H */ | 
 | 149 |  | 
 | 150 |  | 
| Guido van Rossum | a330996 | 1993-07-28 09:05:47 +0000 | [diff] [blame] | 151 |  | 
 | 152 | #ifdef __cplusplus | 
 | 153 | } | 
 | 154 | #endif | 
 | 155 | #endif /* !Py_REGEXPR_H */ |