blob: 122180276c68f13fd522a326b3e56ba43e659335 [file] [log] [blame]
Guido van Rossumdb25f321997-07-10 14:31:32 +00001/*
2 * -*- mode: c-mode; c-file-style: python -*-
3 */
4
Guido van Rossuma3309961993-07-28 09:05:47 +00005#ifndef Py_REGEXPR_H
6#define Py_REGEXPR_H
7#ifdef __cplusplus
8extern "C" {
9#endif
10
Guido van Rossumb674c3b1992-01-19 16:32:47 +000011/*
Guido van Rossumdb25f321997-07-10 14:31:32 +000012 * regexpr.h
13 *
14 * Author: Tatu Ylonen <ylo@ngs.fi>
15 *
16 * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
17 *
18 * Permission to use, copy, modify, distribute, and sell this software
19 * and its documentation for any purpose is hereby granted without fee,
20 * provided that the above copyright notice appear in all copies. This
21 * software is provided "as is" without express or implied warranty.
22 *
23 * Created: Thu Sep 26 17:15:36 1991 ylo
24 * Last modified: Mon Nov 4 15:49:46 1991 ylo
25 */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000026
Guido van Rossumdb25f321997-07-10 14:31:32 +000027/* $Id$ */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000028
29#ifndef REGEXPR_H
30#define REGEXPR_H
31
/* Number of registers available; this is the length of the start[] and
 * end[] arrays in struct re_registers. */
#define RE_NREGS 100
Guido van Rossumb674c3b1992-01-19 16:32:47 +000033
/*
 * Compiled regular expression together with the bookkeeping fields that
 * the compile/match routines read and update.
 *
 * Note that regexp_t is a POINTER to struct re_pattern_buffer, not the
 * structure itself; declare storage as `struct re_pattern_buffer`.
 */
typedef struct re_pattern_buffer
{
    char *buffer;          /* compiled pattern */
    int allocated;         /* allocated size of compiled pattern */
    int used;              /* actual length of compiled pattern */
    char *fastmap;         /* fastmap[ch] is true if ch can start pattern */
    char *translate;       /* translation to apply during compilation/matching */
    char fastmap_accurate; /* true if fastmap is valid */
    char can_be_null;      /* true if can match empty string */
    char uses_registers;   /* registers are used and need to be initialized */
    int num_registers;     /* number of registers used */
    char anchor;           /* anchor: 0=none 1=begline 2=begbuf */
} *regexp_t;
47
48typedef struct re_registers
49{
Guido van Rossumdb25f321997-07-10 14:31:32 +000050 int start[RE_NREGS]; /* start offset of region */
51 int end[RE_NREGS]; /* end offset of region */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000052} *regexp_registers_t;
53
/* Bit definitions for the syntax mask.  Each flag is a distinct bit so
 * that flags can be OR-ed together. */
#define RE_NO_BK_PARENS      1   /* no quoting for parentheses */
#define RE_NO_BK_VBAR        2   /* no quoting for vertical bar */
#define RE_BK_PLUS_QM        4   /* quoting needed for + and ? */
#define RE_TIGHT_VBAR        8   /* | binds tighter than ^ and $ */
#define RE_NEWLINE_OR        16  /* treat newline as or */
#define RE_CONTEXT_INDEP_OPS 32  /* ^$?*+ are special in all contexts */
#define RE_ANSI_HEX          64  /* ansi sequences (\n etc) and \xhh */
#define RE_NO_GNU_EXTENSIONS 128 /* no gnu extensions */

/* Definitions for some common regexp styles, expressed as combinations
 * of the bits above. */
#define RE_SYNTAX_AWK   (RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP (RE_SYNTAX_AWK|RE_NEWLINE_OR)
#define RE_SYNTAX_GREP  (RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS 0
69
/* Rename all exported symbols to avoid conflicts with similarly named
   symbols in some systems' standard C libraries...
   Every re_* name used after this point is rewritten by the preprocessor
   to the corresponding _Py_re_* name. */

#define re_syntax          _Py_re_syntax
#define re_set_syntax      _Py_re_set_syntax
#define re_compile_pattern _Py_re_compile_pattern
#define re_match           _Py_re_match
#define re_search          _Py_re_search
#define re_compile_fastmap _Py_re_compile_fastmap
#define re_comp            _Py_re_comp
#define re_exec            _Py_re_exec
81
#ifdef HAVE_PROTOTYPES

extern int re_syntax;
/* This is the actual syntax mask.  It was added so that Python could do
 * syntax-dependent munging of patterns before compilation. */

int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax.  The
 * syntax is specified by a bit mask of the above defined bits. */

char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
/* This compiles the regexp (given in regex and length in regex_size).
 * This returns NULL if the regexp compiled successfully, and an error
 * message if an error was encountered.  The buffer field must be
 * initialized to a memory area allocated by malloc (or to NULL) before
 * use, and the allocated field must be set to its length (or 0 if
 * buffer is NULL).  Also, the translate field must be set to point to a
 * valid translation table, or NULL if it is not used. */

int re_match(regexp_t compiled, char *string, int size, int pos,
             regexp_registers_t old_regs);
/* This tries to match the regexp against the string.  This returns the
 * length of the matched portion, or -1 if the pattern could not be
 * matched and -2 if an error (such as failure stack overflow) is
 * encountered. */

int re_search(regexp_t compiled, char *string, int size, int startpos,
              int range, regexp_registers_t regs);
/* This searches for a substring matching the regexp.  This returns the
 * first index at which a match is found.  range specifies at how many
 * positions to try matching; positive values indicate searching
 * forwards, and negative values indicate searching backwards.  This
 * returns -1 if no match is found, and -2 if an error (such as failure
 * stack overflow) is encountered.
 *
 * NOTE(review): earlier versions of this comment also said "mstop
 * specifies the offset beyond which a match must not go", but this
 * prototype has no mstop parameter -- presumably a stale remark; confirm
 * against the implementation. */

void re_compile_fastmap(regexp_t compiled);
/* This computes the fastmap for the regexp.  For this to have any effect,
 * the calling program must have initialized the fastmap field to point
 * to an array of 256 characters. */

char *re_comp(char *s);
/* BSD 4.2 regex library routine re_comp.  This compiles the regexp into
 * an internal buffer.  This returns NULL if the regexp was compiled
 * successfully, and an error message if there was an error. */

int re_exec(char *s);
/* BSD 4.2 regexp library routine re_exec.  This returns true if the
 * string matches the regular expression (that is, a matching part is
 * found anywhere in the string). */

#else /* HAVE_PROTOTYPES */

/* Declarations without parameter lists, used when HAVE_PROTOTYPES is not
 * defined.  They declare the same names and return types as the
 * prototypes above. */
extern int re_syntax;
int re_set_syntax();
char *re_compile_pattern();
int re_match();
int re_search();
void re_compile_fastmap();
char *re_comp();
int re_exec();

#endif /* HAVE_PROTOTYPES */
Guido van Rossumb674c3b1992-01-19 16:32:47 +0000145
146#endif /* REGEXPR_H */
147
148
Guido van Rossuma3309961993-07-28 09:05:47 +0000149
150#ifdef __cplusplus
151}
152#endif
153#endif /* !Py_REGEXPR_H */