/*
 * -*- mode: c-mode; c-file-style: python -*-
 */

#ifndef Py_REGEXPR_H
#define Py_REGEXPR_H
#ifdef __cplusplus
extern "C" {
#endif

/*
 * regexpr.h
 *
 * Author: Tatu Ylonen <ylo@ngs.fi>
 *
 * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
 *
 * Permission to use, copy, modify, distribute, and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies. This
 * software is provided "as is" without express or implied warranty.
 *
 * Created: Thu Sep 26 17:15:36 1991 ylo
 * Last modified: Mon Nov 4 15:49:46 1991 ylo
 */

/* $Id$ */

#ifndef REGEXPR_H
#define REGEXPR_H

/* Capacity of the start[] and end[] arrays in struct re_registers. */
#define RE_NREGS	100	/* number of registers available */

/*
 * Compiled-pattern descriptor.  regexp_t is a POINTER to this structure;
 * the functions declared in this header take a regexp_t.
 *
 * Per the re_compile_pattern() contract in this header, the caller must
 * set `buffer` (malloc'ed memory or NULL), `allocated` (its size, or 0
 * when buffer is NULL) and `translate` (a translation table, or NULL if
 * unused) before compiling.
 */
typedef struct re_pattern_buffer
{
	char *buffer;		/* compiled pattern */
	int allocated;		/* allocated size of compiled pattern */
	int used;		/* actual length of compiled pattern */
	char *fastmap;		/* fastmap[ch] is true if ch can start pattern */
	char *translate;	/* translation to apply during compilation/matching */
	char fastmap_accurate;	/* true if fastmap is valid */
	char can_be_null;	/* true if can match empty string */
	char uses_registers;	/* registers are used and need to be initialized */
	int num_registers;	/* number of registers used */
	char anchor;		/* anchor: 0=none 1=begline 2=begbuf */
} *regexp_t;

48typedef struct re_registers
49{
Guido van Rossumdb25f321997-07-10 14:31:32 +000050 int start[RE_NREGS]; /* start offset of region */
51 int end[RE_NREGS]; /* end offset of region */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000052} *regexp_registers_t;
53
/*
 * Bit definitions for syntax.  Each value is a distinct single bit, so
 * they can be OR'ed together into the mask taken by re_set_syntax().
 */
#define RE_NO_BK_PARENS		1	/* no quoting for parentheses */
#define RE_NO_BK_VBAR		2	/* no quoting for vertical bar */
#define RE_BK_PLUS_QM		4	/* quoting needed for + and ? */
#define RE_TIGHT_VBAR		8	/* | binds tighter than ^ and $ */
#define RE_NEWLINE_OR		16	/* treat newline as or */
#define RE_CONTEXT_INDEP_OPS	32	/* ^$?*+ are special in all contexts */
#define RE_ANSI_HEX		64	/* ansi sequences (\n etc) and \xhh */
#define RE_NO_GNU_EXTENSIONS	128	/* no gnu extensions */

/*
 * Definitions for some common regexp styles, expressed as combinations
 * of the RE_* syntax bits.  Emacs style is the empty mask.
 */
#define RE_SYNTAX_AWK	(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP	(RE_SYNTAX_AWK|RE_NEWLINE_OR)
#define RE_SYNTAX_GREP	(RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS	0

/*
 * Character-class bits (distinct single-bit values).
 * NOTE(review): presumably these are the flags stored in the entries of
 * re_syntax_table[] -- confirm against the implementation file.
 */
#define Sword		1
#define Swhitespace	2
#define Sdigit		4

/* Rename all exported symbols to avoid conflicts with similarly named
   symbols in some systems' standard C libraries...  Every declaration
   below this point therefore refers to the _Py_-prefixed name. */

#define re_syntax		_Py_re_syntax
#define re_syntax_table		_Py_re_syntax_table
#define re_compile_initialize	_Py_re_compile_initialize
#define re_set_syntax		_Py_re_set_syntax
#define re_compile_pattern	_Py_re_compile_pattern
#define re_match		_Py_re_match
#define re_search		_Py_re_search
#define re_compile_fastmap	_Py_re_compile_fastmap
#define re_comp			_Py_re_comp
#define re_exec			_Py_re_exec

/*
 * Public declarations.  When HAVE_PROTOTYPES is defined the functions
 * are declared with full ANSI prototypes; otherwise the same names are
 * declared old-style, with empty parameter lists.
 */
#ifdef HAVE_PROTOTYPES

extern int re_syntax;
/* This is the actual syntax mask.  It was added so that Python could do
 * syntax-dependent munging of patterns before compilation. */

extern char re_syntax_table[256];

void re_compile_initialize(void);

int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax.  The
 * syntax is specified by a bit mask of the above defined bits. */

char *re_compile_pattern(char *regex, int regex_size, regexp_t compiled);
/* This compiles the regexp (given in regex and length in regex_size).
 * This returns NULL if the regexp compiled successfully, and an error
 * message if an error was encountered.  The buffer field must be
 * initialized to a memory area allocated by malloc (or to NULL) before
 * use, and the allocated field must be set to its length (or 0 if
 * buffer is NULL).  Also, the translate field must be set to point to a
 * valid translation table, or NULL if it is not used. */

int re_match(regexp_t compiled, char *string, int size, int pos,
	     regexp_registers_t old_regs);
/* This tries to match the regexp against the string.  This returns the
 * length of the matched portion, or -1 if the pattern could not be
 * matched and -2 if an error (such as failure stack overflow) is
 * encountered. */

int re_search(regexp_t compiled, char *string, int size, int startpos,
	      int range, regexp_registers_t regs);
/* This searches for a substring matching the regexp.  This returns the
 * first index at which a match is found.  range specifies at how many
 * positions to try matching; positive values indicate searching
 * forwards, and negative values indicate searching backwards.  This
 * returns -1 if no match is found, and -2 if an error (such as failure
 * stack overflow) is encountered.
 * NOTE(review): this comment previously also described an `mstop`
 * offset beyond which a match must not go; re_search() as declared here
 * takes no such argument, so that sentence does not apply to this
 * signature. */

void re_compile_fastmap(regexp_t compiled);
/* This computes the fastmap for the regexp.  For this to have any effect,
 * the calling program must have initialized the fastmap field to point
 * to an array of 256 characters. */

char *re_comp(char *s);
/* BSD 4.2 regex library routine re_comp.  This compiles the regexp into
 * an internal buffer.  This returns NULL if the regexp was compiled
 * successfully, and an error message if there was an error. */

int re_exec(char *s);
/* BSD 4.2 regexp library routine re_exec.  This returns true if the
 * string matches the regular expression (that is, a matching part is
 * found anywhere in the string). */

#else /* HAVE_PROTOTYPES */

/* Old-style declarations; see the prototyped branch above for the
 * argument lists and the contract of each function. */
extern int re_syntax;
extern char re_syntax_table[256];
void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
int re_search();
void re_compile_fastmap();
char *re_comp();
int re_exec();

#endif /* HAVE_PROTOTYPES */

#endif /* REGEXPR_H */



#ifdef __cplusplus
}
#endif
#endif /* !Py_REGEXPR_H */