blob: 2aee62d6f8aae5533a1852d60708e1c17e5070c4 [file] [log] [blame]
/*
 * -*- mode: c-mode; c-file-style: python -*-
 */
4
Guido van Rossuma3309961993-07-28 09:05:47 +00005#ifndef Py_REGEXPR_H
6#define Py_REGEXPR_H
7#ifdef __cplusplus
8extern "C" {
9#endif
10
/*
 * regexpr.h
 *
 * Author: Tatu Ylonen <ylo@ngs.fi>
 *
 * Copyright (c) 1991 Tatu Ylonen, Espoo, Finland
 *
 * Permission to use, copy, modify, distribute, and sell this software
 * and its documentation for any purpose is hereby granted without fee,
 * provided that the above copyright notice appear in all copies.  This
 * software is provided "as is" without express or implied warranty.
 *
 * Created: Thu Sep 26 17:15:36 1991 ylo
 * Last modified: Mon Nov  4 15:49:46 1991 ylo
 */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000026
/* $Id$ */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000028
29#ifndef REGEXPR_H
30#define REGEXPR_H
31
#define RE_NREGS	100  /* number of registers available */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000033
/*
 * Holds a compiled pattern together with the bookkeeping fields listed
 * below.  regexp_t is a pointer to this structure.  The comment on
 * re_compile_pattern() describes which fields the caller must set up
 * before compiling.
 */
typedef struct re_pattern_buffer
{
	unsigned char *buffer;          /* compiled pattern */
	int allocated;                  /* allocated size of compiled pattern */
	int used;                       /* actual length of compiled pattern */
	unsigned char *fastmap;         /* fastmap[ch] is true if ch can start pattern */
	unsigned char *translate;       /* translation to apply during compilation/matching */
	unsigned char fastmap_accurate; /* true if fastmap is valid */
	unsigned char can_be_null;      /* true if can match empty string */
	unsigned char uses_registers;   /* registers are used and need to be initialized */
	int num_registers;              /* number of registers used */
	unsigned char anchor;           /* anchor: 0=none 1=begline 2=begbuf */
} *regexp_t;
47
48typedef struct re_registers
49{
Guido van Rossumdb25f321997-07-10 14:31:32 +000050 int start[RE_NREGS]; /* start offset of region */
51 int end[RE_NREGS]; /* end offset of region */
Guido van Rossumb674c3b1992-01-19 16:32:47 +000052} *regexp_registers_t;
53
/* bit definitions for syntax */
#define RE_NO_BK_PARENS		1    /* no quoting for parentheses */
#define RE_NO_BK_VBAR		2    /* no quoting for vertical bar */
#define RE_BK_PLUS_QM		4    /* quoting needed for + and ? */
#define RE_TIGHT_VBAR		8    /* | binds tighter than ^ and $ */
#define RE_NEWLINE_OR		16   /* treat newline as or */
#define RE_CONTEXT_INDEP_OPS	32   /* ^$?*+ are special in all contexts */
#define RE_ANSI_HEX		64   /* ansi sequences (\n etc) and \xhh */
#define RE_NO_GNU_EXTENSIONS	128  /* no gnu extensions */

/* definitions for some common regexp styles */
#define RE_SYNTAX_AWK	(RE_NO_BK_PARENS|RE_NO_BK_VBAR|RE_CONTEXT_INDEP_OPS)
#define RE_SYNTAX_EGREP	(RE_SYNTAX_AWK|RE_NEWLINE_OR)
#define RE_SYNTAX_GREP	(RE_BK_PLUS_QM|RE_NEWLINE_OR)
#define RE_SYNTAX_EMACS	0
69
/* NOTE(review): presumably character-class bits stored in the entries of
 * re_syntax_table[] -- confirm against the implementation file. */
#define Sword		1
#define Swhitespace	2
#define Sdigit		4
#define Soctaldigit	8
#define Shexdigit	16
Guido van Rossum74fb3031997-07-17 22:41:38 +000075
/* Rename all exported symbols to avoid conflicts with similarly named
   symbols in some systems' standard C libraries... */

#define re_syntax		_Py_re_syntax
#define re_syntax_table		_Py_re_syntax_table
#define re_compile_initialize	_Py_re_compile_initialize
#define re_set_syntax		_Py_re_set_syntax
#define re_compile_pattern	_Py_re_compile_pattern
#define re_match		_Py_re_match
#define re_search		_Py_re_search
#define re_compile_fastmap	_Py_re_compile_fastmap
#define re_comp			_Py_re_comp
#define re_exec			_Py_re_exec
89
#ifdef HAVE_PROTOTYPES

extern int re_syntax;
/* This is the actual syntax mask.  It was added so that Python could do
 * syntax-dependent munging of patterns before compilation. */

extern unsigned char re_syntax_table[256];

void re_compile_initialize(void);

int re_set_syntax(int syntax);
/* This sets the syntax to use and returns the previous syntax.  The
 * syntax is specified by a bit mask of the above defined bits. */

char *re_compile_pattern(unsigned char *regex, int regex_size, regexp_t compiled);
/* This compiles the regexp (given in regex and length in regex_size).
 * This returns NULL if the regexp compiled successfully, and an error
 * message if an error was encountered.  The buffer field must be
 * initialized to a memory area allocated by malloc (or to NULL) before
 * use, and the allocated field must be set to its length (or 0 if
 * buffer is NULL).  Also, the translate field must be set to point to a
 * valid translation table, or NULL if it is not used. */

int re_match(regexp_t compiled, unsigned char *string, int size, int pos,
	     regexp_registers_t old_regs);
/* This tries to match the regexp against the string.  This returns the
 * length of the matched portion, or -1 if the pattern could not be
 * matched and -2 if an error (such as failure stack overflow) is
 * encountered. */

int re_search(regexp_t compiled, unsigned char *string, int size, int startpos,
	      int range, regexp_registers_t regs);
/* This searches for a substring matching the regexp.  This returns the
 * first index at which a match is found.  range specifies at how many
 * positions to try matching; positive values indicate searching
 * forwards, and negative values indicate searching backwards.  This
 * returns -1 if no match is found, and -2 if an error (such as failure
 * stack overflow) is encountered.
 *
 * NOTE(review): this comment used to say that "mstop specifies the
 * offset beyond which a match must not go", but the function declared
 * here takes no mstop argument. */

void re_compile_fastmap(regexp_t compiled);
/* This computes the fastmap for the regexp.  For this to have any effect,
 * the calling program must have initialized the fastmap field to point
 * to an array of 256 characters. */

#else /* HAVE_PROTOTYPES */

/* Old-style (non-prototype) declarations of the same names for compilers
 * without prototype support; the behaviour of each is described next to
 * the prototyped declarations above. */
extern int re_syntax;
extern unsigned char re_syntax_table[256];
void re_compile_initialize();
int re_set_syntax();
char *re_compile_pattern();
int re_match();
int re_search();
void re_compile_fastmap();

#endif /* HAVE_PROTOTYPES */
Guido van Rossumb674c3b1992-01-19 16:32:47 +0000147
148#endif /* REGEXPR_H */
149
150
Guido van Rossuma3309961993-07-28 09:05:47 +0000151
152#ifdef __cplusplus
153}
154#endif
155#endif /* !Py_REGEXPR_H */