/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2000 by Secret Labs AB. All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */
| 10 | |
| 11 | #ifndef SRE_INCLUDED |
| 12 | #define SRE_INCLUDED |
| 13 | |
| 14 | #include "sre_constants.h" |
| 15 | |
/*
 * SRE_CODE: the integer type of a single compiled-pattern code word.
 * It must be unsigned short or larger.
 *
 * When USE_UCS4_STORAGE is defined the code word is an unsigned long;
 * otherwise it is an unsigned short.
 */
#ifdef USE_UCS4_STORAGE
#define SRE_CODE unsigned long
#else
#define SRE_CODE unsigned short
#endif
Fredrik Lundh | 102f3ad | 2000-06-29 08:55:54 +0000 | [diff] [blame] | 22 | |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 23 | typedef struct { |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 24 | PyObject_VAR_HEAD |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 25 | int groups; |
| 26 | PyObject* groupindex; |
Fredrik Lundh | c230173 | 2000-07-02 22:25:39 +0000 | [diff] [blame] | 27 | PyObject* indexgroup; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 28 | /* compatibility */ |
| 29 | PyObject* pattern; /* pattern source (or None) */ |
| 30 | int flags; /* flags used when compiling pattern source */ |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 31 | /* pattern code */ |
| 32 | SRE_CODE code[1]; |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 33 | } PatternObject; |
| 34 | |
/*
 * Cast `o` to PatternObject* and yield its `code` array (the pattern code).
 * No type check is performed on `o`.
 */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 36 | |
| 37 | typedef struct { |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 38 | PyObject_VAR_HEAD |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 39 | PyObject* string; /* link to the target string */ |
Fredrik Lundh | 8a3ebf8 | 2000-07-23 21:46:17 +0000 | [diff] [blame] | 40 | PyObject* regs; /* cached list of matching spans */ |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 41 | PatternObject* pattern; /* link to the regex (pattern) object */ |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 42 | int pos, endpos; /* current target slice */ |
| 43 | int lastindex; /* last index marker seen by the engine (-1 if none) */ |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 44 | int groups; /* number of groups (start/end marks) */ |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 45 | int mark[1]; |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 46 | } MatchObject; |
| 47 | |
/*
 * Signature of a character-lowering hook: takes a character code `ch`
 * and returns an unsigned int.  Stored in SRE_STATE.lower.
 */
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
| 49 | |
/*
 * Number of slots in the fixed-size `mark` array of SRE_STATE.
 *
 * FIXME: <fl> shouldn't be a constant, really...
 */
#define SRE_MARK_SIZE 200
| 52 | |
Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 53 | typedef struct SRE_REPEAT_T { |
| 54 | int count; |
| 55 | SRE_CODE* pattern; /* points to REPEAT operator arguments */ |
| 56 | struct SRE_REPEAT_T *prev; /* points to previous repeat context */ |
| 57 | } SRE_REPEAT; |
| 58 | |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 59 | typedef struct { |
| 60 | /* string pointers */ |
| 61 | void* ptr; /* current position (also end of current slice) */ |
| 62 | void* beginning; /* start of original string */ |
| 63 | void* start; /* start of current slice */ |
| 64 | void* end; /* end of original string */ |
Fredrik Lundh | 8a3ebf8 | 2000-07-23 21:46:17 +0000 | [diff] [blame] | 65 | /* attributes for the match object */ |
| 66 | PyObject* string; |
| 67 | int pos, endpos; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 68 | /* character size */ |
| 69 | int charsize; |
| 70 | /* registers */ |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 71 | int lastindex; |
Fredrik Lundh | 102f3ad | 2000-06-29 08:55:54 +0000 | [diff] [blame] | 72 | int lastmark; |
Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 73 | void* mark[SRE_MARK_SIZE]; |
Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 74 | /* dynamically allocated stuff */ |
| 75 | void** mark_stack; |
| 76 | int mark_stack_size; |
| 77 | int mark_stack_base; |
| 78 | SRE_REPEAT *repeat; /* current repeat context */ |
Fredrik Lundh | 102f3ad | 2000-06-29 08:55:54 +0000 | [diff] [blame] | 79 | /* hooks */ |
Fredrik Lundh | b389df3 | 2000-06-29 12:48:37 +0000 | [diff] [blame] | 80 | SRE_TOLOWER_HOOK lower; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 81 | } SRE_STATE; |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 82 | |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 83 | typedef struct { |
| 84 | PyObject_HEAD |
| 85 | PyObject* pattern; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 86 | SRE_STATE state; |
Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 87 | } ScannerObject; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 88 | |
| 89 | #endif |