Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Secret Labs' Regular Expression Engine |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 3 | * |
Fredrik Lundh | 8094611 | 2000-06-29 18:03:25 +0000 | [diff] [blame] | 4 | * regular expression matching engine |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 5 | * |
Fredrik Lundh | b0f05bd | 2001-07-02 16:42:49 +0000 | [diff] [blame] | 6 | * Copyright (c) 1997-2001 by Secret Labs AB. All rights reserved. |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 7 | * |
| 8 | * See the _sre.c file for information on usage and redistribution. |
| 9 | */ |
| 10 | |
| 11 | #ifndef SRE_INCLUDED |
| 12 | #define SRE_INCLUDED |
| 13 | |
| 14 | #include "sre_constants.h" |
| 15 | |
/* SRE_CODE is the type of a single code word.  It has to be at least as
   wide as unsigned short, and wide enough to store a UCS4 character. */
#define SRE_CODE Py_UCS4

/* SRE_MAXREPEAT and SRE_MAXGROUPS depend on the width of size_t: when
   size_t is wider than 4 bytes they are derived from the all-ones SRE_CODE
   value, otherwise from PY_SSIZE_T_MAX. */
#if SIZEOF_SIZE_T <= 4
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX)
# define SRE_MAXGROUPS ((SRE_CODE)PY_SSIZE_T_MAX / SIZEOF_SIZE_T / 2)
#else
# define SRE_MAXREPEAT (~(SRE_CODE)0)
# define SRE_MAXGROUPS ((~(SRE_CODE)0) / 2)
#endif
Fredrik Lundh | 102f3ad | 2000-06-29 08:55:54 +0000 | [diff] [blame] | 26 | |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 27 | typedef struct { |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 28 | PyObject_VAR_HEAD |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 29 | Py_ssize_t groups; /* must be first! */ |
Victor Stinner | 726a57d | 2016-11-22 23:04:39 +0100 | [diff] [blame] | 30 | PyObject* groupindex; /* dict */ |
| 31 | PyObject* indexgroup; /* tuple */ |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 32 | /* compatibility */ |
| 33 | PyObject* pattern; /* pattern source (or None) */ |
| 34 | int flags; /* flags used when compiling pattern source */ |
Raymond Hettinger | 027bb63 | 2004-05-31 03:09:25 +0000 | [diff] [blame] | 35 | PyObject *weakreflist; /* List of weak references */ |
Serhiy Storchaka | 9eabac6 | 2013-10-26 10:45:48 +0300 | [diff] [blame] | 36 | int isbytes; /* pattern type (1 - bytes, 0 - string, -1 - None) */ |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 37 | /* pattern code */ |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 38 | Py_ssize_t codesize; |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 39 | SRE_CODE code[1]; |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 40 | } PatternObject; |
| 41 | |
/* Cast o to PatternObject* and yield its code member. */
#define PatternObject_GetCode(o) \
    (((PatternObject *)(o))->code)
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 43 | |
| 44 | typedef struct { |
Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 45 | PyObject_VAR_HEAD |
Fredrik Lundh | b0f05bd | 2001-07-02 16:42:49 +0000 | [diff] [blame] | 46 | PyObject* string; /* link to the target string (must be first) */ |
Fredrik Lundh | 8a3ebf8 | 2000-07-23 21:46:17 +0000 | [diff] [blame] | 47 | PyObject* regs; /* cached list of matching spans */ |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 48 | PatternObject* pattern; /* link to the regex (pattern) object */ |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 49 | Py_ssize_t pos, endpos; /* current target slice */ |
| 50 | Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */ |
| 51 | Py_ssize_t groups; /* number of groups (start/end marks) */ |
| 52 | Py_ssize_t mark[1]; |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 53 | } MatchObject; |
| 54 | |
Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 55 | typedef struct SRE_REPEAT_T { |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 56 | Py_ssize_t count; |
Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 57 | SRE_CODE* pattern; /* points to REPEAT operator arguments */ |
Gustavo Niemeyer | ad3fc44 | 2003-10-17 22:13:16 +0000 | [diff] [blame] | 58 | void* last_ptr; /* helper to check for infinite loops */ |
Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 59 | struct SRE_REPEAT_T *prev; /* points to previous repeat context */ |
| 60 | } SRE_REPEAT; |
| 61 | |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 62 | typedef struct { |
| 63 | /* string pointers */ |
| 64 | void* ptr; /* current position (also end of current slice) */ |
| 65 | void* beginning; /* start of original string */ |
| 66 | void* start; /* start of current slice */ |
| 67 | void* end; /* end of original string */ |
Fredrik Lundh | 8a3ebf8 | 2000-07-23 21:46:17 +0000 | [diff] [blame] | 68 | /* attributes for the match object */ |
| 69 | PyObject* string; |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 70 | Py_ssize_t pos, endpos; |
Serhiy Storchaka | 9eabac6 | 2013-10-26 10:45:48 +0300 | [diff] [blame] | 71 | int isbytes; |
| 72 | int charsize; /* character size */ |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 73 | /* registers */ |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 74 | Py_ssize_t lastindex; |
| 75 | Py_ssize_t lastmark; |
Serhiy Storchaka | 9baa5b2 | 2014-09-29 22:49:23 +0300 | [diff] [blame] | 76 | void** mark; |
Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 77 | /* dynamically allocated stuff */ |
Gustavo Niemeyer | ad3fc44 | 2003-10-17 22:13:16 +0000 | [diff] [blame] | 78 | char* data_stack; |
Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 79 | size_t data_stack_size; |
| 80 | size_t data_stack_base; |
Benjamin Peterson | e48944b | 2012-03-07 14:50:25 -0600 | [diff] [blame] | 81 | Py_buffer buffer; |
Gustavo Niemeyer | ad3fc44 | 2003-10-17 22:13:16 +0000 | [diff] [blame] | 82 | /* current repeat context */ |
| 83 | SRE_REPEAT *repeat; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 84 | } SRE_STATE; |
Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 85 | |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 86 | typedef struct { |
| 87 | PyObject_HEAD |
| 88 | PyObject* pattern; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 89 | SRE_STATE state; |
Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 90 | } ScannerObject; |
Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 91 | |
| 92 | #endif |