/*
 * Secret Labs' Regular Expression Engine
 *
 * regular expression matching engine
 *
 * Copyright (c) 1997-2001 by Secret Labs AB.  All rights reserved.
 *
 * See the _sre.c file for information on usage and redistribution.
 */
 | 10 |  | 
 | 11 | #ifndef SRE_INCLUDED | 
 | 12 | #define SRE_INCLUDED | 
 | 13 |  | 
 | 14 | #include "sre_constants.h" | 
 | 15 |  | 
/* size of a code word (must be unsigned short or larger, and
   large enough to hold a UCS4 character) */
#define SRE_CODE Py_UCS4

/* SRE_MAXREPEAT depends on the width of size_t: when size_t is wider
   than 4 bytes it is the all-ones SRE_CODE value; otherwise it is
   PY_SSIZE_T_MAX + 1, computed as an SRE_CODE.
   NOTE(review): presumably the sentinel for an unbounded repeat count --
   confirm against the users of this macro. */
#if SIZEOF_SIZE_T > 4
# define SRE_MAXREPEAT (~(SRE_CODE)0)
#else
# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
#endif
| Fredrik Lundh | 102f3ad | 2000-06-29 08:55:54 +0000 | [diff] [blame] | 24 |  | 
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 25 | typedef struct { | 
| Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 26 |     PyObject_VAR_HEAD | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 27 |     Py_ssize_t groups; /* must be first! */ | 
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 28 |     PyObject* groupindex; | 
| Fredrik Lundh | c230173 | 2000-07-02 22:25:39 +0000 | [diff] [blame] | 29 |     PyObject* indexgroup; | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 30 |     /* compatibility */ | 
 | 31 |     PyObject* pattern; /* pattern source (or None) */ | 
 | 32 |     int flags; /* flags used when compiling pattern source */ | 
| Raymond Hettinger | 027bb63 | 2004-05-31 03:09:25 +0000 | [diff] [blame] | 33 |     PyObject *weakreflist; /* List of weak references */ | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 34 |     int logical_charsize; /* pattern charsize (or -1) */ | 
 | 35 |     int charsize; | 
| Benjamin Peterson | e48944b | 2012-03-07 14:50:25 -0600 | [diff] [blame] | 36 |     Py_buffer view; | 
| Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 37 |     /* pattern code */ | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 38 |     Py_ssize_t codesize; | 
| Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 39 |     SRE_CODE code[1]; | 
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 40 | } PatternObject; | 
 | 41 |  | 
/* Cast o to PatternObject* and yield its code[] array.  The argument is
   not type-checked. */
#define PatternObject_GetCode(o) (((PatternObject*)(o))->code)
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 43 |  | 
 | 44 | typedef struct { | 
| Fredrik Lundh | 6f01398 | 2000-07-03 18:44:21 +0000 | [diff] [blame] | 45 |     PyObject_VAR_HEAD | 
| Fredrik Lundh | b0f05bd | 2001-07-02 16:42:49 +0000 | [diff] [blame] | 46 |     PyObject* string; /* link to the target string (must be first) */ | 
| Fredrik Lundh | 8a3ebf8 | 2000-07-23 21:46:17 +0000 | [diff] [blame] | 47 |     PyObject* regs; /* cached list of matching spans */ | 
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 48 |     PatternObject* pattern; /* link to the regex (pattern) object */ | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 49 |     Py_ssize_t pos, endpos; /* current target slice */ | 
 | 50 |     Py_ssize_t lastindex; /* last index marker seen by the engine (-1 if none) */ | 
 | 51 |     Py_ssize_t groups; /* number of groups (start/end marks) */ | 
 | 52 |     Py_ssize_t mark[1]; | 
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 53 | } MatchObject; | 
 | 54 |  | 
/* Pointer to a function that takes a character code as unsigned int and
   returns an unsigned int; stored in SRE_STATE.lower. */
typedef unsigned int (*SRE_TOLOWER_HOOK)(unsigned int ch);
 | 56 |  | 
/* Number of entries in the fixed-size SRE_STATE.mark[] array. */
/* FIXME: <fl> shouldn't be a constant, really... */
#define SRE_MARK_SIZE 200
 | 59 |  | 
| Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 60 | typedef struct SRE_REPEAT_T { | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 61 |     Py_ssize_t count; | 
| Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 62 |     SRE_CODE* pattern; /* points to REPEAT operator arguments */ | 
| Gustavo Niemeyer | ad3fc44 | 2003-10-17 22:13:16 +0000 | [diff] [blame] | 63 |     void* last_ptr; /* helper to check for infinite loops */ | 
| Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 64 |     struct SRE_REPEAT_T *prev; /* points to previous repeat context */ | 
 | 65 | } SRE_REPEAT; | 
 | 66 |  | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 67 | typedef struct { | 
 | 68 |     /* string pointers */ | 
 | 69 |     void* ptr; /* current position (also end of current slice) */ | 
 | 70 |     void* beginning; /* start of original string */ | 
 | 71 |     void* start; /* start of current slice */ | 
 | 72 |     void* end; /* end of original string */ | 
| Fredrik Lundh | 8a3ebf8 | 2000-07-23 21:46:17 +0000 | [diff] [blame] | 73 |     /* attributes for the match object */ | 
 | 74 |     PyObject* string; | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 75 |     Py_ssize_t pos, endpos; | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 76 |     /* character size */ | 
| Martin v. Löwis | d63a3b8 | 2011-09-28 07:41:54 +0200 | [diff] [blame] | 77 |     int logical_charsize; /* kind of thing: 1 - bytes, 2/4 - unicode */ | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 78 |     int charsize; | 
 | 79 |     /* registers */ | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 80 |     Py_ssize_t lastindex; | 
 | 81 |     Py_ssize_t lastmark; | 
| Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 82 |     void* mark[SRE_MARK_SIZE]; | 
| Fredrik Lundh | 29c4ba9 | 2000-08-01 18:20:07 +0000 | [diff] [blame] | 83 |     /* dynamically allocated stuff */ | 
| Gustavo Niemeyer | ad3fc44 | 2003-10-17 22:13:16 +0000 | [diff] [blame] | 84 |     char* data_stack; | 
| Thomas Wouters | 0e3f591 | 2006-08-11 14:57:12 +0000 | [diff] [blame] | 85 |     size_t data_stack_size; | 
 | 86 |     size_t data_stack_base; | 
| Benjamin Peterson | e48944b | 2012-03-07 14:50:25 -0600 | [diff] [blame] | 87 |     Py_buffer buffer; | 
| Gustavo Niemeyer | ad3fc44 | 2003-10-17 22:13:16 +0000 | [diff] [blame] | 88 |     /* current repeat context */ | 
 | 89 |     SRE_REPEAT *repeat; | 
| Fredrik Lundh | 102f3ad | 2000-06-29 08:55:54 +0000 | [diff] [blame] | 90 |     /* hooks */ | 
| Fredrik Lundh | b389df3 | 2000-06-29 12:48:37 +0000 | [diff] [blame] | 91 |     SRE_TOLOWER_HOOK lower; | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 92 | } SRE_STATE; | 
| Guido van Rossum | b700df9 | 2000-03-31 14:59:30 +0000 | [diff] [blame] | 93 |  | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 94 | typedef struct { | 
 | 95 |     PyObject_HEAD | 
 | 96 |     PyObject* pattern; | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 97 |     SRE_STATE state; | 
| Fredrik Lundh | be2211e | 2000-06-29 16:57:40 +0000 | [diff] [blame] | 98 | } ScannerObject; | 
| Jeremy Hylton | b1aa195 | 2000-06-01 17:39:12 +0000 | [diff] [blame] | 99 |  | 
 | 100 | #endif |