blob: 531dc319cbc20a8ea12e597bcd5f94fa17470a81 [file] [log] [blame]
#
# Secret Labs' Regular Expression Engine
# $Id$
#
# various symbols used by the regular expression engine.
# run this script to update the _sre include files!
#
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
# This code can only be used for 1.6 alpha testing. All other use
# require explicit permission from Secret Labs AB.
#
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
# other compatibility work.
#
17
# operators
#
# Every operator is identified by a lower-case string spelled like the
# name of the constant that holds it.

# match outcome
FAILURE = "failure"
SUCCESS = "success"

# operators that come with an "_IGNORE" form
GROUP = "group"
GROUP_IGNORE = "group_ignore"
IN = "in"
IN_IGNORE = "in_ignore"
LITERAL = "literal"
LITERAL_IGNORE = "literal_ignore"
NOT_LITERAL = "not_literal"
NOT_LITERAL_IGNORE = "not_literal_ignore"

# repeat operators
REPEAT = "repeat"
MAX_REPEAT = "max_repeat"
MAX_REPEAT_ONE = "max_repeat_one"
MAX_UNTIL = "max_until"
MIN_REPEAT = "min_repeat"
MIN_UNTIL = "min_until"

# everything else
ANY = "any"
ASSERT = "assert"
AT = "at"
BRANCH = "branch"
CALL = "call"
CATEGORY = "category"
JUMP = "jump"
MARK = "mark"
NEGATE = "negate"
RANGE = "range"
SUBPATTERN = "subpattern"
48
# positions

# beginning/end, each followed by its "_LINE" form
AT_BEGINNING = "at_beginning"
AT_BEGINNING_LINE = "at_beginning_line"
AT_END = "at_end"
AT_END_LINE = "at_end_line"

# boundary and its negation
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
Guido van Rossum7627c0d2000-03-31 14:58:54 +000056
# categories
#
# Each character class exists in four forms: plain, negated ("NOT"),
# and a "LOC" variant of both.

# digit
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
CATEGORY_LOC_DIGIT = "category_loc_digit"
CATEGORY_LOC_NOT_DIGIT = "category_loc_not_digit"

# space
CATEGORY_SPACE = "category_space"
CATEGORY_NOT_SPACE = "category_not_space"
CATEGORY_LOC_SPACE = "category_loc_space"
CATEGORY_LOC_NOT_SPACE = "category_loc_not_space"

# word
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
CATEGORY_LOC_WORD = "category_loc_word"
CATEGORY_LOC_NOT_WORD = "category_loc_not_word"

# linebreak
CATEGORY_LINEBREAK = "category_linebreak"
CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
CATEGORY_LOC_LINEBREAK = "category_loc_linebreak"
CATEGORY_LOC_NOT_LINEBREAK = "category_loc_not_linebreak"
Guido van Rossum7627c0d2000-03-31 14:58:54 +000074
# Ordered list of engine opcodes.  The order is significant: makedict()
# later numbers each entry by its index in this list, and the script
# section at the end of this file writes those numbers to sre_constants.h
# as SRE_OP_* defines.  Changing the order therefore changes the codes.
# NOTE(review): SUBPATTERN is defined among the operators above but has no
# entry here -- confirm that is intentional.
OPCODES = [

    # failure=0 success=1 (just because it looks better that way :-)
    FAILURE, SUCCESS,

    ANY,
    ASSERT,
    AT,
    BRANCH,
    CALL,
    CATEGORY,
    GROUP, GROUP_IGNORE,
    IN, IN_IGNORE,
    JUMP,
    LITERAL, LITERAL_IGNORE,
    MARK,
    MAX_REPEAT, MAX_UNTIL,
    MAX_REPEAT_ONE,
    MIN_REPEAT, MIN_UNTIL,
    NOT_LITERAL, NOT_LITERAL_IGNORE,
    NEGATE,
    RANGE,
    REPEAT

]
100
# Ordered list of position ("at") codes.  Order matters: makedict()
# numbers the entries by their index in this list, and those numbers are
# what the script section at the end of this file writes to
# sre_constants.h.
ATCODES = [
    AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
    AT_NON_BOUNDARY, AT_END, AT_END_LINE
]
105
# Ordered list of character category codes: the eight plain categories
# first, then their eight CATEGORY_LOC_* counterparts.  Order matters:
# makedict() numbers the entries by their index in this list, and those
# numbers are what the script section at the end of this file writes to
# sre_constants.h.
CHCODES = [
    CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
    CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
    CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_DIGIT,
    CATEGORY_LOC_NOT_DIGIT, CATEGORY_LOC_SPACE,
    CATEGORY_LOC_NOT_SPACE, CATEGORY_LOC_WORD, CATEGORY_LOC_NOT_WORD,
    CATEGORY_LOC_LINEBREAK, CATEGORY_LOC_NOT_LINEBREAK
]
114
def makedict(list):
    """Return a dictionary mapping each item of *list* to its index.

    Items are numbered from zero in iteration order.  When an item occurs
    more than once, the index of its last occurrence is the one kept.
    """
    # The parameter name shadows the builtin; it is kept because it is
    # part of this function's signature.
    codes = {}
    for position, name in enumerate(list):
        codes[name] = position
    return codes
122
# Rebind each ordered list of names to a dictionary that maps every name
# to its position in that list; from here on the three names refer to
# name -> numeric code mappings.
OPCODES = makedict(OPCODES)
ATCODES = makedict(ATCODES)
CHCODES = makedict(CHCODES)
Guido van Rossum7627c0d2000-03-31 14:58:54 +0000126
# replacement operations for "ignore case" mode
#
# Maps each operator that has an "_IGNORE" form to that form.
OP_IGNORE = {
    GROUP: GROUP_IGNORE,
    IN: IN_IGNORE,
    LITERAL: LITERAL_IGNORE,
    NOT_LITERAL: NOT_LITERAL_IGNORE
}
134
# Maps the beginning/end position codes to their "_LINE" counterparts.
# NOTE(review): going by the name, this is the substitution applied in
# multiline mode; the code that consults it is not in this file -- confirm.
AT_MULTILINE = {
    AT_BEGINNING: AT_BEGINNING_LINE,
    AT_END: AT_END_LINE
}
139
# Maps each plain category code to its CATEGORY_LOC_* counterpart.
# NOTE(review): going by the name, this is the substitution applied when
# the locale flag is in effect; the code that consults it is not in this
# file -- confirm.
CH_LOCALE = {
    CATEGORY_DIGIT: CATEGORY_LOC_DIGIT,
    CATEGORY_NOT_DIGIT: CATEGORY_LOC_NOT_DIGIT,
    CATEGORY_SPACE: CATEGORY_LOC_SPACE,
    CATEGORY_NOT_SPACE: CATEGORY_LOC_NOT_SPACE,
    CATEGORY_WORD: CATEGORY_LOC_WORD,
    CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
    CATEGORY_LINEBREAK: CATEGORY_LOC_LINEBREAK,
    CATEGORY_NOT_LINEBREAK: CATEGORY_LOC_NOT_LINEBREAK
}
150
# flags
#
# Each flag occupies its own bit, written here as an explicit shift.
SRE_FLAG_TEMPLATE = 1 << 0  # NYI
SRE_FLAG_IGNORECASE = 1 << 1
SRE_FLAG_LOCALE = 1 << 2
SRE_FLAG_MULTILINE = 1 << 3
SRE_FLAG_DOTALL = 1 << 4
SRE_FLAG_VERBOSE = 1 << 5
158
if __name__ == "__main__":
    # Running this module as a script regenerates sre_constants.h from the
    # OPCODES, ATCODES and CHCODES tables defined above.

    def dump(f, d, prefix):
        """Write one '#define <prefix>_<NAME> <code>' line per entry of d.

        f is a writable file object, d maps symbol names to their numeric
        codes, and prefix is joined to each upper-cased name with an
        underscore.  Lines are written in ascending order of code.
        """
        # Sort on the code by building (code, name) pairs.  This replaces
        # the cmp-function form of list.sort() on dict.items(), which does
        # not work on Python 3, and behaves the same on Python 2.
        pairs = []
        for name, code in d.items():
            pairs.append((code, name))
        pairs.sort()
        for code, name in pairs:
            f.write("#define %s_%s %s\n" % (prefix, name.upper(), code))

    f = open("sre_constants.h", "w")
    try:
        f.write("/* generated from sre_constants.py */\n")
        dump(f, OPCODES, "SRE_OP")
        dump(f, ATCODES, "SRE")
        dump(f, CHCODES, "SRE")
    finally:
        # Close the header even when one of the writes above fails.
        f.close()
    print("done")