blob: 4328cb367dc372b9bfef28e101b2920d8138240d [file] [log] [blame]
J. Duke319a3b92007-12-01 00:00:00 +00001/*
2 * Copyright 1995-2001 Sun Microsystems, Inc. All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation. Sun designates this
8 * particular file as subject to the "Classpath" exception as provided
9 * by Sun in the LICENSE file that accompanied this code.
10 *
11 * This code is distributed in the hope that it will be useful, but WITHOUT
12 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 * version 2 for more details (a copy is included in the LICENSE file that
15 * accompanied this code).
16 *
17 * You should have received a copy of the GNU General Public License version
18 * 2 along with this work; if not, write to the Free Software Foundation,
19 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
20 *
21 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
22 * CA 95054 USA or visit www.sun.com if you need additional information or
23 * have any questions.
24 */
25
26package sun.misc;
27import java.io.*;
28
29/**
30 * A class to represent a pool of regular expressions. A string
31 * can be matched against the whole pool all at once. It is much
32 * faster than doing individual regular expression matches one-by-one.
33 *
34 * @see java.misc.RegexpTarget
35 * @author James Gosling
36 */
37
38public class RegexpPool {
39 private RegexpNode prefixMachine = new RegexpNode();
40 private RegexpNode suffixMachine = new RegexpNode();
41 private static final int BIG = 0x7FFFFFFF;
42 private int lastDepth = BIG;
43
44 public RegexpPool () {
45 }
46
47 /**
48 * Add a regular expression to the pool of regular expressions.
49 * @param re The regular expression to add to the pool.
50 For now, only handles strings that either begin or end with
51 a '*'.
52 * @param ret The object to be returned when this regular expression is
53 matched. If ret is an instance of the RegexpTarget class, ret.found
54 is called with the string fragment that matched the '*' as its
55 parameter.
56 * @exception REException error
57 */
58 public void add(String re, Object ret) throws REException {
59 add(re, ret, false);
60 }
61
62 /**
63 * Replace the target for the regular expression with a different
64 * target.
65 *
66 * @param re The regular expression to be replaced in the pool.
67 * For now, only handles strings that either begin or end with
68 * a '*'.
69 * @param ret The object to be returned when this regular expression is
70 * matched. If ret is an instance of the RegexpTarget class, ret.found
71 * is called with the string fragment that matched the '*' as its
72 * parameter.
73 */
74 public void replace(String re, Object ret) {
75 try {
76 add(re, ret, true);
77 } catch(Exception e) {
78 // should never occur if replace is true
79 }
80 }
81
82 /**
83 * Delete the regular expression and its target.
84 * @param re The regular expression to be deleted from the pool.
85 * must begin or end with a '*'
86 * @return target - the old target.
87 */
88 public Object delete(String re) {
89 Object o = null;
90 RegexpNode p = prefixMachine;
91 RegexpNode best = p;
92 int len = re.length() - 1;
93 int i;
94 boolean prefix = true;
95
96 if (!re.startsWith("*") ||
97 !re.endsWith("*"))
98 len++;
99
100 if (len <= 0)
101 return null;
102
103 /* March forward through the prefix machine */
104 for (i = 0; p != null; i++) {
105 if (p.result != null && p.depth < BIG
106 && (!p.exact || i == len)) {
107 best = p;
108 }
109 if (i >= len)
110 break;
111 p = p.find(re.charAt(i));
112 }
113
114 /* march backward through the suffix machine */
115 p = suffixMachine;
116 for (i = len; --i >= 0 && p != null;) {
117 if (p.result != null && p.depth < BIG) {
118 prefix = false;
119 best = p;
120 }
121 p = p.find(re.charAt(i));
122 }
123
124 // delete only if there is an exact match
125 if (prefix) {
126 if (re.equals(best.re)) {
127 o = best.result;
128 best.result = null;
129
130 }
131 } else {
132 if (re.equals(best.re)) {
133 o = best.result;
134 best.result = null;
135 }
136 }
137 return o;
138 }
139
140 /** Search for a match to a string & return the object associated
141 with it with the match. When multiple regular expressions
142 would match the string, the best match is returned first.
143 The next best match is returned the next time matchNext is
144 called.
145 @param s The string to match against the regular expressions
146 in the pool.
147 @return null on failure, otherwise the object associated with
148 the regular expression when it was added to the pool.
149 If the object is an instance of RegexpTarget, then
150 the return value is the result from calling
151 return.found(string_that_matched_wildcard).
152 */
153 public Object match(String s) {
154 return matchAfter(s, BIG);
155 }
156
157 /** Identical to match except that it will only find matches to
158 regular expressions that were added to the pool <i>after</i>
159 the last regular expression that matched in the last call
160 to match() or matchNext() */
161 public Object matchNext(String s) {
162 return matchAfter(s, lastDepth);
163 }
164
165 private void add(String re, Object ret, boolean replace) throws REException {
166 int len = re.length();
167 RegexpNode p;
168 if (re.charAt(0) == '*') {
169 p = suffixMachine;
170 while (len > 1)
171 p = p.add(re.charAt(--len));
172 } else {
173 boolean exact = false;
174 if (re.charAt(len - 1) == '*')
175 len--;
176 else
177 exact = true;
178 p = prefixMachine;
179 for (int i = 0; i < len; i++)
180 p = p.add(re.charAt(i));
181 p.exact = exact;
182 }
183
184 if (p.result != null && !replace)
185 throw new REException(re + " is a duplicate");
186
187 p.re = re;
188 p.result = ret;
189 }
190
191 private Object matchAfter(String s, int lastMatchDepth) {
192 RegexpNode p = prefixMachine;
193 RegexpNode best = p;
194 int bst = 0;
195 int bend = 0;
196 int len = s.length();
197 int i;
198 if (len <= 0)
199 return null;
200 /* March forward through the prefix machine */
201 for (i = 0; p != null; i++) {
202 if (p.result != null && p.depth < lastMatchDepth
203 && (!p.exact || i == len)) {
204 lastDepth = p.depth;
205 best = p;
206 bst = i;
207 bend = len;
208 }
209 if (i >= len)
210 break;
211 p = p.find(s.charAt(i));
212 }
213 /* march backward through the suffix machine */
214 p = suffixMachine;
215 for (i = len; --i >= 0 && p != null;) {
216 if (p.result != null && p.depth < lastMatchDepth) {
217 lastDepth = p.depth;
218 best = p;
219 bst = 0;
220 bend = i+1;
221 }
222 p = p.find(s.charAt(i));
223 }
224 Object o = best.result;
225 if (o != null && o instanceof RegexpTarget)
226 o = ((RegexpTarget) o).found(s.substring(bst, bend));
227 return o;
228 }
229
230 /** Resets the pool so that the next call to matchNext looks
231 at all regular expressions in the pool. match(s); is equivalent
232 to reset(); matchNext(s);
233 <p><b>Multithreading note:</b> reset/nextMatch leave state in the
234 regular expression pool. If multiple threads could be using this
235 pool this way, they should be syncronized to avoid race hazards.
236 match() was done in such a way that there are no such race
237 hazards: multiple threads can be matching in the same pool
238 simultaneously. */
239 public void reset() {
240 lastDepth = BIG;
241 }
242
243 /** Print this pool to standard output */
244 public void print(PrintStream out) {
245 out.print("Regexp pool:\n");
246 if (suffixMachine.firstchild != null) {
247 out.print(" Suffix machine: ");
248 suffixMachine.firstchild.print(out);
249 out.print("\n");
250 }
251 if (prefixMachine.firstchild != null) {
252 out.print(" Prefix machine: ");
253 prefixMachine.firstchild.print(out);
254 out.print("\n");
255 }
256 }
257
258}
259
260/* A node in a regular expression finite state machine. */
261class RegexpNode {
262 char c;
263 RegexpNode firstchild;
264 RegexpNode nextsibling;
265 int depth;
266 boolean exact;
267 Object result;
268 String re = null;
269
270 RegexpNode () {
271 c = '#';
272 depth = 0;
273 }
274 RegexpNode (char C, int depth) {
275 c = C;
276 this.depth = depth;
277 }
278 RegexpNode add(char C) {
279 RegexpNode p = firstchild;
280 if (p == null)
281 p = new RegexpNode (C, depth+1);
282 else {
283 while (p != null)
284 if (p.c == C)
285 return p;
286 else
287 p = p.nextsibling;
288 p = new RegexpNode (C, depth+1);
289 p.nextsibling = firstchild;
290 }
291 firstchild = p;
292 return p;
293 }
294 RegexpNode find(char C) {
295 for (RegexpNode p = firstchild;
296 p != null;
297 p = p.nextsibling)
298 if (p.c == C)
299 return p;
300 return null;
301 }
302 void print(PrintStream out) {
303 if (nextsibling != null) {
304 RegexpNode p = this;
305 out.print("(");
306 while (p != null) {
307 out.write(p.c);
308 if (p.firstchild != null)
309 p.firstchild.print(out);
310 p = p.nextsibling;
311 out.write(p != null ? '|' : ')');
312 }
313 } else {
314 out.write(c);
315 if (firstchild != null)
316 firstchild.print(out);
317 }
318 }
319}