J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 1995-2001 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | package sun.misc; |
| 27 | import java.io.*; |
| 28 | |
| 29 | /** |
| 30 | * A class to represent a pool of regular expressions. A string |
| 31 | * can be matched against the whole pool all at once. It is much |
| 32 | * faster than doing individual regular expression matches one-by-one. |
| 33 | * |
| 34 | * @see java.misc.RegexpTarget |
| 35 | * @author James Gosling |
| 36 | */ |
| 37 | |
| 38 | public class RegexpPool { |
| 39 | private RegexpNode prefixMachine = new RegexpNode(); |
| 40 | private RegexpNode suffixMachine = new RegexpNode(); |
| 41 | private static final int BIG = 0x7FFFFFFF; |
| 42 | private int lastDepth = BIG; |
| 43 | |
| 44 | public RegexpPool () { |
| 45 | } |
| 46 | |
| 47 | /** |
| 48 | * Add a regular expression to the pool of regular expressions. |
| 49 | * @param re The regular expression to add to the pool. |
| 50 | For now, only handles strings that either begin or end with |
| 51 | a '*'. |
| 52 | * @param ret The object to be returned when this regular expression is |
| 53 | matched. If ret is an instance of the RegexpTarget class, ret.found |
| 54 | is called with the string fragment that matched the '*' as its |
| 55 | parameter. |
| 56 | * @exception REException error |
| 57 | */ |
| 58 | public void add(String re, Object ret) throws REException { |
| 59 | add(re, ret, false); |
| 60 | } |
| 61 | |
| 62 | /** |
| 63 | * Replace the target for the regular expression with a different |
| 64 | * target. |
| 65 | * |
| 66 | * @param re The regular expression to be replaced in the pool. |
| 67 | * For now, only handles strings that either begin or end with |
| 68 | * a '*'. |
| 69 | * @param ret The object to be returned when this regular expression is |
| 70 | * matched. If ret is an instance of the RegexpTarget class, ret.found |
| 71 | * is called with the string fragment that matched the '*' as its |
| 72 | * parameter. |
| 73 | */ |
| 74 | public void replace(String re, Object ret) { |
| 75 | try { |
| 76 | add(re, ret, true); |
| 77 | } catch(Exception e) { |
| 78 | // should never occur if replace is true |
| 79 | } |
| 80 | } |
| 81 | |
| 82 | /** |
| 83 | * Delete the regular expression and its target. |
| 84 | * @param re The regular expression to be deleted from the pool. |
| 85 | * must begin or end with a '*' |
| 86 | * @return target - the old target. |
| 87 | */ |
| 88 | public Object delete(String re) { |
| 89 | Object o = null; |
| 90 | RegexpNode p = prefixMachine; |
| 91 | RegexpNode best = p; |
| 92 | int len = re.length() - 1; |
| 93 | int i; |
| 94 | boolean prefix = true; |
| 95 | |
| 96 | if (!re.startsWith("*") || |
| 97 | !re.endsWith("*")) |
| 98 | len++; |
| 99 | |
| 100 | if (len <= 0) |
| 101 | return null; |
| 102 | |
| 103 | /* March forward through the prefix machine */ |
| 104 | for (i = 0; p != null; i++) { |
| 105 | if (p.result != null && p.depth < BIG |
| 106 | && (!p.exact || i == len)) { |
| 107 | best = p; |
| 108 | } |
| 109 | if (i >= len) |
| 110 | break; |
| 111 | p = p.find(re.charAt(i)); |
| 112 | } |
| 113 | |
| 114 | /* march backward through the suffix machine */ |
| 115 | p = suffixMachine; |
| 116 | for (i = len; --i >= 0 && p != null;) { |
| 117 | if (p.result != null && p.depth < BIG) { |
| 118 | prefix = false; |
| 119 | best = p; |
| 120 | } |
| 121 | p = p.find(re.charAt(i)); |
| 122 | } |
| 123 | |
| 124 | // delete only if there is an exact match |
| 125 | if (prefix) { |
| 126 | if (re.equals(best.re)) { |
| 127 | o = best.result; |
| 128 | best.result = null; |
| 129 | |
| 130 | } |
| 131 | } else { |
| 132 | if (re.equals(best.re)) { |
| 133 | o = best.result; |
| 134 | best.result = null; |
| 135 | } |
| 136 | } |
| 137 | return o; |
| 138 | } |
| 139 | |
| 140 | /** Search for a match to a string & return the object associated |
| 141 | with it with the match. When multiple regular expressions |
| 142 | would match the string, the best match is returned first. |
| 143 | The next best match is returned the next time matchNext is |
| 144 | called. |
| 145 | @param s The string to match against the regular expressions |
| 146 | in the pool. |
| 147 | @return null on failure, otherwise the object associated with |
| 148 | the regular expression when it was added to the pool. |
| 149 | If the object is an instance of RegexpTarget, then |
| 150 | the return value is the result from calling |
| 151 | return.found(string_that_matched_wildcard). |
| 152 | */ |
| 153 | public Object match(String s) { |
| 154 | return matchAfter(s, BIG); |
| 155 | } |
| 156 | |
| 157 | /** Identical to match except that it will only find matches to |
| 158 | regular expressions that were added to the pool <i>after</i> |
| 159 | the last regular expression that matched in the last call |
| 160 | to match() or matchNext() */ |
| 161 | public Object matchNext(String s) { |
| 162 | return matchAfter(s, lastDepth); |
| 163 | } |
| 164 | |
| 165 | private void add(String re, Object ret, boolean replace) throws REException { |
| 166 | int len = re.length(); |
| 167 | RegexpNode p; |
| 168 | if (re.charAt(0) == '*') { |
| 169 | p = suffixMachine; |
| 170 | while (len > 1) |
| 171 | p = p.add(re.charAt(--len)); |
| 172 | } else { |
| 173 | boolean exact = false; |
| 174 | if (re.charAt(len - 1) == '*') |
| 175 | len--; |
| 176 | else |
| 177 | exact = true; |
| 178 | p = prefixMachine; |
| 179 | for (int i = 0; i < len; i++) |
| 180 | p = p.add(re.charAt(i)); |
| 181 | p.exact = exact; |
| 182 | } |
| 183 | |
| 184 | if (p.result != null && !replace) |
| 185 | throw new REException(re + " is a duplicate"); |
| 186 | |
| 187 | p.re = re; |
| 188 | p.result = ret; |
| 189 | } |
| 190 | |
| 191 | private Object matchAfter(String s, int lastMatchDepth) { |
| 192 | RegexpNode p = prefixMachine; |
| 193 | RegexpNode best = p; |
| 194 | int bst = 0; |
| 195 | int bend = 0; |
| 196 | int len = s.length(); |
| 197 | int i; |
| 198 | if (len <= 0) |
| 199 | return null; |
| 200 | /* March forward through the prefix machine */ |
| 201 | for (i = 0; p != null; i++) { |
| 202 | if (p.result != null && p.depth < lastMatchDepth |
| 203 | && (!p.exact || i == len)) { |
| 204 | lastDepth = p.depth; |
| 205 | best = p; |
| 206 | bst = i; |
| 207 | bend = len; |
| 208 | } |
| 209 | if (i >= len) |
| 210 | break; |
| 211 | p = p.find(s.charAt(i)); |
| 212 | } |
| 213 | /* march backward through the suffix machine */ |
| 214 | p = suffixMachine; |
| 215 | for (i = len; --i >= 0 && p != null;) { |
| 216 | if (p.result != null && p.depth < lastMatchDepth) { |
| 217 | lastDepth = p.depth; |
| 218 | best = p; |
| 219 | bst = 0; |
| 220 | bend = i+1; |
| 221 | } |
| 222 | p = p.find(s.charAt(i)); |
| 223 | } |
| 224 | Object o = best.result; |
| 225 | if (o != null && o instanceof RegexpTarget) |
| 226 | o = ((RegexpTarget) o).found(s.substring(bst, bend)); |
| 227 | return o; |
| 228 | } |
| 229 | |
| 230 | /** Resets the pool so that the next call to matchNext looks |
| 231 | at all regular expressions in the pool. match(s); is equivalent |
| 232 | to reset(); matchNext(s); |
| 233 | <p><b>Multithreading note:</b> reset/nextMatch leave state in the |
| 234 | regular expression pool. If multiple threads could be using this |
| 235 | pool this way, they should be syncronized to avoid race hazards. |
| 236 | match() was done in such a way that there are no such race |
| 237 | hazards: multiple threads can be matching in the same pool |
| 238 | simultaneously. */ |
| 239 | public void reset() { |
| 240 | lastDepth = BIG; |
| 241 | } |
| 242 | |
| 243 | /** Print this pool to standard output */ |
| 244 | public void print(PrintStream out) { |
| 245 | out.print("Regexp pool:\n"); |
| 246 | if (suffixMachine.firstchild != null) { |
| 247 | out.print(" Suffix machine: "); |
| 248 | suffixMachine.firstchild.print(out); |
| 249 | out.print("\n"); |
| 250 | } |
| 251 | if (prefixMachine.firstchild != null) { |
| 252 | out.print(" Prefix machine: "); |
| 253 | prefixMachine.firstchild.print(out); |
| 254 | out.print("\n"); |
| 255 | } |
| 256 | } |
| 257 | |
| 258 | } |
| 259 | |
| 260 | /* A node in a regular expression finite state machine. */ |
| 261 | class RegexpNode { |
| 262 | char c; |
| 263 | RegexpNode firstchild; |
| 264 | RegexpNode nextsibling; |
| 265 | int depth; |
| 266 | boolean exact; |
| 267 | Object result; |
| 268 | String re = null; |
| 269 | |
| 270 | RegexpNode () { |
| 271 | c = '#'; |
| 272 | depth = 0; |
| 273 | } |
| 274 | RegexpNode (char C, int depth) { |
| 275 | c = C; |
| 276 | this.depth = depth; |
| 277 | } |
| 278 | RegexpNode add(char C) { |
| 279 | RegexpNode p = firstchild; |
| 280 | if (p == null) |
| 281 | p = new RegexpNode (C, depth+1); |
| 282 | else { |
| 283 | while (p != null) |
| 284 | if (p.c == C) |
| 285 | return p; |
| 286 | else |
| 287 | p = p.nextsibling; |
| 288 | p = new RegexpNode (C, depth+1); |
| 289 | p.nextsibling = firstchild; |
| 290 | } |
| 291 | firstchild = p; |
| 292 | return p; |
| 293 | } |
| 294 | RegexpNode find(char C) { |
| 295 | for (RegexpNode p = firstchild; |
| 296 | p != null; |
| 297 | p = p.nextsibling) |
| 298 | if (p.c == C) |
| 299 | return p; |
| 300 | return null; |
| 301 | } |
| 302 | void print(PrintStream out) { |
| 303 | if (nextsibling != null) { |
| 304 | RegexpNode p = this; |
| 305 | out.print("("); |
| 306 | while (p != null) { |
| 307 | out.write(p.c); |
| 308 | if (p.firstchild != null) |
| 309 | p.firstchild.print(out); |
| 310 | p = p.nextsibling; |
| 311 | out.write(p != null ? '|' : ')'); |
| 312 | } |
| 313 | } else { |
| 314 | out.write(c); |
| 315 | if (firstchild != null) |
| 316 | firstchild.print(out); |
| 317 | } |
| 318 | } |
| 319 | } |