J. Duke | 319a3b9 | 2007-12-01 00:00:00 +0000 | [diff] [blame^] | 1 | /* |
| 2 | * Copyright 1999-2006 Sun Microsystems, Inc. All Rights Reserved. |
| 3 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. |
| 4 | * |
| 5 | * This code is free software; you can redistribute it and/or modify it |
| 6 | * under the terms of the GNU General Public License version 2 only, as |
| 7 | * published by the Free Software Foundation. Sun designates this |
| 8 | * particular file as subject to the "Classpath" exception as provided |
| 9 | * by Sun in the LICENSE file that accompanied this code. |
| 10 | * |
| 11 | * This code is distributed in the hope that it will be useful, but WITHOUT |
| 12 | * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or |
| 13 | * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License |
| 14 | * version 2 for more details (a copy is included in the LICENSE file that |
| 15 | * accompanied this code). |
| 16 | * |
| 17 | * You should have received a copy of the GNU General Public License version |
| 18 | * 2 along with this work; if not, write to the Free Software Foundation, |
| 19 | * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. |
| 20 | * |
| 21 | * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara, |
| 22 | * CA 95054 USA or visit www.sun.com if you need additional information or |
| 23 | * have any questions. |
| 24 | */ |
| 25 | |
| 26 | package java.util.regex; |
| 27 | |
| 28 | |
| 29 | /** |
| 30 | * An engine that performs match operations on a {@linkplain |
| 31 | * java.lang.CharSequence character sequence} by interpreting a {@link Pattern}. |
| 32 | * |
| 33 | * <p> A matcher is created from a pattern by invoking the pattern's {@link |
| 34 | * Pattern#matcher matcher} method. Once created, a matcher can be used to |
| 35 | * perform three different kinds of match operations: |
| 36 | * |
| 37 | * <ul> |
| 38 | * |
| 39 | * <li><p> The {@link #matches matches} method attempts to match the entire |
| 40 | * input sequence against the pattern. </p></li> |
| 41 | * |
| 42 | * <li><p> The {@link #lookingAt lookingAt} method attempts to match the |
| 43 | * input sequence, starting at the beginning, against the pattern. </p></li> |
| 44 | * |
| 45 | * <li><p> The {@link #find find} method scans the input sequence looking for |
| 46 | * the next subsequence that matches the pattern. </p></li> |
| 47 | * |
| 48 | * </ul> |
| 49 | * |
| 50 | * <p> Each of these methods returns a boolean indicating success or failure. |
| 51 | * More information about a successful match can be obtained by querying the |
| 52 | * state of the matcher. |
| 53 | * |
| 54 | * <p> A matcher finds matches in a subset of its input called the |
| 55 | * <i>region</i>. By default, the region contains all of the matcher's input. |
| 56 | * The region can be modified via the {@link #region region} method and queried |
| 57 | * via the {@link #regionStart regionStart} and {@link #regionEnd regionEnd} |
| 58 | * methods. The way that the region boundaries interact with some pattern |
| 59 | * constructs can be changed. See {@link #useAnchoringBounds |
| 60 | * useAnchoringBounds} and {@link #useTransparentBounds useTransparentBounds} |
| 61 | * for more details. |
| 62 | * |
| 63 | * <p> This class also defines methods for replacing matched subsequences with |
| 64 | * new strings whose contents can, if desired, be computed from the match |
| 65 | * result. The {@link #appendReplacement appendReplacement} and {@link |
| 66 | * #appendTail appendTail} methods can be used in tandem in order to collect |
| 67 | * the result into an existing string buffer, or the more convenient {@link |
| 68 | * #replaceAll replaceAll} method can be used to create a string in which every |
| 69 | * matching subsequence in the input sequence is replaced. |
| 70 | * |
| 71 | * <p> The explicit state of a matcher includes the start and end indices of |
| 72 | * the most recent successful match. It also includes the start and end |
| 73 | * indices of the input subsequence captured by each <a |
| 74 | * href="Pattern.html#cg">capturing group</a> in the pattern as well as a total |
| 75 | * count of such subsequences. As a convenience, methods are also provided for |
| 76 | * returning these captured subsequences in string form. |
| 77 | * |
| 78 | * <p> The explicit state of a matcher is initially undefined; attempting to |
| 79 | * query any part of it before a successful match will cause an {@link |
| 80 | * IllegalStateException} to be thrown. The explicit state of a matcher is |
| 81 | * recomputed by every match operation. |
| 82 | * |
| 83 | * <p> The implicit state of a matcher includes the input character sequence as |
| 84 | * well as the <i>append position</i>, which is initially zero and is updated |
| 85 | * by the {@link #appendReplacement appendReplacement} method. |
| 86 | * |
| 87 | * <p> A matcher may be reset explicitly by invoking its {@link #reset()} |
| 88 | * method or, if a new input sequence is desired, its {@link |
| 89 | * #reset(java.lang.CharSequence) reset(CharSequence)} method. Resetting a |
| 90 | * matcher discards its explicit state information and sets the append position |
| 91 | * to zero. |
| 92 | * |
| 93 | * <p> Instances of this class are not safe for use by multiple concurrent |
| 94 | * threads. </p> |
| 95 | * |
| 96 | * |
| 97 | * @author Mike McCloskey |
| 98 | * @author Mark Reinhold |
| 99 | * @author JSR-51 Expert Group |
| 100 | * @since 1.4 |
| 101 | * @spec JSR-51 |
| 102 | */ |
| 103 | |
| 104 | public final class Matcher implements MatchResult { |
| 105 | |
| 106 | /** |
| 107 | * The Pattern object that created this Matcher. |
| 108 | */ |
| 109 | Pattern parentPattern; |
| 110 | |
| 111 | /** |
| 112 | * The storage used by groups. They may contain invalid values if |
| 113 | * a group was skipped during the matching. |
| 114 | */ |
| 115 | int[] groups; |
| 116 | |
| 117 | /** |
| 118 | * The range within the sequence that is to be matched. Anchors |
| 119 | * will match at these "hard" boundaries. Changing the region |
| 120 | * changes these values. |
| 121 | */ |
| 122 | int from, to; |
| 123 | |
| 124 | /** |
| 125 | * Lookbehind uses this value to ensure that the subexpression |
| 126 | * match ends at the point where the lookbehind was encountered. |
| 127 | */ |
| 128 | int lookbehindTo; |
| 129 | |
| 130 | /** |
| 131 | * The original string being matched. |
| 132 | */ |
| 133 | CharSequence text; |
| 134 | |
| 135 | /** |
| 136 | * Matcher state used by the last node. NOANCHOR is used when a |
| 137 | * match does not have to consume all of the input. ENDANCHOR is |
| 138 | * the mode used for matching all the input. |
| 139 | */ |
| 140 | static final int ENDANCHOR = 1; |
| 141 | static final int NOANCHOR = 0; |
| 142 | int acceptMode = NOANCHOR; |
| 143 | |
| 144 | /** |
| 145 | * The range of string that last matched the pattern. If the last |
| 146 | * match failed then first is -1; last initially holds 0 then it |
| 147 | * holds the index of the end of the last match (which is where the |
| 148 | * next search starts). |
| 149 | */ |
| 150 | int first = -1, last = 0; |
| 151 | |
| 152 | /** |
| 153 | * The end index of what matched in the last match operation. |
| 154 | */ |
| 155 | int oldLast = -1; |
| 156 | |
| 157 | /** |
| 158 | * The index of the last position appended in a substitution. |
| 159 | */ |
| 160 | int lastAppendPosition = 0; |
| 161 | |
| 162 | /** |
| 163 | * Storage used by nodes to tell what repetition they are on in |
| 164 | * a pattern, and where groups begin. The nodes themselves are stateless, |
| 165 | * so they rely on this field to hold state during a match. |
| 166 | */ |
| 167 | int[] locals; |
| 168 | |
| 169 | /** |
| 170 | * Boolean indicating whether or not more input could change |
| 171 | * the results of the last match. |
| 172 | * |
| 173 | * If hitEnd is true, and a match was found, then more input |
| 174 | * might cause a different match to be found. |
| 175 | * If hitEnd is true and a match was not found, then more |
| 176 | * input could cause a match to be found. |
| 177 | * If hitEnd is false and a match was found, then more input |
| 178 | * will not change the match. |
| 179 | * If hitEnd is false and a match was not found, then more |
| 180 | * input will not cause a match to be found. |
| 181 | */ |
| 182 | boolean hitEnd; |
| 183 | |
| 184 | /** |
| 185 | * Boolean indicating whether or not more input could change |
| 186 | * a positive match into a negative one. |
| 187 | * |
| 188 | * If requireEnd is true, and a match was found, then more |
| 189 | * input could cause the match to be lost. |
| 190 | * If requireEnd is false and a match was found, then more |
| 191 | * input might change the match but the match won't be lost. |
| 192 | * If a match was not found, then requireEnd has no meaning. |
| 193 | */ |
| 194 | boolean requireEnd; |
| 195 | |
| 196 | /** |
| 197 | * If transparentBounds is true then the boundaries of this |
| 198 | * matcher's region are transparent to lookahead, lookbehind, |
| 199 | * and boundary matching constructs that try to see beyond them. |
| 200 | */ |
| 201 | boolean transparentBounds = false; |
| 202 | |
| 203 | /** |
| 204 | * If anchoringBounds is true then the boundaries of this |
| 205 | * matcher's region match anchors such as ^ and $. |
| 206 | */ |
| 207 | boolean anchoringBounds = true; |
| 208 | |
| 209 | /** |
| 210 | * No default constructor. |
| 211 | */ |
Matcher() {
    // Deliberately empty: no modifier means this constructor is
    // package-private, and every field keeps its declared initial value.
}
| 214 | |
| 215 | /** |
| 216 | * All matchers have the state used by Pattern during a match. |
| 217 | */ |
| 218 | Matcher(Pattern parent, CharSequence text) { |
| 219 | this.parentPattern = parent; |
| 220 | this.text = text; |
| 221 | |
| 222 | // Allocate state storage |
| 223 | int parentGroupCount = Math.max(parent.capturingGroupCount, 10); |
| 224 | groups = new int[parentGroupCount * 2]; |
| 225 | locals = new int[parent.localCount]; |
| 226 | |
| 227 | // Put fields into initial states |
| 228 | reset(); |
| 229 | } |
| 230 | |
| 231 | /** |
| 232 | * Returns the pattern that is interpreted by this matcher. |
| 233 | * |
| 234 | * @return The pattern for which this matcher was created |
| 235 | */ |
| 236 | public Pattern pattern() { |
| 237 | return parentPattern; |
| 238 | } |
| 239 | |
| 240 | /** |
| 241 | * Returns the match state of this matcher as a {@link MatchResult}. |
| 242 | * The result is unaffected by subsequent operations performed upon this |
| 243 | * matcher. |
| 244 | * |
| 245 | * @return a <code>MatchResult</code> with the state of this matcher |
| 246 | * @since 1.5 |
| 247 | */ |
| 248 | public MatchResult toMatchResult() { |
| 249 | Matcher result = new Matcher(this.parentPattern, text.toString()); |
| 250 | result.first = this.first; |
| 251 | result.last = this.last; |
| 252 | result.groups = (int[])(this.groups.clone()); |
| 253 | return result; |
| 254 | } |
| 255 | |
| 256 | /** |
| 257 | * Changes the <tt>Pattern</tt> that this <tt>Matcher</tt> uses to |
| 258 | * find matches with. |
| 259 | * |
| 260 | * <p> This method causes this matcher to lose information |
| 261 | * about the groups of the last match that occurred. The |
| 262 | * matcher's position in the input is maintained and its |
| 263 | * last append position is unaffected.</p> |
| 264 | * |
| 265 | * @param newPattern |
| 266 | * The new pattern used by this matcher |
| 267 | * @return This matcher |
| 268 | * @throws IllegalArgumentException |
| 269 | * If newPattern is <tt>null</tt> |
| 270 | * @since 1.5 |
| 271 | */ |
| 272 | public Matcher usePattern(Pattern newPattern) { |
| 273 | if (newPattern == null) |
| 274 | throw new IllegalArgumentException("Pattern cannot be null"); |
| 275 | parentPattern = newPattern; |
| 276 | |
| 277 | // Reallocate state storage |
| 278 | int parentGroupCount = Math.max(newPattern.capturingGroupCount, 10); |
| 279 | groups = new int[parentGroupCount * 2]; |
| 280 | locals = new int[newPattern.localCount]; |
| 281 | for (int i = 0; i < groups.length; i++) |
| 282 | groups[i] = -1; |
| 283 | for (int i = 0; i < locals.length; i++) |
| 284 | locals[i] = -1; |
| 285 | return this; |
| 286 | } |
| 287 | |
| 288 | /** |
| 289 | * Resets this matcher. |
| 290 | * |
| 291 | * <p> Resetting a matcher discards all of its explicit state information |
| 292 | * and sets its append position to zero. The matcher's region is set to the |
| 293 | * default region, which is its entire character sequence. The anchoring |
| 294 | * and transparency of this matcher's region boundaries are unaffected. |
| 295 | * |
| 296 | * @return This matcher |
| 297 | */ |
| 298 | public Matcher reset() { |
| 299 | first = -1; |
| 300 | last = 0; |
| 301 | oldLast = -1; |
| 302 | for(int i=0; i<groups.length; i++) |
| 303 | groups[i] = -1; |
| 304 | for(int i=0; i<locals.length; i++) |
| 305 | locals[i] = -1; |
| 306 | lastAppendPosition = 0; |
| 307 | from = 0; |
| 308 | to = getTextLength(); |
| 309 | return this; |
| 310 | } |
| 311 | |
| 312 | /** |
| 313 | * Resets this matcher with a new input sequence. |
| 314 | * |
| 315 | * <p> Resetting a matcher discards all of its explicit state information |
| 316 | * and sets its append position to zero. The matcher's region is set to |
| 317 | * the default region, which is its entire character sequence. The |
| 318 | * anchoring and transparency of this matcher's region boundaries are |
| 319 | * unaffected. |
| 320 | * |
| 321 | * @param input |
| 322 | * The new input character sequence |
| 323 | * |
| 324 | * @return This matcher |
| 325 | */ |
| 326 | public Matcher reset(CharSequence input) { |
| 327 | text = input; |
| 328 | return reset(); |
| 329 | } |
| 330 | |
| 331 | /** |
| 332 | * Returns the start index of the previous match. |
| 333 | * |
| 334 | * @return The index of the first character matched |
| 335 | * |
| 336 | * @throws IllegalStateException |
| 337 | * If no match has yet been attempted, |
| 338 | * or if the previous match operation failed |
| 339 | */ |
| 340 | public int start() { |
| 341 | if (first < 0) |
| 342 | throw new IllegalStateException("No match available"); |
| 343 | return first; |
| 344 | } |
| 345 | |
| 346 | /** |
| 347 | * Returns the start index of the subsequence captured by the given group |
| 348 | * during the previous match operation. |
| 349 | * |
| 350 | * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left |
| 351 | * to right, starting at one. Group zero denotes the entire pattern, so |
| 352 | * the expression <i>m.</i><tt>start(0)</tt> is equivalent to |
| 353 | * <i>m.</i><tt>start()</tt>. </p> |
| 354 | * |
| 355 | * @param group |
| 356 | * The index of a capturing group in this matcher's pattern |
| 357 | * |
| 358 | * @return The index of the first character captured by the group, |
| 359 | * or <tt>-1</tt> if the match was successful but the group |
| 360 | * itself did not match anything |
| 361 | * |
| 362 | * @throws IllegalStateException |
| 363 | * If no match has yet been attempted, |
| 364 | * or if the previous match operation failed |
| 365 | * |
| 366 | * @throws IndexOutOfBoundsException |
| 367 | * If there is no capturing group in the pattern |
| 368 | * with the given index |
| 369 | */ |
| 370 | public int start(int group) { |
| 371 | if (first < 0) |
| 372 | throw new IllegalStateException("No match available"); |
| 373 | if (group > groupCount()) |
| 374 | throw new IndexOutOfBoundsException("No group " + group); |
| 375 | return groups[group * 2]; |
| 376 | } |
| 377 | |
| 378 | /** |
| 379 | * Returns the offset after the last character matched. |
| 380 | * |
| 381 | * @return The offset after the last character matched |
| 382 | * |
| 383 | * @throws IllegalStateException |
| 384 | * If no match has yet been attempted, |
| 385 | * or if the previous match operation failed |
| 386 | */ |
| 387 | public int end() { |
| 388 | if (first < 0) |
| 389 | throw new IllegalStateException("No match available"); |
| 390 | return last; |
| 391 | } |
| 392 | |
| 393 | /** |
| 394 | * Returns the offset after the last character of the subsequence |
| 395 | * captured by the given group during the previous match operation. |
| 396 | * |
| 397 | * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left |
| 398 | * to right, starting at one. Group zero denotes the entire pattern, so |
| 399 | * the expression <i>m.</i><tt>end(0)</tt> is equivalent to |
| 400 | * <i>m.</i><tt>end()</tt>. </p> |
| 401 | * |
| 402 | * @param group |
| 403 | * The index of a capturing group in this matcher's pattern |
| 404 | * |
| 405 | * @return The offset after the last character captured by the group, |
| 406 | * or <tt>-1</tt> if the match was successful |
| 407 | * but the group itself did not match anything |
| 408 | * |
| 409 | * @throws IllegalStateException |
| 410 | * If no match has yet been attempted, |
| 411 | * or if the previous match operation failed |
| 412 | * |
| 413 | * @throws IndexOutOfBoundsException |
| 414 | * If there is no capturing group in the pattern |
| 415 | * with the given index |
| 416 | */ |
| 417 | public int end(int group) { |
| 418 | if (first < 0) |
| 419 | throw new IllegalStateException("No match available"); |
| 420 | if (group > groupCount()) |
| 421 | throw new IndexOutOfBoundsException("No group " + group); |
| 422 | return groups[group * 2 + 1]; |
| 423 | } |
| 424 | |
| 425 | /** |
| 426 | * Returns the input subsequence matched by the previous match. |
| 427 | * |
| 428 | * <p> For a matcher <i>m</i> with input sequence <i>s</i>, |
| 429 | * the expressions <i>m.</i><tt>group()</tt> and |
| 430 | * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(),</tt> <i>m.</i><tt>end())</tt> |
| 431 | * are equivalent. </p> |
| 432 | * |
| 433 | * <p> Note that some patterns, for example <tt>a*</tt>, match the empty |
| 434 | * string. This method will return the empty string when the pattern |
| 435 | * successfully matches the empty string in the input. </p> |
| 436 | * |
| 437 | * @return The (possibly empty) subsequence matched by the previous match, |
| 438 | * in string form |
| 439 | * |
| 440 | * @throws IllegalStateException |
| 441 | * If no match has yet been attempted, |
| 442 | * or if the previous match operation failed |
| 443 | */ |
| 444 | public String group() { |
| 445 | return group(0); |
| 446 | } |
| 447 | |
| 448 | /** |
| 449 | * Returns the input subsequence captured by the given group during the |
| 450 | * previous match operation. |
| 451 | * |
| 452 | * <p> For a matcher <i>m</i>, input sequence <i>s</i>, and group index |
| 453 | * <i>g</i>, the expressions <i>m.</i><tt>group(</tt><i>g</i><tt>)</tt> and |
| 454 | * <i>s.</i><tt>substring(</tt><i>m.</i><tt>start(</tt><i>g</i><tt>),</tt> <i>m.</i><tt>end(</tt><i>g</i><tt>))</tt> |
| 455 | * are equivalent. </p> |
| 456 | * |
| 457 | * <p> <a href="Pattern.html#cg">Capturing groups</a> are indexed from left |
| 458 | * to right, starting at one. Group zero denotes the entire pattern, so |
| 459 | * the expression <tt>m.group(0)</tt> is equivalent to <tt>m.group()</tt>. |
| 460 | * </p> |
| 461 | * |
| 462 | * <p> If the match was successful but the group specified failed to match |
| 463 | * any part of the input sequence, then <tt>null</tt> is returned. Note |
| 464 | * that some groups, for example <tt>(a*)</tt>, match the empty string. |
| 465 | * This method will return the empty string when such a group successfully |
| 466 | * matches the empty string in the input. </p> |
| 467 | * |
| 468 | * @param group |
| 469 | * The index of a capturing group in this matcher's pattern |
| 470 | * |
| 471 | * @return The (possibly empty) subsequence captured by the group |
| 472 | * during the previous match, or <tt>null</tt> if the group |
| 473 | * failed to match part of the input |
| 474 | * |
| 475 | * @throws IllegalStateException |
| 476 | * If no match has yet been attempted, |
| 477 | * or if the previous match operation failed |
| 478 | * |
| 479 | * @throws IndexOutOfBoundsException |
| 480 | * If there is no capturing group in the pattern |
| 481 | * with the given index |
| 482 | */ |
| 483 | public String group(int group) { |
| 484 | if (first < 0) |
| 485 | throw new IllegalStateException("No match found"); |
| 486 | if (group < 0 || group > groupCount()) |
| 487 | throw new IndexOutOfBoundsException("No group " + group); |
| 488 | if ((groups[group*2] == -1) || (groups[group*2+1] == -1)) |
| 489 | return null; |
| 490 | return getSubSequence(groups[group * 2], groups[group * 2 + 1]).toString(); |
| 491 | } |
| 492 | |
| 493 | /** |
| 494 | * Returns the number of capturing groups in this matcher's pattern. |
| 495 | * |
| 496 | * <p> Group zero denotes the entire pattern by convention. It is not |
| 497 | * included in this count. |
| 498 | * |
| 499 | * <p> Any non-negative integer smaller than or equal to the value |
| 500 | * returned by this method is guaranteed to be a valid group index for |
| 501 | * this matcher. </p> |
| 502 | * |
| 503 | * @return The number of capturing groups in this matcher's pattern |
| 504 | */ |
| 505 | public int groupCount() { |
| 506 | return parentPattern.capturingGroupCount - 1; |
| 507 | } |
| 508 | |
| 509 | /** |
| 510 | * Attempts to match the entire region against the pattern. |
| 511 | * |
| 512 | * <p> If the match succeeds then more information can be obtained via the |
| 513 | * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> |
| 514 | * |
| 515 | * @return <tt>true</tt> if, and only if, the entire region sequence |
| 516 | * matches this matcher's pattern |
| 517 | */ |
| 518 | public boolean matches() { |
| 519 | return match(from, ENDANCHOR); |
| 520 | } |
| 521 | |
| 522 | /** |
| 523 | * Attempts to find the next subsequence of the input sequence that matches |
| 524 | * the pattern. |
| 525 | * |
| 526 | * <p> This method starts at the beginning of this matcher's region, or, if |
| 527 | * a previous invocation of the method was successful and the matcher has |
| 528 | * not since been reset, at the first character not matched by the previous |
| 529 | * match. |
| 530 | * |
| 531 | * <p> If the match succeeds then more information can be obtained via the |
| 532 | * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> |
| 533 | * |
| 534 | * @return <tt>true</tt> if, and only if, a subsequence of the input |
| 535 | * sequence matches this matcher's pattern |
| 536 | */ |
| 537 | public boolean find() { |
| 538 | int nextSearchIndex = last; |
| 539 | if (nextSearchIndex == first) |
| 540 | nextSearchIndex++; |
| 541 | |
| 542 | // If next search starts before region, start it at region |
| 543 | if (nextSearchIndex < from) |
| 544 | nextSearchIndex = from; |
| 545 | |
| 546 | // If next search starts beyond region then it fails |
| 547 | if (nextSearchIndex > to) { |
| 548 | for (int i = 0; i < groups.length; i++) |
| 549 | groups[i] = -1; |
| 550 | return false; |
| 551 | } |
| 552 | return search(nextSearchIndex); |
| 553 | } |
| 554 | |
| 555 | /** |
| 556 | * Resets this matcher and then attempts to find the next subsequence of |
| 557 | * the input sequence that matches the pattern, starting at the specified |
| 558 | * index. |
| 559 | * |
| 560 | * <p> If the match succeeds then more information can be obtained via the |
| 561 | * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods, and subsequent |
| 562 | * invocations of the {@link #find()} method will start at the first |
| 563 | * character not matched by this match. </p> |
| 564 | * |
| 565 | * @throws IndexOutOfBoundsException |
| 566 | * If start is less than zero or if start is greater than the |
| 567 | * length of the input sequence. |
| 568 | * |
| 569 | * @return <tt>true</tt> if, and only if, a subsequence of the input |
| 570 | * sequence starting at the given index matches this matcher's |
| 571 | * pattern |
| 572 | */ |
| 573 | public boolean find(int start) { |
| 574 | int limit = getTextLength(); |
| 575 | if ((start < 0) || (start > limit)) |
| 576 | throw new IndexOutOfBoundsException("Illegal start index"); |
| 577 | reset(); |
| 578 | return search(start); |
| 579 | } |
| 580 | |
| 581 | /** |
| 582 | * Attempts to match the input sequence, starting at the beginning of the |
| 583 | * region, against the pattern. |
| 584 | * |
| 585 | * <p> Like the {@link #matches matches} method, this method always starts |
| 586 | * at the beginning of the region; unlike that method, it does not |
| 587 | * require that the entire region be matched. |
| 588 | * |
| 589 | * <p> If the match succeeds then more information can be obtained via the |
| 590 | * <tt>start</tt>, <tt>end</tt>, and <tt>group</tt> methods. </p> |
| 591 | * |
| 592 | * @return <tt>true</tt> if, and only if, a prefix of the input |
| 593 | * sequence matches this matcher's pattern |
| 594 | */ |
| 595 | public boolean lookingAt() { |
| 596 | return match(from, NOANCHOR); |
| 597 | } |
| 598 | |
| 599 | /** |
| 600 | * Returns a literal replacement <code>String</code> for the specified |
| 601 | * <code>String</code>. |
| 602 | * |
| 603 | * This method produces a <code>String</code> that will work |
| 604 | * as a literal replacement <code>s</code> in the |
| 605 | * <code>appendReplacement</code> method of the {@link Matcher} class. |
| 606 | * The <code>String</code> produced will match the sequence of characters |
| 607 | * in <code>s</code> treated as a literal sequence. Slashes ('\') and |
| 608 | * dollar signs ('$') will be given no special meaning. |
| 609 | * |
| 610 | * @param s The string to be literalized |
| 611 | * @return A literal string replacement |
| 612 | * @since 1.5 |
| 613 | */ |
| 614 | public static String quoteReplacement(String s) { |
| 615 | if ((s.indexOf('\\') == -1) && (s.indexOf('$') == -1)) |
| 616 | return s; |
| 617 | StringBuilder sb = new StringBuilder(); |
| 618 | for (int i=0; i<s.length(); i++) { |
| 619 | char c = s.charAt(i); |
| 620 | if (c == '\\' || c == '$') { |
| 621 | sb.append('\\'); |
| 622 | } |
| 623 | sb.append(c); |
| 624 | } |
| 625 | return sb.toString(); |
| 626 | } |
| 627 | |
| 628 | /** |
| 629 | * Implements a non-terminal append-and-replace step. |
| 630 | * |
| 631 | * <p> This method performs the following actions: </p> |
| 632 | * |
| 633 | * <ol> |
| 634 | * |
| 635 | * <li><p> It reads characters from the input sequence, starting at the |
| 636 | * append position, and appends them to the given string buffer. It |
| 637 | * stops after reading the last character preceding the previous match, |
| 638 | * that is, the character at index {@link |
| 639 | * #start()} <tt>-</tt> <tt>1</tt>. </p></li> |
| 640 | * |
| 641 | * <li><p> It appends the given replacement string to the string buffer. |
| 642 | * </p></li> |
| 643 | * |
| 644 | * <li><p> It sets the append position of this matcher to the index of |
| 645 | * the last character matched, plus one, that is, to {@link #end()}. |
| 646 | * </p></li> |
| 647 | * |
| 648 | * </ol> |
| 649 | * |
| 650 | * <p> The replacement string may contain references to subsequences |
| 651 | * captured during the previous match: Each occurrence of |
| 652 | * <tt>$</tt><i>g</i><tt></tt> will be replaced by the result of |
| 653 | * evaluating {@link #group(int) group}<tt>(</tt><i>g</i><tt>)</tt>. |
| 654 | * The first number after the <tt>$</tt> is always treated as part of |
| 655 | * the group reference. Subsequent numbers are incorporated into g if |
| 656 | * they would form a legal group reference. Only the numerals '0' |
| 657 | * through '9' are considered as potential components of the group |
| 658 | * reference. If the second group matched the string <tt>"foo"</tt>, for |
| 659 | * example, then passing the replacement string <tt>"$2bar"</tt> would |
| 660 | * cause <tt>"foobar"</tt> to be appended to the string buffer. A dollar |
| 661 | * sign (<tt>$</tt>) may be included as a literal in the replacement |
| 662 | * string by preceding it with a backslash (<tt>\$</tt>). |
| 663 | * |
| 664 | * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in |
| 665 | * the replacement string may cause the results to be different than if it |
| 666 | * were being treated as a literal replacement string. Dollar signs may be |
| 667 | * treated as references to captured subsequences as described above, and |
| 668 | * backslashes are used to escape literal characters in the replacement |
| 669 | * string. |
| 670 | * |
| 671 | * <p> This method is intended to be used in a loop together with the |
| 672 | * {@link #appendTail appendTail} and {@link #find find} methods. The |
| 673 | * following code, for example, writes <tt>one dog two dogs in the |
| 674 | * yard</tt> to the standard-output stream: </p> |
| 675 | * |
| 676 | * <blockquote><pre> |
| 677 | * Pattern p = Pattern.compile("cat"); |
| 678 | * Matcher m = p.matcher("one cat two cats in the yard"); |
| 679 | * StringBuffer sb = new StringBuffer(); |
| 680 | * while (m.find()) { |
| 681 | * m.appendReplacement(sb, "dog"); |
| 682 | * } |
| 683 | * m.appendTail(sb); |
| 684 | * System.out.println(sb.toString());</pre></blockquote> |
| 685 | * |
| 686 | * @param sb |
| 687 | * The target string buffer |
| 688 | * |
| 689 | * @param replacement |
| 690 | * The replacement string |
| 691 | * |
| 692 | * @return This matcher |
| 693 | * |
| 694 | * @throws IllegalStateException |
| 695 | * If no match has yet been attempted, |
| 696 | * or if the previous match operation failed |
| 697 | * |
| 698 | * @throws IndexOutOfBoundsException |
| 699 | * If the replacement string refers to a capturing group |
| 700 | * that does not exist in the pattern |
| 701 | */ |
| 702 | public Matcher appendReplacement(StringBuffer sb, String replacement) { |
| 703 | |
| 704 | // If no match, return error |
| 705 | if (first < 0) |
| 706 | throw new IllegalStateException("No match available"); |
| 707 | |
| 708 | // Process substitution string to replace group references with groups |
| 709 | int cursor = 0; |
| 710 | StringBuilder result = new StringBuilder(); |
| 711 | |
| 712 | while (cursor < replacement.length()) { |
| 713 | char nextChar = replacement.charAt(cursor); |
| 714 | if (nextChar == '\\') { |
| 715 | cursor++; |
| 716 | nextChar = replacement.charAt(cursor); |
| 717 | result.append(nextChar); |
| 718 | cursor++; |
| 719 | } else if (nextChar == '$') { |
| 720 | // Skip past $ |
| 721 | cursor++; |
| 722 | // The first number is always a group |
| 723 | int refNum = (int)replacement.charAt(cursor) - '0'; |
| 724 | if ((refNum < 0)||(refNum > 9)) |
| 725 | throw new IllegalArgumentException( |
| 726 | "Illegal group reference"); |
| 727 | cursor++; |
| 728 | |
| 729 | // Capture the largest legal group string |
| 730 | boolean done = false; |
| 731 | while (!done) { |
| 732 | if (cursor >= replacement.length()) { |
| 733 | break; |
| 734 | } |
| 735 | int nextDigit = replacement.charAt(cursor) - '0'; |
| 736 | if ((nextDigit < 0)||(nextDigit > 9)) { // not a number |
| 737 | break; |
| 738 | } |
| 739 | int newRefNum = (refNum * 10) + nextDigit; |
| 740 | if (groupCount() < newRefNum) { |
| 741 | done = true; |
| 742 | } else { |
| 743 | refNum = newRefNum; |
| 744 | cursor++; |
| 745 | } |
| 746 | } |
| 747 | // Append group |
| 748 | if (start(refNum) != -1 && end(refNum) != -1) |
| 749 | result.append(text, start(refNum), end(refNum)); |
| 750 | } else { |
| 751 | result.append(nextChar); |
| 752 | cursor++; |
| 753 | } |
| 754 | } |
| 755 | // Append the intervening text |
| 756 | sb.append(text, lastAppendPosition, first); |
| 757 | // Append the match substitution |
| 758 | sb.append(result); |
| 759 | |
| 760 | lastAppendPosition = last; |
| 761 | return this; |
| 762 | } |
| 763 | |
| 764 | /** |
| 765 | * Implements a terminal append-and-replace step. |
| 766 | * |
| 767 | * <p> This method reads characters from the input sequence, starting at |
| 768 | * the append position, and appends them to the given string buffer. It is |
| 769 | * intended to be invoked after one or more invocations of the {@link |
| 770 | * #appendReplacement appendReplacement} method in order to copy the |
| 771 | * remainder of the input sequence. </p> |
| 772 | * |
| 773 | * @param sb |
| 774 | * The target string buffer |
| 775 | * |
| 776 | * @return The target string buffer |
| 777 | */ |
| 778 | public StringBuffer appendTail(StringBuffer sb) { |
| 779 | sb.append(text, lastAppendPosition, getTextLength()); |
| 780 | return sb; |
| 781 | } |
| 782 | |
| 783 | /** |
| 784 | * Replaces every subsequence of the input sequence that matches the |
| 785 | * pattern with the given replacement string. |
| 786 | * |
| 787 | * <p> This method first resets this matcher. It then scans the input |
| 788 | * sequence looking for matches of the pattern. Characters that are not |
| 789 | * part of any match are appended directly to the result string; each match |
| 790 | * is replaced in the result by the replacement string. The replacement |
| 791 | * string may contain references to captured subsequences as in the {@link |
| 792 | * #appendReplacement appendReplacement} method. |
| 793 | * |
| 794 | * <p> Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in |
| 795 | * the replacement string may cause the results to be different than if it |
| 796 | * were being treated as a literal replacement string. Dollar signs may be |
| 797 | * treated as references to captured subsequences as described above, and |
| 798 | * backslashes are used to escape literal characters in the replacement |
| 799 | * string. |
| 800 | * |
| 801 | * <p> Given the regular expression <tt>a*b</tt>, the input |
| 802 | * <tt>"aabfooaabfooabfoob"</tt>, and the replacement string |
| 803 | * <tt>"-"</tt>, an invocation of this method on a matcher for that |
| 804 | * expression would yield the string <tt>"-foo-foo-foo-"</tt>. |
| 805 | * |
| 806 | * <p> Invoking this method changes this matcher's state. If the matcher |
| 807 | * is to be used in further matching operations then it should first be |
| 808 | * reset. </p> |
| 809 | * |
| 810 | * @param replacement |
| 811 | * The replacement string |
| 812 | * |
| 813 | * @return The string constructed by replacing each matching subsequence |
| 814 | * by the replacement string, substituting captured subsequences |
| 815 | * as needed |
| 816 | */ |
| 817 | public String replaceAll(String replacement) { |
| 818 | reset(); |
| 819 | boolean result = find(); |
| 820 | if (result) { |
| 821 | StringBuffer sb = new StringBuffer(); |
| 822 | do { |
| 823 | appendReplacement(sb, replacement); |
| 824 | result = find(); |
| 825 | } while (result); |
| 826 | appendTail(sb); |
| 827 | return sb.toString(); |
| 828 | } |
| 829 | return text.toString(); |
| 830 | } |
| 831 | |
| 832 | /** |
| 833 | * Replaces the first subsequence of the input sequence that matches the |
| 834 | * pattern with the given replacement string. |
| 835 | * |
| 836 | * <p> This method first resets this matcher. It then scans the input |
| 837 | * sequence looking for a match of the pattern. Characters that are not |
| 838 | * part of the match are appended directly to the result string; the match |
| 839 | * is replaced in the result by the replacement string. The replacement |
| 840 | * string may contain references to captured subsequences as in the {@link |
| 841 | * #appendReplacement appendReplacement} method. |
| 842 | * |
| 843 | * <p>Note that backslashes (<tt>\</tt>) and dollar signs (<tt>$</tt>) in |
| 844 | * the replacement string may cause the results to be different than if it |
| 845 | * were being treated as a literal replacement string. Dollar signs may be |
| 846 | * treated as references to captured subsequences as described above, and |
| 847 | * backslashes are used to escape literal characters in the replacement |
| 848 | * string. |
| 849 | * |
| 850 | * <p> Given the regular expression <tt>dog</tt>, the input |
| 851 | * <tt>"zzzdogzzzdogzzz"</tt>, and the replacement string |
| 852 | * <tt>"cat"</tt>, an invocation of this method on a matcher for that |
| 853 | * expression would yield the string <tt>"zzzcatzzzdogzzz"</tt>. </p> |
| 854 | * |
| 855 | * <p> Invoking this method changes this matcher's state. If the matcher |
| 856 | * is to be used in further matching operations then it should first be |
| 857 | * reset. </p> |
| 858 | * |
| 859 | * @param replacement |
| 860 | * The replacement string |
| 861 | * @return The string constructed by replacing the first matching |
| 862 | * subsequence by the replacement string, substituting captured |
| 863 | * subsequences as needed |
| 864 | */ |
| 865 | public String replaceFirst(String replacement) { |
| 866 | if (replacement == null) |
| 867 | throw new NullPointerException("replacement"); |
| 868 | reset(); |
| 869 | if (!find()) |
| 870 | return text.toString(); |
| 871 | StringBuffer sb = new StringBuffer(); |
| 872 | appendReplacement(sb, replacement); |
| 873 | appendTail(sb); |
| 874 | return sb.toString(); |
| 875 | } |
| 876 | |
| 877 | /** |
| 878 | * Sets the limits of this matcher's region. The region is the part of the |
| 879 | * input sequence that will be searched to find a match. Invoking this |
| 880 | * method resets the matcher, and then sets the region to start at the |
| 881 | * index specified by the <code>start</code> parameter and end at the |
| 882 | * index specified by the <code>end</code> parameter. |
| 883 | * |
| 884 | * <p>Depending on the transparency and anchoring being used (see |
| 885 | * {@link #useTransparentBounds useTransparentBounds} and |
| 886 | * {@link #useAnchoringBounds useAnchoringBounds}), certain constructs such |
| 887 | * as anchors may behave differently at or around the boundaries of the |
| 888 | * region. |
| 889 | * |
| 890 | * @param start |
| 891 | * The index to start searching at (inclusive) |
| 892 | * @param end |
| 893 | * The index to end searching at (exclusive) |
| 894 | * @throws IndexOutOfBoundsException |
| 895 | * If start or end is less than zero, if |
| 896 | * start is greater than the length of the input sequence, if |
| 897 | * end is greater than the length of the input sequence, or if |
| 898 | * start is greater than end. |
| 899 | * @return this matcher |
| 900 | * @since 1.5 |
| 901 | */ |
| 902 | public Matcher region(int start, int end) { |
| 903 | if ((start < 0) || (start > getTextLength())) |
| 904 | throw new IndexOutOfBoundsException("start"); |
| 905 | if ((end < 0) || (end > getTextLength())) |
| 906 | throw new IndexOutOfBoundsException("end"); |
| 907 | if (start > end) |
| 908 | throw new IndexOutOfBoundsException("start > end"); |
| 909 | reset(); |
| 910 | from = start; |
| 911 | to = end; |
| 912 | return this; |
| 913 | } |
| 914 | |
| 915 | /** |
| 916 | * Reports the start index of this matcher's region. The |
| 917 | * searches this matcher conducts are limited to finding matches |
| 918 | * within {@link #regionStart regionStart} (inclusive) and |
| 919 | * {@link #regionEnd regionEnd} (exclusive). |
| 920 | * |
| 921 | * @return The starting point of this matcher's region |
| 922 | * @since 1.5 |
| 923 | */ |
| 924 | public int regionStart() { |
| 925 | return from; |
| 926 | } |
| 927 | |
| 928 | /** |
| 929 | * Reports the end index (exclusive) of this matcher's region. |
| 930 | * The searches this matcher conducts are limited to finding matches |
| 931 | * within {@link #regionStart regionStart} (inclusive) and |
| 932 | * {@link #regionEnd regionEnd} (exclusive). |
| 933 | * |
| 934 | * @return the ending point of this matcher's region |
| 935 | * @since 1.5 |
| 936 | */ |
| 937 | public int regionEnd() { |
| 938 | return to; |
| 939 | } |
| 940 | |
| 941 | /** |
| 942 | * Queries the transparency of region bounds for this matcher. |
| 943 | * |
| 944 | * <p> This method returns <tt>true</tt> if this matcher uses |
| 945 | * <i>transparent</i> bounds, <tt>false</tt> if it uses <i>opaque</i> |
| 946 | * bounds. |
| 947 | * |
| 948 | * <p> See {@link #useTransparentBounds useTransparentBounds} for a |
| 949 | * description of transparent and opaque bounds. |
| 950 | * |
| 951 | * <p> By default, a matcher uses opaque region boundaries. |
| 952 | * |
| 953 | * @return <tt>true</tt> iff this matcher is using transparent bounds, |
| 954 | * <tt>false</tt> otherwise. |
| 955 | * @see java.util.regex.Matcher#useTransparentBounds(boolean) |
| 956 | * @since 1.5 |
| 957 | */ |
| 958 | public boolean hasTransparentBounds() { |
| 959 | return transparentBounds; |
| 960 | } |
| 961 | |
| 962 | /** |
| 963 | * Sets the transparency of region bounds for this matcher. |
| 964 | * |
| 965 | * <p> Invoking this method with an argument of <tt>true</tt> will set this |
| 966 | * matcher to use <i>transparent</i> bounds. If the boolean |
| 967 | * argument is <tt>false</tt>, then <i>opaque</i> bounds will be used. |
| 968 | * |
| 969 | * <p> Using transparent bounds, the boundaries of this |
| 970 | * matcher's region are transparent to lookahead, lookbehind, |
| 971 | * and boundary matching constructs. Those constructs can see beyond the |
| 972 | * boundaries of the region to see if a match is appropriate. |
| 973 | * |
| 974 | * <p> Using opaque bounds, the boundaries of this matcher's |
| 975 | * region are opaque to lookahead, lookbehind, and boundary matching |
| 976 | * constructs that may try to see beyond them. Those constructs cannot |
| 977 | * look past the boundaries so they will fail to match anything outside |
| 978 | * of the region. |
| 979 | * |
| 980 | * <p> By default, a matcher uses opaque bounds. |
| 981 | * |
| 982 | * @param b a boolean indicating whether to use opaque or transparent |
| 983 | * regions |
| 984 | * @return this matcher |
| 985 | * @see java.util.regex.Matcher#hasTransparentBounds |
| 986 | * @since 1.5 |
| 987 | */ |
| 988 | public Matcher useTransparentBounds(boolean b) { |
| 989 | transparentBounds = b; |
| 990 | return this; |
| 991 | } |
| 992 | |
| 993 | /** |
| 994 | * Queries the anchoring of region bounds for this matcher. |
| 995 | * |
| 996 | * <p> This method returns <tt>true</tt> if this matcher uses |
| 997 | * <i>anchoring</i> bounds, <tt>false</tt> otherwise. |
| 998 | * |
| 999 | * <p> See {@link #useAnchoringBounds useAnchoringBounds} for a |
| 1000 | * description of anchoring bounds. |
| 1001 | * |
| 1002 | * <p> By default, a matcher uses anchoring region boundaries. |
| 1003 | * |
| 1004 | * @return <tt>true</tt> iff this matcher is using anchoring bounds, |
| 1005 | * <tt>false</tt> otherwise. |
| 1006 | * @see java.util.regex.Matcher#useAnchoringBounds(boolean) |
| 1007 | * @since 1.5 |
| 1008 | */ |
| 1009 | public boolean hasAnchoringBounds() { |
| 1010 | return anchoringBounds; |
| 1011 | } |
| 1012 | |
| 1013 | /** |
| 1014 | * Sets the anchoring of region bounds for this matcher. |
| 1015 | * |
| 1016 | * <p> Invoking this method with an argument of <tt>true</tt> will set this |
| 1017 | * matcher to use <i>anchoring</i> bounds. If the boolean |
| 1018 | * argument is <tt>false</tt>, then <i>non-anchoring</i> bounds will be |
| 1019 | * used. |
| 1020 | * |
| 1021 | * <p> Using anchoring bounds, the boundaries of this |
| 1022 | * matcher's region match anchors such as ^ and $. |
| 1023 | * |
| 1024 | * <p> Without anchoring bounds, the boundaries of this |
| 1025 | * matcher's region will not match anchors such as ^ and $. |
| 1026 | * |
| 1027 | * <p> By default, a matcher uses anchoring region boundaries. |
| 1028 | * |
| 1029 | * @param b a boolean indicating whether or not to use anchoring bounds. |
| 1030 | * @return this matcher |
| 1031 | * @see java.util.regex.Matcher#hasAnchoringBounds |
| 1032 | * @since 1.5 |
| 1033 | */ |
| 1034 | public Matcher useAnchoringBounds(boolean b) { |
| 1035 | anchoringBounds = b; |
| 1036 | return this; |
| 1037 | } |
| 1038 | |
| 1039 | /** |
| 1040 | * <p>Returns the string representation of this matcher. The |
| 1041 | * string representation of a <code>Matcher</code> contains information |
| 1042 | * that may be useful for debugging. The exact format is unspecified. |
| 1043 | * |
| 1044 | * @return The string representation of this matcher |
| 1045 | * @since 1.5 |
| 1046 | */ |
| 1047 | public String toString() { |
| 1048 | StringBuilder sb = new StringBuilder(); |
| 1049 | sb.append("java.util.regex.Matcher"); |
| 1050 | sb.append("[pattern=" + pattern()); |
| 1051 | sb.append(" region="); |
| 1052 | sb.append(regionStart() + "," + regionEnd()); |
| 1053 | sb.append(" lastmatch="); |
| 1054 | if ((first >= 0) && (group() != null)) { |
| 1055 | sb.append(group()); |
| 1056 | } |
| 1057 | sb.append("]"); |
| 1058 | return sb.toString(); |
| 1059 | } |
| 1060 | |
| 1061 | /** |
| 1062 | * <p>Returns true if the end of input was hit by the search engine in |
| 1063 | * the last match operation performed by this matcher. |
| 1064 | * |
| 1065 | * <p>When this method returns true, then it is possible that more input |
| 1066 | * would have changed the result of the last search. |
| 1067 | * |
| 1068 | * @return true iff the end of input was hit in the last match; false |
| 1069 | * otherwise |
| 1070 | * @since 1.5 |
| 1071 | */ |
| 1072 | public boolean hitEnd() { |
| 1073 | return hitEnd; |
| 1074 | } |
| 1075 | |
| 1076 | /** |
| 1077 | * <p>Returns true if more input could change a positive match into a |
| 1078 | * negative one. |
| 1079 | * |
| 1080 | * <p>If this method returns true, and a match was found, then more |
| 1081 | * input could cause the match to be lost. If this method returns false |
| 1082 | * and a match was found, then more input might change the match but the |
| 1083 | * match won't be lost. If a match was not found, then requireEnd has no |
| 1084 | * meaning. |
| 1085 | * |
| 1086 | * @return true iff more input could change a positive match into a |
| 1087 | * negative one. |
| 1088 | * @since 1.5 |
| 1089 | */ |
| 1090 | public boolean requireEnd() { |
| 1091 | return requireEnd; |
| 1092 | } |
| 1093 | |
| 1094 | /** |
| 1095 | * Initiates a search to find a Pattern within the given bounds. |
| 1096 | * The groups are filled with default values and the match of the root |
| 1097 | * of the state machine is called. The state machine will hold the state |
| 1098 | * of the match as it proceeds in this matcher. |
| 1099 | * |
| 1100 | * Matcher.from is not set here, because it is the "hard" boundary |
| 1101 | * of the start of the search which anchors will set to. The from param |
| 1102 | * is the "soft" boundary of the start of the search, meaning that the |
| 1103 | * regex tries to match at that index but ^ won't match there. Subsequent |
| 1104 | * calls to the search methods start at a new "soft" boundary which is |
| 1105 | * the end of the previous match. |
| 1106 | */ |
| 1107 | boolean search(int from) { |
| 1108 | this.hitEnd = false; |
| 1109 | this.requireEnd = false; |
| 1110 | from = from < 0 ? 0 : from; |
| 1111 | this.first = from; |
| 1112 | this.oldLast = oldLast < 0 ? from : oldLast; |
| 1113 | for (int i = 0; i < groups.length; i++) |
| 1114 | groups[i] = -1; |
| 1115 | acceptMode = NOANCHOR; |
| 1116 | boolean result = parentPattern.root.match(this, from, text); |
| 1117 | if (!result) |
| 1118 | this.first = -1; |
| 1119 | this.oldLast = this.last; |
| 1120 | return result; |
| 1121 | } |
| 1122 | |
| 1123 | /** |
| 1124 | * Initiates a search for an anchored match to a Pattern within the given |
| 1125 | * bounds. The groups are filled with default values and the match of the |
| 1126 | * root of the state machine is called. The state machine will hold the |
| 1127 | * state of the match as it proceeds in this matcher. |
| 1128 | */ |
| 1129 | boolean match(int from, int anchor) { |
| 1130 | this.hitEnd = false; |
| 1131 | this.requireEnd = false; |
| 1132 | from = from < 0 ? 0 : from; |
| 1133 | this.first = from; |
| 1134 | this.oldLast = oldLast < 0 ? from : oldLast; |
| 1135 | for (int i = 0; i < groups.length; i++) |
| 1136 | groups[i] = -1; |
| 1137 | acceptMode = anchor; |
| 1138 | boolean result = parentPattern.matchRoot.match(this, from, text); |
| 1139 | if (!result) |
| 1140 | this.first = -1; |
| 1141 | this.oldLast = this.last; |
| 1142 | return result; |
| 1143 | } |
| 1144 | |
| 1145 | /** |
| 1146 | * Returns the end index of the text. |
| 1147 | * |
| 1148 | * @return the index after the last character in the text |
| 1149 | */ |
| 1150 | int getTextLength() { |
| 1151 | return text.length(); |
| 1152 | } |
| 1153 | |
| 1154 | /** |
| 1155 | * Generates a String from this Matcher's input in the specified range. |
| 1156 | * |
| 1157 | * @param beginIndex the beginning index, inclusive |
| 1158 | * @param endIndex the ending index, exclusive |
| 1159 | * @return A String generated from this Matcher's input |
| 1160 | */ |
| 1161 | CharSequence getSubSequence(int beginIndex, int endIndex) { |
| 1162 | return text.subSequence(beginIndex, endIndex); |
| 1163 | } |
| 1164 | |
| 1165 | /** |
| 1166 | * Returns this Matcher's input character at index i. |
| 1167 | * |
| 1168 | * @return A char from the specified index |
| 1169 | */ |
| 1170 | char charAt(int i) { |
| 1171 | return text.charAt(i); |
| 1172 | } |
| 1173 | |
| 1174 | } |