The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (C) 2008 The Android Open Source Project |
| 3 | * |
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | * you may not use this file except in compliance with the License. |
| 6 | * You may obtain a copy of the License at |
| 7 | * |
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | * |
| 10 | * Unless required by applicable law or agreed to in writing, software |
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | * See the License for the specific language governing permissions and |
| 14 | * limitations under the License. |
| 15 | */ |
| 16 | |
| 17 | package android.os; |
| 18 | |
Yi Jin | 129fc6c | 2017-09-28 15:48:38 -0700 | [diff] [blame] | 19 | import android.util.proto.ProtoOutputStream; |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 20 | |
| 21 | import java.util.Arrays; |
| 22 | |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 23 | /** |
| 24 | * A simple pattern matcher, which is safe to use on untrusted data: it does |
| 25 | * not provide full reg-exp support, only simple globbing that can not be |
| 26 | * used maliciously. |
| 27 | */ |
| 28 | public class PatternMatcher implements Parcelable { |
| 29 | /** |
| 30 | * Pattern type: the given pattern must exactly match the string it is |
| 31 | * tested against. |
| 32 | */ |
| 33 | public static final int PATTERN_LITERAL = 0; |
| 34 | |
| 35 | /** |
| 36 | * Pattern type: the given pattern must match the |
| 37 | * beginning of the string it is tested against. |
| 38 | */ |
| 39 | public static final int PATTERN_PREFIX = 1; |
| 40 | |
| 41 | /** |
| 42 | * Pattern type: the given pattern is interpreted with a |
| 43 | * simple glob syntax for matching against the string it is tested against. |
| 44 | * In this syntax, you can use the '*' character to match against zero or |
| 45 | * more occurrences of the character immediately before. If the |
| 46 | * character before it is '.' it will match any character. The character |
| 47 | * '\' can be used as an escape. This essentially provides only the '*' |
| 48 | * wildcard part of a normal regexp. |
| 49 | */ |
| 50 | public static final int PATTERN_SIMPLE_GLOB = 2; |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 51 | |
| 52 | /** |
| 53 | * Pattern type: the given pattern is interpreted with a regular |
| 54 | * expression-like syntax for matching against the string it is tested |
| 55 | * against. Supported tokens include dot ({@code .}) and sets ({@code [...]}) |
| 56 | * with full support for character ranges and the not ({@code ^}) modifier. |
| 57 | * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +}) |
| 58 | * for one-or-more and full range ({@code {...}}) support. This is a simple |
Todd Kennedy | 9c9fdf2 | 2017-03-06 10:58:27 -0800 | [diff] [blame] | 59 | * evaluation implementation in which matching is done against the pattern in |
| 60 | * real time with no backtracking support. |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 61 | */ |
| 62 | public static final int PATTERN_ADVANCED_GLOB = 3; |
| 63 | |
| 64 | // token types for advanced matching |
| 65 | private static final int TOKEN_TYPE_LITERAL = 0; |
| 66 | private static final int TOKEN_TYPE_ANY = 1; |
| 67 | private static final int TOKEN_TYPE_SET = 2; |
| 68 | private static final int TOKEN_TYPE_INVERSE_SET = 3; |
| 69 | |
| 70 | // Return for no match |
| 71 | private static final int NO_MATCH = -1; |
| 72 | |
| 73 | private static final String TAG = "PatternMatcher"; |
| 74 | |
| 75 | // Parsed placeholders for advanced patterns |
| 76 | private static final int PARSED_TOKEN_CHAR_SET_START = -1; |
| 77 | private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2; |
| 78 | private static final int PARSED_TOKEN_CHAR_SET_STOP = -3; |
| 79 | private static final int PARSED_TOKEN_CHAR_ANY = -4; |
| 80 | private static final int PARSED_MODIFIER_RANGE_START = -5; |
| 81 | private static final int PARSED_MODIFIER_RANGE_STOP = -6; |
| 82 | private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7; |
| 83 | private static final int PARSED_MODIFIER_ONE_OR_MORE = -8; |
| 84 | |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 85 | private final String mPattern; |
| 86 | private final int mType; |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 87 | private final int[] mParsedPattern; |
| 88 | |
| 89 | |
| 90 | private static final int MAX_PATTERN_STORAGE = 2048; |
| 91 | // workspace to use for building a parsed advanced pattern; |
| 92 | private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE]; |
| 93 | |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 94 | public PatternMatcher(String pattern, int type) { |
| 95 | mPattern = pattern; |
| 96 | mType = type; |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 97 | if (mType == PATTERN_ADVANCED_GLOB) { |
| 98 | mParsedPattern = parseAndVerifyAdvancedPattern(pattern); |
| 99 | } else { |
| 100 | mParsedPattern = null; |
| 101 | } |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 102 | } |
| 103 | |
| 104 | public final String getPath() { |
| 105 | return mPattern; |
| 106 | } |
| 107 | |
| 108 | public final int getType() { |
| 109 | return mType; |
| 110 | } |
| 111 | |
| 112 | public boolean match(String str) { |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 113 | return matchPattern(str, mPattern, mParsedPattern, mType); |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 114 | } |
| 115 | |
| 116 | public String toString() { |
| 117 | String type = "? "; |
| 118 | switch (mType) { |
| 119 | case PATTERN_LITERAL: |
| 120 | type = "LITERAL: "; |
| 121 | break; |
| 122 | case PATTERN_PREFIX: |
| 123 | type = "PREFIX: "; |
| 124 | break; |
| 125 | case PATTERN_SIMPLE_GLOB: |
| 126 | type = "GLOB: "; |
| 127 | break; |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 128 | case PATTERN_ADVANCED_GLOB: |
| 129 | type = "ADVANCED: "; |
| 130 | break; |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 131 | } |
| 132 | return "PatternMatcher{" + type + mPattern + "}"; |
| 133 | } |
Yi Jin | 129fc6c | 2017-09-28 15:48:38 -0700 | [diff] [blame] | 134 | |
| 135 | /** @hide */ |
| 136 | public void writeToProto(ProtoOutputStream proto, long fieldId) { |
| 137 | long token = proto.start(fieldId); |
| 138 | proto.write(PatternMatcherProto.PATTERN, mPattern); |
| 139 | proto.write(PatternMatcherProto.TYPE, mType); |
| 140 | // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to |
| 141 | // match the current data structure. |
| 142 | proto.end(token); |
| 143 | } |
| 144 | |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 145 | public int describeContents() { |
| 146 | return 0; |
| 147 | } |
| 148 | |
| 149 | public void writeToParcel(Parcel dest, int flags) { |
| 150 | dest.writeString(mPattern); |
| 151 | dest.writeInt(mType); |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 152 | dest.writeIntArray(mParsedPattern); |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 153 | } |
Yi Jin | 129fc6c | 2017-09-28 15:48:38 -0700 | [diff] [blame] | 154 | |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 155 | public PatternMatcher(Parcel src) { |
| 156 | mPattern = src.readString(); |
| 157 | mType = src.readInt(); |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 158 | mParsedPattern = src.createIntArray(); |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 159 | } |
| 160 | |
Jeff Sharkey | 9e8f83d | 2019-02-28 12:06:45 -0700 | [diff] [blame] | 161 | public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 162 | = new Parcelable.Creator<PatternMatcher>() { |
| 163 | public PatternMatcher createFromParcel(Parcel source) { |
| 164 | return new PatternMatcher(source); |
| 165 | } |
| 166 | |
| 167 | public PatternMatcher[] newArray(int size) { |
| 168 | return new PatternMatcher[size]; |
| 169 | } |
| 170 | }; |
| 171 | |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 172 | static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) { |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 173 | if (match == null) return false; |
| 174 | if (type == PATTERN_LITERAL) { |
| 175 | return pattern.equals(match); |
| 176 | } if (type == PATTERN_PREFIX) { |
| 177 | return match.startsWith(pattern); |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 178 | } else if (type == PATTERN_SIMPLE_GLOB) { |
| 179 | return matchGlobPattern(pattern, match); |
| 180 | } else if (type == PATTERN_ADVANCED_GLOB) { |
| 181 | return matchAdvancedPattern(parsedPattern, match); |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 182 | } |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 183 | return false; |
| 184 | } |
| 185 | |
| 186 | static boolean matchGlobPattern(String pattern, String match) { |
The Android Open Source Project | 9066cfe | 2009-03-03 19:31:44 -0800 | [diff] [blame] | 187 | final int NP = pattern.length(); |
| 188 | if (NP <= 0) { |
| 189 | return match.length() <= 0; |
| 190 | } |
| 191 | final int NM = match.length(); |
| 192 | int ip = 0, im = 0; |
| 193 | char nextChar = pattern.charAt(0); |
| 194 | while ((ip<NP) && (im<NM)) { |
| 195 | char c = nextChar; |
| 196 | ip++; |
| 197 | nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| 198 | final boolean escaped = (c == '\\'); |
| 199 | if (escaped) { |
| 200 | c = nextChar; |
| 201 | ip++; |
| 202 | nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| 203 | } |
| 204 | if (nextChar == '*') { |
| 205 | if (!escaped && c == '.') { |
| 206 | if (ip >= (NP-1)) { |
| 207 | // at the end with a pattern match, so |
| 208 | // all is good without checking! |
| 209 | return true; |
| 210 | } |
| 211 | ip++; |
| 212 | nextChar = pattern.charAt(ip); |
| 213 | // Consume everything until the next character in the |
| 214 | // pattern is found. |
| 215 | if (nextChar == '\\') { |
| 216 | ip++; |
| 217 | nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| 218 | } |
| 219 | do { |
| 220 | if (match.charAt(im) == nextChar) { |
| 221 | break; |
| 222 | } |
| 223 | im++; |
| 224 | } while (im < NM); |
| 225 | if (im == NM) { |
| 226 | // Whoops, the next character in the pattern didn't |
| 227 | // exist in the match. |
| 228 | return false; |
| 229 | } |
| 230 | ip++; |
| 231 | nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| 232 | im++; |
| 233 | } else { |
| 234 | // Consume only characters matching the one before '*'. |
| 235 | do { |
| 236 | if (match.charAt(im) != c) { |
| 237 | break; |
| 238 | } |
| 239 | im++; |
| 240 | } while (im < NM); |
| 241 | ip++; |
| 242 | nextChar = ip < NP ? pattern.charAt(ip) : 0; |
| 243 | } |
| 244 | } else { |
| 245 | if (c != '.' && match.charAt(im) != c) return false; |
| 246 | im++; |
| 247 | } |
| 248 | } |
| 249 | |
| 250 | if (ip >= NP && im >= NM) { |
| 251 | // Reached the end of both strings, all is good! |
| 252 | return true; |
| 253 | } |
| 254 | |
| 255 | // One last check: we may have finished the match string, but still |
| 256 | // have a '.*' at the end of the pattern, which should still count |
| 257 | // as a match. |
| 258 | if (ip == NP-2 && pattern.charAt(ip) == '.' |
| 259 | && pattern.charAt(ip+1) == '*') { |
| 260 | return true; |
| 261 | } |
| 262 | |
| 263 | return false; |
| 264 | } |
Patrick Baumann | 2baf095 | 2016-08-25 15:56:46 -0700 | [diff] [blame] | 265 | |
| 266 | /** |
| 267 | * Parses the advanced pattern and returns an integer array representation of it. The integer |
| 268 | * array treats each field as a character if positive and a unique token placeholder if |
| 269 | * negative. This method will throw on any pattern structure violations. |
| 270 | */ |
| 271 | synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) { |
| 272 | int ip = 0; |
| 273 | final int LP = pattern.length(); |
| 274 | |
| 275 | int it = 0; |
| 276 | |
| 277 | boolean inSet = false; |
| 278 | boolean inRange = false; |
| 279 | boolean inCharClass = false; |
| 280 | |
| 281 | boolean addToParsedPattern; |
| 282 | |
| 283 | while (ip < LP) { |
| 284 | if (it > MAX_PATTERN_STORAGE - 3) { |
| 285 | throw new IllegalArgumentException("Pattern is too large!"); |
| 286 | } |
| 287 | |
| 288 | char c = pattern.charAt(ip); |
| 289 | addToParsedPattern = false; |
| 290 | |
| 291 | switch (c) { |
| 292 | case '[': |
| 293 | if (inSet) { |
| 294 | addToParsedPattern = true; // treat as literal or char class in set |
| 295 | } else { |
| 296 | if (pattern.charAt(ip + 1) == '^') { |
| 297 | sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START; |
| 298 | ip++; // skip over the '^' |
| 299 | } else { |
| 300 | sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START; |
| 301 | } |
| 302 | ip++; // move to the next pattern char |
| 303 | inSet = true; |
| 304 | continue; |
| 305 | } |
| 306 | break; |
| 307 | case ']': |
| 308 | if (!inSet) { |
| 309 | addToParsedPattern = true; // treat as literal outside of set |
| 310 | } else { |
| 311 | int parsedToken = sParsedPatternScratch[it - 1]; |
| 312 | if (parsedToken == PARSED_TOKEN_CHAR_SET_START || |
| 313 | parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) { |
| 314 | throw new IllegalArgumentException( |
| 315 | "You must define characters in a set."); |
| 316 | } |
| 317 | sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP; |
| 318 | inSet = false; |
| 319 | inCharClass = false; |
| 320 | } |
| 321 | break; |
| 322 | case '{': |
| 323 | if (!inSet) { |
| 324 | if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { |
| 325 | throw new IllegalArgumentException("Modifier must follow a token."); |
| 326 | } |
| 327 | sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START; |
| 328 | ip++; |
| 329 | inRange = true; |
| 330 | } |
| 331 | break; |
| 332 | case '}': |
| 333 | if (inRange) { // only terminate the range if we're currently in one |
| 334 | sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP; |
| 335 | inRange = false; |
| 336 | } |
| 337 | break; |
| 338 | case '*': |
| 339 | if (!inSet) { |
| 340 | if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { |
| 341 | throw new IllegalArgumentException("Modifier must follow a token."); |
| 342 | } |
| 343 | sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE; |
| 344 | } |
| 345 | break; |
| 346 | case '+': |
| 347 | if (!inSet) { |
| 348 | if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) { |
| 349 | throw new IllegalArgumentException("Modifier must follow a token."); |
| 350 | } |
| 351 | sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE; |
| 352 | } |
| 353 | break; |
| 354 | case '.': |
| 355 | if (!inSet) { |
| 356 | sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY; |
| 357 | } |
| 358 | break; |
| 359 | case '\\': // escape |
| 360 | if (ip + 1 >= LP) { |
| 361 | throw new IllegalArgumentException("Escape found at end of pattern!"); |
| 362 | } |
| 363 | c = pattern.charAt(++ip); |
| 364 | addToParsedPattern = true; |
| 365 | break; |
| 366 | default: |
| 367 | addToParsedPattern = true; |
| 368 | break; |
| 369 | } |
| 370 | if (inSet) { |
| 371 | if (inCharClass) { |
| 372 | sParsedPatternScratch[it++] = c; |
| 373 | inCharClass = false; |
| 374 | } else { |
| 375 | // look forward for character class |
| 376 | if (ip + 2 < LP |
| 377 | && pattern.charAt(ip + 1) == '-' |
| 378 | && pattern.charAt(ip + 2) != ']') { |
| 379 | inCharClass = true; |
| 380 | sParsedPatternScratch[it++] = c; // set first token as lower end of range |
| 381 | ip++; // advance past dash |
| 382 | } else { // literal |
| 383 | sParsedPatternScratch[it++] = c; // set first token as literal |
| 384 | sParsedPatternScratch[it++] = c; // set second set as literal |
| 385 | } |
| 386 | } |
| 387 | } else if (inRange) { |
| 388 | int endOfSet = pattern.indexOf('}', ip); |
| 389 | if (endOfSet < 0) { |
| 390 | throw new IllegalArgumentException("Range not ended with '}'"); |
| 391 | } |
| 392 | String rangeString = pattern.substring(ip, endOfSet); |
| 393 | int commaIndex = rangeString.indexOf(','); |
| 394 | try { |
| 395 | final int rangeMin; |
| 396 | final int rangeMax; |
| 397 | if (commaIndex < 0) { |
| 398 | int parsedRange = Integer.parseInt(rangeString); |
| 399 | rangeMin = rangeMax = parsedRange; |
| 400 | } else { |
| 401 | rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex)); |
| 402 | if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more) |
| 403 | rangeMax = Integer.MAX_VALUE; |
| 404 | } else { |
| 405 | rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1)); |
| 406 | } |
| 407 | } |
| 408 | if (rangeMin > rangeMax) { |
| 409 | throw new IllegalArgumentException( |
| 410 | "Range quantifier minimum is greater than maximum"); |
| 411 | } |
| 412 | sParsedPatternScratch[it++] = rangeMin; |
| 413 | sParsedPatternScratch[it++] = rangeMax; |
| 414 | } catch (NumberFormatException e) { |
| 415 | throw new IllegalArgumentException("Range number format incorrect", e); |
| 416 | } |
| 417 | ip = endOfSet; |
| 418 | continue; // don't increment ip |
| 419 | } else if (addToParsedPattern) { |
| 420 | sParsedPatternScratch[it++] = c; |
| 421 | } |
| 422 | ip++; |
| 423 | } |
| 424 | if (inSet) { |
| 425 | throw new IllegalArgumentException("Set was not terminated!"); |
| 426 | } |
| 427 | return Arrays.copyOf(sParsedPatternScratch, it); |
| 428 | } |
| 429 | |
| 430 | private static boolean isParsedModifier(int parsedChar) { |
| 431 | return parsedChar == PARSED_MODIFIER_ONE_OR_MORE || |
| 432 | parsedChar == PARSED_MODIFIER_ZERO_OR_MORE || |
| 433 | parsedChar == PARSED_MODIFIER_RANGE_STOP || |
| 434 | parsedChar == PARSED_MODIFIER_RANGE_START; |
| 435 | } |
| 436 | |
| 437 | static boolean matchAdvancedPattern(int[] parsedPattern, String match) { |
| 438 | |
| 439 | // create indexes |
| 440 | int ip = 0, im = 0; |
| 441 | |
| 442 | // one-time length check |
| 443 | final int LP = parsedPattern.length, LM = match.length(); |
| 444 | |
| 445 | // The current character being analyzed in the pattern |
| 446 | int patternChar; |
| 447 | |
| 448 | int tokenType; |
| 449 | |
| 450 | int charSetStart = 0, charSetEnd = 0; |
| 451 | |
| 452 | while (ip < LP) { // we still have content in the pattern |
| 453 | |
| 454 | patternChar = parsedPattern[ip]; |
| 455 | // get the match type of the next verb |
| 456 | |
| 457 | switch (patternChar) { |
| 458 | case PARSED_TOKEN_CHAR_ANY: |
| 459 | tokenType = TOKEN_TYPE_ANY; |
| 460 | ip++; |
| 461 | break; |
| 462 | case PARSED_TOKEN_CHAR_SET_START: |
| 463 | case PARSED_TOKEN_CHAR_SET_INVERSE_START: |
| 464 | tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START |
| 465 | ? TOKEN_TYPE_SET |
| 466 | : TOKEN_TYPE_INVERSE_SET; |
| 467 | charSetStart = ip + 1; // start from the char after the set start |
| 468 | while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP); |
| 469 | charSetEnd = ip - 1; // we're on the set stop, end is the previous |
| 470 | ip++; // move the pointer to the next pattern entry |
| 471 | break; |
| 472 | default: |
| 473 | charSetStart = ip; |
| 474 | tokenType = TOKEN_TYPE_LITERAL; |
| 475 | ip++; |
| 476 | break; |
| 477 | } |
| 478 | |
| 479 | final int minRepetition; |
| 480 | final int maxRepetition; |
| 481 | |
| 482 | // look for a match length modifier |
| 483 | if (ip >= LP) { |
| 484 | minRepetition = maxRepetition = 1; |
| 485 | } else { |
| 486 | patternChar = parsedPattern[ip]; |
| 487 | switch (patternChar) { |
| 488 | case PARSED_MODIFIER_ZERO_OR_MORE: |
| 489 | minRepetition = 0; |
| 490 | maxRepetition = Integer.MAX_VALUE; |
| 491 | ip++; |
| 492 | break; |
| 493 | case PARSED_MODIFIER_ONE_OR_MORE: |
| 494 | minRepetition = 1; |
| 495 | maxRepetition = Integer.MAX_VALUE; |
| 496 | ip++; |
| 497 | break; |
| 498 | case PARSED_MODIFIER_RANGE_START: |
| 499 | minRepetition = parsedPattern[++ip]; |
| 500 | maxRepetition = parsedPattern[++ip]; |
| 501 | ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token |
| 502 | break; |
| 503 | default: |
| 504 | minRepetition = maxRepetition = 1; // implied literal |
| 505 | break; |
| 506 | } |
| 507 | } |
| 508 | if (minRepetition > maxRepetition) { |
| 509 | return false; |
| 510 | } |
| 511 | |
| 512 | // attempt to match as many characters as possible |
| 513 | int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition, |
| 514 | parsedPattern, charSetStart, charSetEnd); |
| 515 | |
| 516 | // if we found a conflict, return false immediately |
| 517 | if (matched == NO_MATCH) { |
| 518 | return false; |
| 519 | } |
| 520 | |
| 521 | // move the match pointer the number of characters matched |
| 522 | im += matched; |
| 523 | } |
| 524 | return ip >= LP && im >= LM; // have parsed entire string and regex |
| 525 | } |
| 526 | |
| 527 | private static int matchChars(String match, int im, final int lm, int tokenType, |
| 528 | int minRepetition, int maxRepetition, int[] parsedPattern, |
| 529 | int tokenStart, int tokenEnd) { |
| 530 | int matched = 0; |
| 531 | |
| 532 | while(matched < maxRepetition |
| 533 | && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart, |
| 534 | tokenEnd)) { |
| 535 | matched++; |
| 536 | } |
| 537 | |
| 538 | return matched < minRepetition ? NO_MATCH : matched; |
| 539 | } |
| 540 | |
| 541 | private static boolean matchChar(String match, int im, final int lm, int tokenType, |
| 542 | int[] parsedPattern, int tokenStart, int tokenEnd) { |
| 543 | if (im >= lm) { // we've overrun the string, no match |
| 544 | return false; |
| 545 | } |
| 546 | switch (tokenType) { |
| 547 | case TOKEN_TYPE_ANY: |
| 548 | return true; |
| 549 | case TOKEN_TYPE_SET: |
| 550 | for (int i = tokenStart; i < tokenEnd; i += 2) { |
| 551 | char matchChar = match.charAt(im); |
| 552 | if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { |
| 553 | return true; |
| 554 | } |
| 555 | } |
| 556 | return false; |
| 557 | case TOKEN_TYPE_INVERSE_SET: |
| 558 | for (int i = tokenStart; i < tokenEnd; i += 2) { |
| 559 | char matchChar = match.charAt(im); |
| 560 | if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) { |
| 561 | return false; |
| 562 | } |
| 563 | } |
| 564 | return true; |
| 565 | case TOKEN_TYPE_LITERAL: |
| 566 | return match.charAt(im) == parsedPattern[tokenStart]; |
| 567 | default: |
| 568 | return false; |
| 569 | } |
| 570 | } |
| 571 | } |