blob: ef03e8c3c701711673d1170e2f43ff8db5293100 [file] [log] [blame]
The Android Open Source Project9066cfe2009-03-03 19:31:44 -08001/*
2 * Copyright (C) 2008 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17package android.os;
18
Yi Jin129fc6c2017-09-28 15:48:38 -070019import android.util.proto.ProtoOutputStream;
Patrick Baumann2baf0952016-08-25 15:56:46 -070020
21import java.util.Arrays;
22
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080023/**
24 * A simple pattern matcher, which is safe to use on untrusted data: it does
25 * not provide full reg-exp support, only simple globbing that can not be
26 * used maliciously.
27 */
28public class PatternMatcher implements Parcelable {
29 /**
30 * Pattern type: the given pattern must exactly match the string it is
31 * tested against.
32 */
33 public static final int PATTERN_LITERAL = 0;
34
35 /**
36 * Pattern type: the given pattern must match the
37 * beginning of the string it is tested against.
38 */
39 public static final int PATTERN_PREFIX = 1;
40
41 /**
42 * Pattern type: the given pattern is interpreted with a
43 * simple glob syntax for matching against the string it is tested against.
44 * In this syntax, you can use the '*' character to match against zero or
45 * more occurrences of the character immediately before. If the
46 * character before it is '.' it will match any character. The character
47 * '\' can be used as an escape. This essentially provides only the '*'
48 * wildcard part of a normal regexp.
49 */
50 public static final int PATTERN_SIMPLE_GLOB = 2;
Patrick Baumann2baf0952016-08-25 15:56:46 -070051
52 /**
53 * Pattern type: the given pattern is interpreted with a regular
54 * expression-like syntax for matching against the string it is tested
55 * against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
56 * with full support for character ranges and the not ({@code ^}) modifier.
57 * Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
58 * for one-or-more and full range ({@code {...}}) support. This is a simple
Todd Kennedy9c9fdf22017-03-06 10:58:27 -080059 * evaluation implementation in which matching is done against the pattern in
60 * real time with no backtracking support.
Patrick Baumann2baf0952016-08-25 15:56:46 -070061 */
62 public static final int PATTERN_ADVANCED_GLOB = 3;
63
64 // token types for advanced matching
65 private static final int TOKEN_TYPE_LITERAL = 0;
66 private static final int TOKEN_TYPE_ANY = 1;
67 private static final int TOKEN_TYPE_SET = 2;
68 private static final int TOKEN_TYPE_INVERSE_SET = 3;
69
70 // Return for no match
71 private static final int NO_MATCH = -1;
72
73 private static final String TAG = "PatternMatcher";
74
75 // Parsed placeholders for advanced patterns
76 private static final int PARSED_TOKEN_CHAR_SET_START = -1;
77 private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
78 private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
79 private static final int PARSED_TOKEN_CHAR_ANY = -4;
80 private static final int PARSED_MODIFIER_RANGE_START = -5;
81 private static final int PARSED_MODIFIER_RANGE_STOP = -6;
82 private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
83 private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
84
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080085 private final String mPattern;
86 private final int mType;
Patrick Baumann2baf0952016-08-25 15:56:46 -070087 private final int[] mParsedPattern;
88
89
90 private static final int MAX_PATTERN_STORAGE = 2048;
91 // workspace to use for building a parsed advanced pattern;
92 private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
93
The Android Open Source Project9066cfe2009-03-03 19:31:44 -080094 public PatternMatcher(String pattern, int type) {
95 mPattern = pattern;
96 mType = type;
Patrick Baumann2baf0952016-08-25 15:56:46 -070097 if (mType == PATTERN_ADVANCED_GLOB) {
98 mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
99 } else {
100 mParsedPattern = null;
101 }
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800102 }
103
104 public final String getPath() {
105 return mPattern;
106 }
107
108 public final int getType() {
109 return mType;
110 }
111
112 public boolean match(String str) {
Patrick Baumann2baf0952016-08-25 15:56:46 -0700113 return matchPattern(str, mPattern, mParsedPattern, mType);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800114 }
115
116 public String toString() {
117 String type = "? ";
118 switch (mType) {
119 case PATTERN_LITERAL:
120 type = "LITERAL: ";
121 break;
122 case PATTERN_PREFIX:
123 type = "PREFIX: ";
124 break;
125 case PATTERN_SIMPLE_GLOB:
126 type = "GLOB: ";
127 break;
Patrick Baumann2baf0952016-08-25 15:56:46 -0700128 case PATTERN_ADVANCED_GLOB:
129 type = "ADVANCED: ";
130 break;
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800131 }
132 return "PatternMatcher{" + type + mPattern + "}";
133 }
Yi Jin129fc6c2017-09-28 15:48:38 -0700134
135 /** @hide */
136 public void writeToProto(ProtoOutputStream proto, long fieldId) {
137 long token = proto.start(fieldId);
138 proto.write(PatternMatcherProto.PATTERN, mPattern);
139 proto.write(PatternMatcherProto.TYPE, mType);
140 // PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
141 // match the current data structure.
142 proto.end(token);
143 }
144
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800145 public int describeContents() {
146 return 0;
147 }
148
149 public void writeToParcel(Parcel dest, int flags) {
150 dest.writeString(mPattern);
151 dest.writeInt(mType);
Patrick Baumann2baf0952016-08-25 15:56:46 -0700152 dest.writeIntArray(mParsedPattern);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800153 }
Yi Jin129fc6c2017-09-28 15:48:38 -0700154
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800155 public PatternMatcher(Parcel src) {
156 mPattern = src.readString();
157 mType = src.readInt();
Patrick Baumann2baf0952016-08-25 15:56:46 -0700158 mParsedPattern = src.createIntArray();
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800159 }
160
Jeff Sharkey9e8f83d2019-02-28 12:06:45 -0700161 public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800162 = new Parcelable.Creator<PatternMatcher>() {
163 public PatternMatcher createFromParcel(Parcel source) {
164 return new PatternMatcher(source);
165 }
166
167 public PatternMatcher[] newArray(int size) {
168 return new PatternMatcher[size];
169 }
170 };
171
Patrick Baumann2baf0952016-08-25 15:56:46 -0700172 static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800173 if (match == null) return false;
174 if (type == PATTERN_LITERAL) {
175 return pattern.equals(match);
176 } if (type == PATTERN_PREFIX) {
177 return match.startsWith(pattern);
Patrick Baumann2baf0952016-08-25 15:56:46 -0700178 } else if (type == PATTERN_SIMPLE_GLOB) {
179 return matchGlobPattern(pattern, match);
180 } else if (type == PATTERN_ADVANCED_GLOB) {
181 return matchAdvancedPattern(parsedPattern, match);
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800182 }
Patrick Baumann2baf0952016-08-25 15:56:46 -0700183 return false;
184 }
185
186 static boolean matchGlobPattern(String pattern, String match) {
The Android Open Source Project9066cfe2009-03-03 19:31:44 -0800187 final int NP = pattern.length();
188 if (NP <= 0) {
189 return match.length() <= 0;
190 }
191 final int NM = match.length();
192 int ip = 0, im = 0;
193 char nextChar = pattern.charAt(0);
194 while ((ip<NP) && (im<NM)) {
195 char c = nextChar;
196 ip++;
197 nextChar = ip < NP ? pattern.charAt(ip) : 0;
198 final boolean escaped = (c == '\\');
199 if (escaped) {
200 c = nextChar;
201 ip++;
202 nextChar = ip < NP ? pattern.charAt(ip) : 0;
203 }
204 if (nextChar == '*') {
205 if (!escaped && c == '.') {
206 if (ip >= (NP-1)) {
207 // at the end with a pattern match, so
208 // all is good without checking!
209 return true;
210 }
211 ip++;
212 nextChar = pattern.charAt(ip);
213 // Consume everything until the next character in the
214 // pattern is found.
215 if (nextChar == '\\') {
216 ip++;
217 nextChar = ip < NP ? pattern.charAt(ip) : 0;
218 }
219 do {
220 if (match.charAt(im) == nextChar) {
221 break;
222 }
223 im++;
224 } while (im < NM);
225 if (im == NM) {
226 // Whoops, the next character in the pattern didn't
227 // exist in the match.
228 return false;
229 }
230 ip++;
231 nextChar = ip < NP ? pattern.charAt(ip) : 0;
232 im++;
233 } else {
234 // Consume only characters matching the one before '*'.
235 do {
236 if (match.charAt(im) != c) {
237 break;
238 }
239 im++;
240 } while (im < NM);
241 ip++;
242 nextChar = ip < NP ? pattern.charAt(ip) : 0;
243 }
244 } else {
245 if (c != '.' && match.charAt(im) != c) return false;
246 im++;
247 }
248 }
249
250 if (ip >= NP && im >= NM) {
251 // Reached the end of both strings, all is good!
252 return true;
253 }
254
255 // One last check: we may have finished the match string, but still
256 // have a '.*' at the end of the pattern, which should still count
257 // as a match.
258 if (ip == NP-2 && pattern.charAt(ip) == '.'
259 && pattern.charAt(ip+1) == '*') {
260 return true;
261 }
262
263 return false;
264 }
Patrick Baumann2baf0952016-08-25 15:56:46 -0700265
266 /**
267 * Parses the advanced pattern and returns an integer array representation of it. The integer
268 * array treats each field as a character if positive and a unique token placeholder if
269 * negative. This method will throw on any pattern structure violations.
270 */
271 synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
272 int ip = 0;
273 final int LP = pattern.length();
274
275 int it = 0;
276
277 boolean inSet = false;
278 boolean inRange = false;
279 boolean inCharClass = false;
280
281 boolean addToParsedPattern;
282
283 while (ip < LP) {
284 if (it > MAX_PATTERN_STORAGE - 3) {
285 throw new IllegalArgumentException("Pattern is too large!");
286 }
287
288 char c = pattern.charAt(ip);
289 addToParsedPattern = false;
290
291 switch (c) {
292 case '[':
293 if (inSet) {
294 addToParsedPattern = true; // treat as literal or char class in set
295 } else {
296 if (pattern.charAt(ip + 1) == '^') {
297 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
298 ip++; // skip over the '^'
299 } else {
300 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
301 }
302 ip++; // move to the next pattern char
303 inSet = true;
304 continue;
305 }
306 break;
307 case ']':
308 if (!inSet) {
309 addToParsedPattern = true; // treat as literal outside of set
310 } else {
311 int parsedToken = sParsedPatternScratch[it - 1];
312 if (parsedToken == PARSED_TOKEN_CHAR_SET_START ||
313 parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
314 throw new IllegalArgumentException(
315 "You must define characters in a set.");
316 }
317 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
318 inSet = false;
319 inCharClass = false;
320 }
321 break;
322 case '{':
323 if (!inSet) {
324 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
325 throw new IllegalArgumentException("Modifier must follow a token.");
326 }
327 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
328 ip++;
329 inRange = true;
330 }
331 break;
332 case '}':
333 if (inRange) { // only terminate the range if we're currently in one
334 sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
335 inRange = false;
336 }
337 break;
338 case '*':
339 if (!inSet) {
340 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
341 throw new IllegalArgumentException("Modifier must follow a token.");
342 }
343 sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
344 }
345 break;
346 case '+':
347 if (!inSet) {
348 if (it == 0 || isParsedModifier(sParsedPatternScratch[it - 1])) {
349 throw new IllegalArgumentException("Modifier must follow a token.");
350 }
351 sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
352 }
353 break;
354 case '.':
355 if (!inSet) {
356 sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
357 }
358 break;
359 case '\\': // escape
360 if (ip + 1 >= LP) {
361 throw new IllegalArgumentException("Escape found at end of pattern!");
362 }
363 c = pattern.charAt(++ip);
364 addToParsedPattern = true;
365 break;
366 default:
367 addToParsedPattern = true;
368 break;
369 }
370 if (inSet) {
371 if (inCharClass) {
372 sParsedPatternScratch[it++] = c;
373 inCharClass = false;
374 } else {
375 // look forward for character class
376 if (ip + 2 < LP
377 && pattern.charAt(ip + 1) == '-'
378 && pattern.charAt(ip + 2) != ']') {
379 inCharClass = true;
380 sParsedPatternScratch[it++] = c; // set first token as lower end of range
381 ip++; // advance past dash
382 } else { // literal
383 sParsedPatternScratch[it++] = c; // set first token as literal
384 sParsedPatternScratch[it++] = c; // set second set as literal
385 }
386 }
387 } else if (inRange) {
388 int endOfSet = pattern.indexOf('}', ip);
389 if (endOfSet < 0) {
390 throw new IllegalArgumentException("Range not ended with '}'");
391 }
392 String rangeString = pattern.substring(ip, endOfSet);
393 int commaIndex = rangeString.indexOf(',');
394 try {
395 final int rangeMin;
396 final int rangeMax;
397 if (commaIndex < 0) {
398 int parsedRange = Integer.parseInt(rangeString);
399 rangeMin = rangeMax = parsedRange;
400 } else {
401 rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
402 if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
403 rangeMax = Integer.MAX_VALUE;
404 } else {
405 rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
406 }
407 }
408 if (rangeMin > rangeMax) {
409 throw new IllegalArgumentException(
410 "Range quantifier minimum is greater than maximum");
411 }
412 sParsedPatternScratch[it++] = rangeMin;
413 sParsedPatternScratch[it++] = rangeMax;
414 } catch (NumberFormatException e) {
415 throw new IllegalArgumentException("Range number format incorrect", e);
416 }
417 ip = endOfSet;
418 continue; // don't increment ip
419 } else if (addToParsedPattern) {
420 sParsedPatternScratch[it++] = c;
421 }
422 ip++;
423 }
424 if (inSet) {
425 throw new IllegalArgumentException("Set was not terminated!");
426 }
427 return Arrays.copyOf(sParsedPatternScratch, it);
428 }
429
430 private static boolean isParsedModifier(int parsedChar) {
431 return parsedChar == PARSED_MODIFIER_ONE_OR_MORE ||
432 parsedChar == PARSED_MODIFIER_ZERO_OR_MORE ||
433 parsedChar == PARSED_MODIFIER_RANGE_STOP ||
434 parsedChar == PARSED_MODIFIER_RANGE_START;
435 }
436
437 static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
438
439 // create indexes
440 int ip = 0, im = 0;
441
442 // one-time length check
443 final int LP = parsedPattern.length, LM = match.length();
444
445 // The current character being analyzed in the pattern
446 int patternChar;
447
448 int tokenType;
449
450 int charSetStart = 0, charSetEnd = 0;
451
452 while (ip < LP) { // we still have content in the pattern
453
454 patternChar = parsedPattern[ip];
455 // get the match type of the next verb
456
457 switch (patternChar) {
458 case PARSED_TOKEN_CHAR_ANY:
459 tokenType = TOKEN_TYPE_ANY;
460 ip++;
461 break;
462 case PARSED_TOKEN_CHAR_SET_START:
463 case PARSED_TOKEN_CHAR_SET_INVERSE_START:
464 tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
465 ? TOKEN_TYPE_SET
466 : TOKEN_TYPE_INVERSE_SET;
467 charSetStart = ip + 1; // start from the char after the set start
468 while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
469 charSetEnd = ip - 1; // we're on the set stop, end is the previous
470 ip++; // move the pointer to the next pattern entry
471 break;
472 default:
473 charSetStart = ip;
474 tokenType = TOKEN_TYPE_LITERAL;
475 ip++;
476 break;
477 }
478
479 final int minRepetition;
480 final int maxRepetition;
481
482 // look for a match length modifier
483 if (ip >= LP) {
484 minRepetition = maxRepetition = 1;
485 } else {
486 patternChar = parsedPattern[ip];
487 switch (patternChar) {
488 case PARSED_MODIFIER_ZERO_OR_MORE:
489 minRepetition = 0;
490 maxRepetition = Integer.MAX_VALUE;
491 ip++;
492 break;
493 case PARSED_MODIFIER_ONE_OR_MORE:
494 minRepetition = 1;
495 maxRepetition = Integer.MAX_VALUE;
496 ip++;
497 break;
498 case PARSED_MODIFIER_RANGE_START:
499 minRepetition = parsedPattern[++ip];
500 maxRepetition = parsedPattern[++ip];
501 ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
502 break;
503 default:
504 minRepetition = maxRepetition = 1; // implied literal
505 break;
506 }
507 }
508 if (minRepetition > maxRepetition) {
509 return false;
510 }
511
512 // attempt to match as many characters as possible
513 int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
514 parsedPattern, charSetStart, charSetEnd);
515
516 // if we found a conflict, return false immediately
517 if (matched == NO_MATCH) {
518 return false;
519 }
520
521 // move the match pointer the number of characters matched
522 im += matched;
523 }
524 return ip >= LP && im >= LM; // have parsed entire string and regex
525 }
526
527 private static int matchChars(String match, int im, final int lm, int tokenType,
528 int minRepetition, int maxRepetition, int[] parsedPattern,
529 int tokenStart, int tokenEnd) {
530 int matched = 0;
531
532 while(matched < maxRepetition
533 && matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
534 tokenEnd)) {
535 matched++;
536 }
537
538 return matched < minRepetition ? NO_MATCH : matched;
539 }
540
541 private static boolean matchChar(String match, int im, final int lm, int tokenType,
542 int[] parsedPattern, int tokenStart, int tokenEnd) {
543 if (im >= lm) { // we've overrun the string, no match
544 return false;
545 }
546 switch (tokenType) {
547 case TOKEN_TYPE_ANY:
548 return true;
549 case TOKEN_TYPE_SET:
550 for (int i = tokenStart; i < tokenEnd; i += 2) {
551 char matchChar = match.charAt(im);
552 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
553 return true;
554 }
555 }
556 return false;
557 case TOKEN_TYPE_INVERSE_SET:
558 for (int i = tokenStart; i < tokenEnd; i += 2) {
559 char matchChar = match.charAt(im);
560 if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
561 return false;
562 }
563 }
564 return true;
565 case TOKEN_TYPE_LITERAL:
566 return match.charAt(im) == parsedPattern[tokenStart];
567 default:
568 return false;
569 }
570 }
571}