Blame - core/java/android/os/PatternMatcher.java - platform/frameworks/base

blob: ef03e8c3c701711673d1170e2f43ff8db5293100 [file] [log] [blame]

The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2008 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	package android.os;
				18
Yi Jin	129fc6c	2017-09-28 15:48:38 -0700	[diff] [blame]	19	import android.util.proto.ProtoOutputStream;
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	20
				21	import java.util.Arrays;
				22
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	23	/**
				24	* A simple pattern matcher, which is safe to use on untrusted data: it does
				25	* not provide full reg-exp support, only simple globbing that can not be
				26	* used maliciously.
				27	*/
				28	public class PatternMatcher implements Parcelable {
				29	/**
				30	* Pattern type: the given pattern must exactly match the string it is
				31	* tested against.
				32	*/
				33	public static final int PATTERN_LITERAL = 0;
				34
				35	/**
				36	* Pattern type: the given pattern must match the
				37	* beginning of the string it is tested against.
				38	*/
				39	public static final int PATTERN_PREFIX = 1;
				40
				41	/**
				42	* Pattern type: the given pattern is interpreted with a
				43	* simple glob syntax for matching against the string it is tested against.
				44	* In this syntax, you can use the '*' character to match against zero or
				45	* more occurrences of the character immediately before. If the
				46	* character before it is '.' it will match any character. The character
				47	* '\' can be used as an escape. This essentially provides only the '*'
				48	* wildcard part of a normal regexp.
				49	*/
				50	public static final int PATTERN_SIMPLE_GLOB = 2;
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	51
				52	/**
				53	* Pattern type: the given pattern is interpreted with a regular
				54	* expression-like syntax for matching against the string it is tested
				55	* against. Supported tokens include dot ({@code .}) and sets ({@code [...]})
				56	* with full support for character ranges and the not ({@code ^}) modifier.
				57	* Supported modifiers include star ({@code *}) for zero-or-more, plus ({@code +})
				58	* for one-or-more and full range ({@code {...}}) support. This is a simple
Todd Kennedy	9c9fdf2	2017-03-06 10:58:27 -0800	[diff] [blame]	59	* evaluation implementation in which matching is done against the pattern in
				60	* real time with no backtracking support.
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	61	*/
				62	public static final int PATTERN_ADVANCED_GLOB = 3;
				63
				64	// token types for advanced matching
				65	private static final int TOKEN_TYPE_LITERAL = 0;
				66	private static final int TOKEN_TYPE_ANY = 1;
				67	private static final int TOKEN_TYPE_SET = 2;
				68	private static final int TOKEN_TYPE_INVERSE_SET = 3;
				69
				70	// Return for no match
				71	private static final int NO_MATCH = -1;
				72
				73	private static final String TAG = "PatternMatcher";
				74
				75	// Parsed placeholders for advanced patterns
				76	private static final int PARSED_TOKEN_CHAR_SET_START = -1;
				77	private static final int PARSED_TOKEN_CHAR_SET_INVERSE_START = -2;
				78	private static final int PARSED_TOKEN_CHAR_SET_STOP = -3;
				79	private static final int PARSED_TOKEN_CHAR_ANY = -4;
				80	private static final int PARSED_MODIFIER_RANGE_START = -5;
				81	private static final int PARSED_MODIFIER_RANGE_STOP = -6;
				82	private static final int PARSED_MODIFIER_ZERO_OR_MORE = -7;
				83	private static final int PARSED_MODIFIER_ONE_OR_MORE = -8;
				84
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	85	private final String mPattern;
				86	private final int mType;
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	87	private final int[] mParsedPattern;
				88
				89
				90	private static final int MAX_PATTERN_STORAGE = 2048;
				91	// workspace to use for building a parsed advanced pattern;
				92	private static final int[] sParsedPatternScratch = new int[MAX_PATTERN_STORAGE];
				93
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	94	public PatternMatcher(String pattern, int type) {
				95	mPattern = pattern;
				96	mType = type;
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	97	if (mType == PATTERN_ADVANCED_GLOB) {
				98	mParsedPattern = parseAndVerifyAdvancedPattern(pattern);
				99	} else {
				100	mParsedPattern = null;
				101	}
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	102	}
				103
				104	public final String getPath() {
				105	return mPattern;
				106	}
				107
				108	public final int getType() {
				109	return mType;
				110	}
				111
				112	public boolean match(String str) {
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	113	return matchPattern(str, mPattern, mParsedPattern, mType);
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	114	}
				115
				116	public String toString() {
				117	String type = "? ";
				118	switch (mType) {
				119	case PATTERN_LITERAL:
				120	type = "LITERAL: ";
				121	break;
				122	case PATTERN_PREFIX:
				123	type = "PREFIX: ";
				124	break;
				125	case PATTERN_SIMPLE_GLOB:
				126	type = "GLOB: ";
				127	break;
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	128	case PATTERN_ADVANCED_GLOB:
				129	type = "ADVANCED: ";
				130	break;
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	131	}
				132	return "PatternMatcher{" + type + mPattern + "}";
				133	}
Yi Jin	129fc6c	2017-09-28 15:48:38 -0700	[diff] [blame]	134
				135	/** @hide */
				136	public void writeToProto(ProtoOutputStream proto, long fieldId) {
				137	long token = proto.start(fieldId);
				138	proto.write(PatternMatcherProto.PATTERN, mPattern);
				139	proto.write(PatternMatcherProto.TYPE, mType);
				140	// PatternMatcherProto.PARSED_PATTERN is too much to dump, but the field is reserved to
				141	// match the current data structure.
				142	proto.end(token);
				143	}
				144
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	145	public int describeContents() {
				146	return 0;
				147	}
				148
				149	public void writeToParcel(Parcel dest, int flags) {
				150	dest.writeString(mPattern);
				151	dest.writeInt(mType);
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	152	dest.writeIntArray(mParsedPattern);
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	153	}
Yi Jin	129fc6c	2017-09-28 15:48:38 -0700	[diff] [blame]	154
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	155	public PatternMatcher(Parcel src) {
				156	mPattern = src.readString();
				157	mType = src.readInt();
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	158	mParsedPattern = src.createIntArray();
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	159	}
				160
Jeff Sharkey	9e8f83d	2019-02-28 12:06:45 -0700	[diff] [blame]	161	public static final @android.annotation.NonNull Parcelable.Creator<PatternMatcher> CREATOR
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	162	= new Parcelable.Creator<PatternMatcher>() {
				163	public PatternMatcher createFromParcel(Parcel source) {
				164	return new PatternMatcher(source);
				165	}
				166
				167	public PatternMatcher[] newArray(int size) {
				168	return new PatternMatcher[size];
				169	}
				170	};
				171
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	172	static boolean matchPattern(String match, String pattern, int[] parsedPattern, int type) {
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	173	if (match == null) return false;
				174	if (type == PATTERN_LITERAL) {
				175	return pattern.equals(match);
				176	} if (type == PATTERN_PREFIX) {
				177	return match.startsWith(pattern);
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	178	} else if (type == PATTERN_SIMPLE_GLOB) {
				179	return matchGlobPattern(pattern, match);
				180	} else if (type == PATTERN_ADVANCED_GLOB) {
				181	return matchAdvancedPattern(parsedPattern, match);
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	182	}
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	183	return false;
				184	}
				185
				186	static boolean matchGlobPattern(String pattern, String match) {
The Android Open Source Project	9066cfe	2009-03-03 19:31:44 -0800	[diff] [blame]	187	final int NP = pattern.length();
				188	if (NP <= 0) {
				189	return match.length() <= 0;
				190	}
				191	final int NM = match.length();
				192	int ip = 0, im = 0;
				193	char nextChar = pattern.charAt(0);
				194	while ((ip<NP) && (im<NM)) {
				195	char c = nextChar;
				196	ip++;
				197	nextChar = ip < NP ? pattern.charAt(ip) : 0;
				198	final boolean escaped = (c == '\\');
				199	if (escaped) {
				200	c = nextChar;
				201	ip++;
				202	nextChar = ip < NP ? pattern.charAt(ip) : 0;
				203	}
				204	if (nextChar == '*') {
				205	if (!escaped && c == '.') {
				206	if (ip >= (NP-1)) {
				207	// at the end with a pattern match, so
				208	// all is good without checking!
				209	return true;
				210	}
				211	ip++;
				212	nextChar = pattern.charAt(ip);
				213	// Consume everything until the next character in the
				214	// pattern is found.
				215	if (nextChar == '\\') {
				216	ip++;
				217	nextChar = ip < NP ? pattern.charAt(ip) : 0;
				218	}
				219	do {
				220	if (match.charAt(im) == nextChar) {
				221	break;
				222	}
				223	im++;
				224	} while (im < NM);
				225	if (im == NM) {
				226	// Whoops, the next character in the pattern didn't
				227	// exist in the match.
				228	return false;
				229	}
				230	ip++;
				231	nextChar = ip < NP ? pattern.charAt(ip) : 0;
				232	im++;
				233	} else {
				234	// Consume only characters matching the one before '*'.
				235	do {
				236	if (match.charAt(im) != c) {
				237	break;
				238	}
				239	im++;
				240	} while (im < NM);
				241	ip++;
				242	nextChar = ip < NP ? pattern.charAt(ip) : 0;
				243	}
				244	} else {
				245	if (c != '.' && match.charAt(im) != c) return false;
				246	im++;
				247	}
				248	}
				249
				250	if (ip >= NP && im >= NM) {
				251	// Reached the end of both strings, all is good!
				252	return true;
				253	}
				254
				255	// One last check: we may have finished the match string, but still
				256	// have a '.*' at the end of the pattern, which should still count
				257	// as a match.
				258	if (ip == NP-2 && pattern.charAt(ip) == '.'
				259	&& pattern.charAt(ip+1) == '*') {
				260	return true;
				261	}
				262
				263	return false;
				264	}
Patrick Baumann	2baf095	2016-08-25 15:56:46 -0700	[diff] [blame]	265
				266	/**
				267	* Parses the advanced pattern and returns an integer array representation of it. The integer
				268	* array treats each field as a character if positive and a unique token placeholder if
				269	* negative. This method will throw on any pattern structure violations.
				270	*/
				271	synchronized static int[] parseAndVerifyAdvancedPattern(String pattern) {
				272	int ip = 0;
				273	final int LP = pattern.length();
				274
				275	int it = 0;
				276
				277	boolean inSet = false;
				278	boolean inRange = false;
				279	boolean inCharClass = false;
				280
				281	boolean addToParsedPattern;
				282
				283	while (ip < LP) {
				284	if (it > MAX_PATTERN_STORAGE - 3) {
				285	throw new IllegalArgumentException("Pattern is too large!");
				286	}
				287
				288	char c = pattern.charAt(ip);
				289	addToParsedPattern = false;
				290
				291	switch (c) {
				292	case '[':
				293	if (inSet) {
				294	addToParsedPattern = true; // treat as literal or char class in set
				295	} else {
				296	if (pattern.charAt(ip + 1) == '^') {
				297	sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_INVERSE_START;
				298	ip++; // skip over the '^'
				299	} else {
				300	sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_START;
				301	}
				302	ip++; // move to the next pattern char
				303	inSet = true;
				304	continue;
				305	}
				306	break;
				307	case ']':
				308	if (!inSet) {
				309	addToParsedPattern = true; // treat as literal outside of set
				310	} else {
				311	int parsedToken = sParsedPatternScratch[it - 1];
				312	if (parsedToken == PARSED_TOKEN_CHAR_SET_START \|\|
				313	parsedToken == PARSED_TOKEN_CHAR_SET_INVERSE_START) {
				314	throw new IllegalArgumentException(
				315	"You must define characters in a set.");
				316	}
				317	sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_SET_STOP;
				318	inSet = false;
				319	inCharClass = false;
				320	}
				321	break;
				322	case '{':
				323	if (!inSet) {
				324	if (it == 0 \|\| isParsedModifier(sParsedPatternScratch[it - 1])) {
				325	throw new IllegalArgumentException("Modifier must follow a token.");
				326	}
				327	sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_START;
				328	ip++;
				329	inRange = true;
				330	}
				331	break;
				332	case '}':
				333	if (inRange) { // only terminate the range if we're currently in one
				334	sParsedPatternScratch[it++] = PARSED_MODIFIER_RANGE_STOP;
				335	inRange = false;
				336	}
				337	break;
				338	case '*':
				339	if (!inSet) {
				340	if (it == 0 \|\| isParsedModifier(sParsedPatternScratch[it - 1])) {
				341	throw new IllegalArgumentException("Modifier must follow a token.");
				342	}
				343	sParsedPatternScratch[it++] = PARSED_MODIFIER_ZERO_OR_MORE;
				344	}
				345	break;
				346	case '+':
				347	if (!inSet) {
				348	if (it == 0 \|\| isParsedModifier(sParsedPatternScratch[it - 1])) {
				349	throw new IllegalArgumentException("Modifier must follow a token.");
				350	}
				351	sParsedPatternScratch[it++] = PARSED_MODIFIER_ONE_OR_MORE;
				352	}
				353	break;
				354	case '.':
				355	if (!inSet) {
				356	sParsedPatternScratch[it++] = PARSED_TOKEN_CHAR_ANY;
				357	}
				358	break;
				359	case '\\': // escape
				360	if (ip + 1 >= LP) {
				361	throw new IllegalArgumentException("Escape found at end of pattern!");
				362	}
				363	c = pattern.charAt(++ip);
				364	addToParsedPattern = true;
				365	break;
				366	default:
				367	addToParsedPattern = true;
				368	break;
				369	}
				370	if (inSet) {
				371	if (inCharClass) {
				372	sParsedPatternScratch[it++] = c;
				373	inCharClass = false;
				374	} else {
				375	// look forward for character class
				376	if (ip + 2 < LP
				377	&& pattern.charAt(ip + 1) == '-'
				378	&& pattern.charAt(ip + 2) != ']') {
				379	inCharClass = true;
				380	sParsedPatternScratch[it++] = c; // set first token as lower end of range
				381	ip++; // advance past dash
				382	} else { // literal
				383	sParsedPatternScratch[it++] = c; // set first token as literal
				384	sParsedPatternScratch[it++] = c; // set second set as literal
				385	}
				386	}
				387	} else if (inRange) {
				388	int endOfSet = pattern.indexOf('}', ip);
				389	if (endOfSet < 0) {
				390	throw new IllegalArgumentException("Range not ended with '}'");
				391	}
				392	String rangeString = pattern.substring(ip, endOfSet);
				393	int commaIndex = rangeString.indexOf(',');
				394	try {
				395	final int rangeMin;
				396	final int rangeMax;
				397	if (commaIndex < 0) {
				398	int parsedRange = Integer.parseInt(rangeString);
				399	rangeMin = rangeMax = parsedRange;
				400	} else {
				401	rangeMin = Integer.parseInt(rangeString.substring(0, commaIndex));
				402	if (commaIndex == rangeString.length() - 1) { // e.g. {n,} (n or more)
				403	rangeMax = Integer.MAX_VALUE;
				404	} else {
				405	rangeMax = Integer.parseInt(rangeString.substring(commaIndex + 1));
				406	}
				407	}
				408	if (rangeMin > rangeMax) {
				409	throw new IllegalArgumentException(
				410	"Range quantifier minimum is greater than maximum");
				411	}
				412	sParsedPatternScratch[it++] = rangeMin;
				413	sParsedPatternScratch[it++] = rangeMax;
				414	} catch (NumberFormatException e) {
				415	throw new IllegalArgumentException("Range number format incorrect", e);
				416	}
				417	ip = endOfSet;
				418	continue; // don't increment ip
				419	} else if (addToParsedPattern) {
				420	sParsedPatternScratch[it++] = c;
				421	}
				422	ip++;
				423	}
				424	if (inSet) {
				425	throw new IllegalArgumentException("Set was not terminated!");
				426	}
				427	return Arrays.copyOf(sParsedPatternScratch, it);
				428	}
				429
				430	private static boolean isParsedModifier(int parsedChar) {
				431	return parsedChar == PARSED_MODIFIER_ONE_OR_MORE \|\|
				432	parsedChar == PARSED_MODIFIER_ZERO_OR_MORE \|\|
				433	parsedChar == PARSED_MODIFIER_RANGE_STOP \|\|
				434	parsedChar == PARSED_MODIFIER_RANGE_START;
				435	}
				436
				437	static boolean matchAdvancedPattern(int[] parsedPattern, String match) {
				438
				439	// create indexes
				440	int ip = 0, im = 0;
				441
				442	// one-time length check
				443	final int LP = parsedPattern.length, LM = match.length();
				444
				445	// The current character being analyzed in the pattern
				446	int patternChar;
				447
				448	int tokenType;
				449
				450	int charSetStart = 0, charSetEnd = 0;
				451
				452	while (ip < LP) { // we still have content in the pattern
				453
				454	patternChar = parsedPattern[ip];
				455	// get the match type of the next verb
				456
				457	switch (patternChar) {
				458	case PARSED_TOKEN_CHAR_ANY:
				459	tokenType = TOKEN_TYPE_ANY;
				460	ip++;
				461	break;
				462	case PARSED_TOKEN_CHAR_SET_START:
				463	case PARSED_TOKEN_CHAR_SET_INVERSE_START:
				464	tokenType = patternChar == PARSED_TOKEN_CHAR_SET_START
				465	? TOKEN_TYPE_SET
				466	: TOKEN_TYPE_INVERSE_SET;
				467	charSetStart = ip + 1; // start from the char after the set start
				468	while (++ip < LP && parsedPattern[ip] != PARSED_TOKEN_CHAR_SET_STOP);
				469	charSetEnd = ip - 1; // we're on the set stop, end is the previous
				470	ip++; // move the pointer to the next pattern entry
				471	break;
				472	default:
				473	charSetStart = ip;
				474	tokenType = TOKEN_TYPE_LITERAL;
				475	ip++;
				476	break;
				477	}
				478
				479	final int minRepetition;
				480	final int maxRepetition;
				481
				482	// look for a match length modifier
				483	if (ip >= LP) {
				484	minRepetition = maxRepetition = 1;
				485	} else {
				486	patternChar = parsedPattern[ip];
				487	switch (patternChar) {
				488	case PARSED_MODIFIER_ZERO_OR_MORE:
				489	minRepetition = 0;
				490	maxRepetition = Integer.MAX_VALUE;
				491	ip++;
				492	break;
				493	case PARSED_MODIFIER_ONE_OR_MORE:
				494	minRepetition = 1;
				495	maxRepetition = Integer.MAX_VALUE;
				496	ip++;
				497	break;
				498	case PARSED_MODIFIER_RANGE_START:
				499	minRepetition = parsedPattern[++ip];
				500	maxRepetition = parsedPattern[++ip];
				501	ip += 2; // step over PARSED_MODIFIER_RANGE_STOP and on to the next token
				502	break;
				503	default:
				504	minRepetition = maxRepetition = 1; // implied literal
				505	break;
				506	}
				507	}
				508	if (minRepetition > maxRepetition) {
				509	return false;
				510	}
				511
				512	// attempt to match as many characters as possible
				513	int matched = matchChars(match, im, LM, tokenType, minRepetition, maxRepetition,
				514	parsedPattern, charSetStart, charSetEnd);
				515
				516	// if we found a conflict, return false immediately
				517	if (matched == NO_MATCH) {
				518	return false;
				519	}
				520
				521	// move the match pointer the number of characters matched
				522	im += matched;
				523	}
				524	return ip >= LP && im >= LM; // have parsed entire string and regex
				525	}
				526
				527	private static int matchChars(String match, int im, final int lm, int tokenType,
				528	int minRepetition, int maxRepetition, int[] parsedPattern,
				529	int tokenStart, int tokenEnd) {
				530	int matched = 0;
				531
				532	while(matched < maxRepetition
				533	&& matchChar(match, im + matched, lm, tokenType, parsedPattern, tokenStart,
				534	tokenEnd)) {
				535	matched++;
				536	}
				537
				538	return matched < minRepetition ? NO_MATCH : matched;
				539	}
				540
				541	private static boolean matchChar(String match, int im, final int lm, int tokenType,
				542	int[] parsedPattern, int tokenStart, int tokenEnd) {
				543	if (im >= lm) { // we've overrun the string, no match
				544	return false;
				545	}
				546	switch (tokenType) {
				547	case TOKEN_TYPE_ANY:
				548	return true;
				549	case TOKEN_TYPE_SET:
				550	for (int i = tokenStart; i < tokenEnd; i += 2) {
				551	char matchChar = match.charAt(im);
				552	if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
				553	return true;
				554	}
				555	}
				556	return false;
				557	case TOKEN_TYPE_INVERSE_SET:
				558	for (int i = tokenStart; i < tokenEnd; i += 2) {
				559	char matchChar = match.charAt(im);
				560	if (matchChar >= parsedPattern[i] && matchChar <= parsedPattern[i + 1]) {
				561	return false;
				562	}
				563	}
				564	return true;
				565	case TOKEN_TYPE_LITERAL:
				566	return match.charAt(im) == parsedPattern[tokenStart];
				567	default:
				568	return false;
				569	}
				570	}
				571	}