Blame - cmdline/token_range.h - platform/art

blob: e28ead92dfb85c54d977121be4ff9a9ce3480853 [file] [log] [blame]

Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	1	/*
				2	* Copyright (C) 2015 The Android Open Source Project
				3	*
				4	* Licensed under the Apache License, Version 2.0 (the "License");
				5	* you may not use this file except in compliance with the License.
				6	* You may obtain a copy of the License at
				7	*
				8	* http://www.apache.org/licenses/LICENSE-2.0
				9	*
				10	* Unless required by applicable law or agreed to in writing, software
				11	* distributed under the License is distributed on an "AS IS" BASIS,
				12	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				13	* See the License for the specific language governing permissions and
				14	* limitations under the License.
				15	*/
				16
				17	#ifndef ART_CMDLINE_TOKEN_RANGE_H_
				18	#define ART_CMDLINE_TOKEN_RANGE_H_
				19
				20	#include <assert.h>
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	21	#include <algorithm>
				22	#include <memory>
Andreas Gampe	8cf9cb3	2017-07-19 09:28:38 -0700	[diff] [blame]	23	#include <string>
				24	#include <vector>
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	25
Andreas Gampe	9186ced	2016-12-12 14:28:21 -0800	[diff] [blame]	26	#include "android-base/strings.h"
				27
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	28	namespace art {
				29	// A range of tokens to make token matching algorithms easier.
				30	//
				31	// We try really hard to avoid copying and store only a pointer and iterators to the
				32	// interiors of the vector, so a typical copy constructor never ends up doing a deep copy.
				33	// It is up to the user to play nice and not to mutate the strings in-place.
				34	//
				35	// Tokens are only copied if a mutating operation is performed (and even then only
				36	// if it actually mutates the token).
				37	struct TokenRange {
				38	// Short-hand for a vector of strings. A single string and a token is synonymous.
				39	using TokenList = std::vector<std::string>;
				40
				41	// Copying-from-vector constructor.
				42	explicit TokenRange(const TokenList& token_list)
				43	: token_list_(new TokenList(token_list)),
				44	begin_(token_list_->begin()),
				45	end_(token_list_->end())
				46	{}
				47
				48	// Copying-from-iterator constructor
				49	template <typename ForwardIterator>
Roland Levillain	3887c46	2015-08-12 18:15:42 +0100	[diff] [blame]	50	TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	51	: token_list_(new TokenList(it_begin, it_end)),
				52	begin_(token_list_->begin()),
				53	end_(token_list_->end())
				54	{}
				55
				56	#if 0
				57	// Copying-from-vector constructor.
				58	TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED,
				59	TokenList::const_iterator it_begin,
				60	TokenList::const_iterator it_end)
				61	: token_list_(new TokenList(it_begin, it_end)),
				62	begin_(token_list_->begin()),
				63	end_(token_list_->end()) {
				64	assert(it_begin >= token_list.begin());
				65	assert(it_end <= token_list.end());
				66	}
				67	#endif
				68
				69	// Copying from char array constructor, convertings into tokens (strings) along the way.
				70	TokenRange(const char* token_list[], size_t length)
				71	: token_list_(new TokenList(&token_list[0], &token_list[length])),
				72	begin_(token_list_->begin()),
				73	end_(token_list_->end())
				74	{}
				75
				76	// Non-copying move-from-vector constructor. Takes over the token vector.
				77	explicit TokenRange(TokenList&& token_list)
				78	: token_list_(new TokenList(std::forward<TokenList>(token_list))),
				79	begin_(token_list_->begin()),
				80	end_(token_list_->end())
				81	{}
				82
				83	// Non-copying constructor. Retain reference to existing list of tokens.
				84	TokenRange(std::shared_ptr<TokenList> token_list,
				85	TokenList::const_iterator it_begin,
				86	TokenList::const_iterator it_end)
				87	: token_list_(token_list),
				88	begin_(it_begin),
				89	end_(it_end) {
				90	assert(it_begin >= token_list->begin());
				91	assert(it_end <= token_list->end());
				92	}
				93
				94	// Non-copying copy constructor.
Andreas Gampe	c801f0d	2015-02-24 20:55:16 -0800	[diff] [blame]	95	TokenRange(const TokenRange&) = default;
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	96
				97	// Non-copying move constructor.
Andreas Gampe	c801f0d	2015-02-24 20:55:16 -0800	[diff] [blame]	98	TokenRange(TokenRange&&) = default;
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	99
				100	// Non-copying constructor. Retains reference to an existing list of tokens, with offset.
				101	explicit TokenRange(std::shared_ptr<TokenList> token_list)
				102	: token_list_(token_list),
				103	begin_(token_list_->begin()),
				104	end_(token_list_->end())
				105	{}
				106
				107	// Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
				108	using iterator = TokenList::const_iterator;
				109
				110	// Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
				111	using const_iterator = iterator;
				112
				113	// Create a token range by splitting a string. Each separator gets their own token.
				114	// Since the separator are retained as tokens, it might be useful to call
				115	// RemoveToken afterwards.
				116	static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
				117	TokenList new_token_list;
				118
				119	std::string tok;
				120	for (auto&& c : string) {
				121	for (char sep : separators) {
				122	if (c == sep) {
				123	// We spotted a separator character.
				124	// Push back everything before the last separator as a new token.
				125	// Push back the separator as a token.
				126	if (!tok.empty()) {
				127	new_token_list.push_back(tok);
				128	tok = "";
				129	}
				130	new_token_list.push_back(std::string() + sep);
				131	} else {
				132	// Build up the token with another character.
				133	tok += c;
				134	}
				135	}
				136	}
				137
				138	if (!tok.empty()) {
				139	new_token_list.push_back(tok);
				140	}
				141
				142	return TokenRange(std::move(new_token_list));
				143	}
				144
				145	// A RandomAccessIterator to the first element in this range.
				146	iterator begin() const {
				147	return begin_;
				148	}
				149
				150	// A RandomAccessIterator to one past the last element in this range.
				151	iterator end() const {
				152	return end_;
				153	}
				154
				155	// The size of the range, i.e. how many tokens are in it.
				156	size_t Size() const {
				157	return std::distance(begin_, end_);
				158	}
				159
				160	// Are there 0 tokens in this range?
				161	bool IsEmpty() const {
				162	return Size() > 0;
				163	}
				164
				165	// Look up a token by it's offset.
				166	const std::string& GetToken(size_t offset) const {
				167	assert(offset < Size());
				168	return *(begin_ + offset);
				169	}
				170
				171	// Does this token range equal the other range?
				172	// Equality is defined as having both the same size, and
				173	// each corresponding token being equal.
				174	bool operator==(const TokenRange& other) const {
				175	if (this == &other) {
				176	return true;
				177	}
				178
				179	if (Size() != other.Size()) {
				180	return false;
				181	}
				182
				183	return std::equal(begin(), end(), other.begin());
				184	}
				185
				186	// Look up the token at the requested index.
				187	const std::string& operator[](int index) const {
				188	assert(index >= 0 && static_cast<size_t>(index) < Size());
				189	return *(begin() + index);
				190	}
				191
				192	// Does this current range start with the other range?
				193	bool StartsWith(const TokenRange& other) const {
				194	if (this == &other) {
				195	return true;
				196	}
				197
				198	if (Size() < other.Size()) {
				199	return false;
				200	}
				201
				202	auto& smaller = Size() < other.Size() ? *this : other;
				203	auto& greater = Size() < other.Size() ? other : *this;
				204
				205	return std::equal(smaller.begin(), smaller.end(), greater.begin());
				206	}
				207
				208	// Remove all characters 'c' from each token, potentially copying the underlying tokens.
				209	TokenRange RemoveCharacter(char c) const {
				210	TokenList new_token_list(begin(), end());
				211
				212	bool changed = false;
				213	for (auto&& token : new_token_list) {
				214	auto it = std::remove_if(token.begin(), token.end(), [&](char ch) {
				215	if (ch == c) {
				216	changed = true;
				217	return true;
				218	}
				219	return false;
				220	});
				221	token.erase(it, token.end());
				222	}
				223
				224	if (!changed) {
				225	return *this;
				226	}
				227
				228	return TokenRange(std::move(new_token_list));
				229	}
				230
				231	// Remove all tokens matching this one, potentially copying the underlying tokens.
				232	TokenRange RemoveToken(const std::string& token) {
				233	return RemoveIf([&](const std::string& tok) { return tok == token; });
				234	}
				235
				236	// Discard all empty tokens, potentially copying the underlying tokens.
				237	TokenRange DiscardEmpty() const {
				238	return RemoveIf([](const std::string& token) { return token.empty(); });
				239	}
				240
				241	// Create a non-copying subset of this range.
				242	// Length is trimmed so that the Slice does not go out of range.
				243	TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
				244	assert(offset < Size());
				245
				246	if (length != std::string::npos && offset + length > Size()) {
				247	length = Size() - offset;
				248	}
				249
				250	iterator it_end;
				251	if (length == std::string::npos) {
				252	it_end = end();
				253	} else {
				254	it_end = begin() + offset + length;
				255	}
				256
				257	return TokenRange(token_list_, begin() + offset, it_end);
				258	}
				259
				260	// Try to match the string with tokens from this range.
				261	// Each token is used to match exactly once (after which the next token is used, and so on).
				262	// The matching happens from left-to-right in a non-greedy fashion.
				263	// If the currently-matched token is the wildcard, then the new outputted token will
				264	// contain as much as possible until the next token is matched.
				265	//
				266	// For example, if this == ["a:", "_", "b:] and "_" is the match string, then
				267	// MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
				268	//
				269	// Since the string matching can fail (e.g. ["foo"] against "bar"), then this
				270	// function can fail, in which cause it will return null.
				271	std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
				272	const std::string& wildcard) const {
				273	TokenList new_token_list;
				274
				275	size_t wildcard_idx = std::string::npos;
				276	size_t string_idx = 0;
				277
				278	// Function to push all the characters matched as a wildcard so far
				279	// as a brand new token. It resets the wildcard matching.
				280	// Empty wildcards are possible and ok, but only if wildcard matching was on.
				281	auto maybe_push_wildcard_token = [&]() {
				282	if (wildcard_idx != std::string::npos) {
				283	size_t wildcard_length = string_idx - wildcard_idx;
				284	std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length);
				285	new_token_list.push_back(std::move(wildcard_substr));
				286
				287	wildcard_idx = std::string::npos;
				288	}
				289	};
				290
				291	for (iterator it = begin(); it != end(); ++it) {
				292	const std::string& tok = *it;
				293
				294	if (tok == wildcard) {
				295	maybe_push_wildcard_token();
				296	wildcard_idx = string_idx;
				297	continue;
				298	}
				299
				300	size_t next_token_idx = string.find(tok);
				301	if (next_token_idx == std::string::npos) {
				302	// Could not find token at all
				303	return nullptr;
				304	} else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
				305	// Found the token at a non-starting location, and we weren't
				306	// trying to parse the wildcard.
				307	return nullptr;
				308	}
				309
				310	new_token_list.push_back(string.substr(next_token_idx, tok.size()));
				311	maybe_push_wildcard_token();
				312	string_idx += tok.size();
				313	}
				314
				315	size_t remaining = string.size() - string_idx;
				316	if (remaining > 0) {
				317	if (wildcard_idx == std::string::npos) {
				318	// Some characters were still remaining in the string,
				319	// but it wasn't trying to match a wildcard.
				320	return nullptr;
				321	}
				322	}
				323
				324	// If some characters are remaining, the rest must be a wildcard.
				325	string_idx += remaining;
				326	maybe_push_wildcard_token();
				327
Yi Kong	c57c680	2018-10-29 14:28:56 -0700	[diff] [blame]	328	return std::make_unique<TokenRange>(std::move(new_token_list));
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	329	}
				330
				331	// Do a quick match token-by-token, and see if they match.
				332	// Any tokens with a wildcard in them are only matched up until the wildcard.
				333	// If this is true, then the wildcard matching later on can still fail, so this is not
				334	// a guarantee that the argument is correct, it's more of a strong hint that the
				335	// user-provided input probably was trying to match this argument.
				336	//
				337	// Returns how many tokens were either matched (or ignored because there was a
				338	// wildcard present). 0 means no match. If the size() tokens are returned.
				339	size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
				340	auto token_it = token_list.begin();
				341	auto token_end = token_list.end();
				342	auto name_it = begin();
				343	auto name_end = end();
				344
				345	size_t matched_tokens = 0;
				346
				347	while (token_it != token_end && name_it != name_end) {
				348	// Skip token matching when the corresponding name has a wildcard in it.
				349	const std::string& name = *name_it;
				350
				351	size_t wildcard_idx = name.find(wildcard);
				352	if (wildcard_idx == std::string::npos) { // No wildcard present
				353	// Did the definition token match the user token?
				354	if (name != *token_it) {
				355	return matched_tokens;
				356	}
				357	} else {
				358	std::string name_prefix = name.substr(0, wildcard_idx);
				359
				360	// Did the user token start with the up-to-the-wildcard prefix?
				361	if (!StartsWith(*token_it, name_prefix)) {
				362	return matched_tokens;
				363	}
				364	}
				365
				366	++token_it;
				367	++name_it;
				368	++matched_tokens;
				369	}
				370
				371	// If we got this far, it's either a full match or the token list was too short.
				372	return matched_tokens;
				373	}
				374
				375	// Flatten the token range by joining every adjacent token with the separator character.
				376	// e.g. ["hello", "world"].join('$') == "hello$world"
				377	std::string Join(char separator) const {
				378	TokenList tmp(begin(), end());
Andreas Gampe	9186ced	2016-12-12 14:28:21 -0800	[diff] [blame]	379	return android::base::Join(tmp, separator);
Igor Murashkin	aaebaa0	2015-01-26 10:55:53 -0800	[diff] [blame]	380	// TODO: Join should probably take an offset or iterators
				381	}
				382
				383	private:
				384	static bool StartsWith(const std::string& larger, const std::string& smaller) {
				385	if (larger.size() >= smaller.size()) {
				386	return std::equal(smaller.begin(), smaller.end(), larger.begin());
				387	}
				388
				389	return false;
				390	}
				391
				392	template <typename TPredicate>
				393	TokenRange RemoveIf(const TPredicate& predicate) const {
				394	// If any of the tokens in the token lists are empty, then
				395	// we need to remove them and compress the token list into a smaller one.
				396	bool remove = false;
				397	for (auto it = begin_; it != end_; ++it) {
				398	auto&& token = *it;
				399
				400	if (predicate(token)) {
				401	remove = true;
				402	break;
				403	}
				404	}
				405
				406	// Actually copy the token list and remove the tokens that don't match our predicate.
				407	if (remove) {
				408	auto token_list = std::make_shared<TokenList>(begin(), end());
				409	TokenList::iterator new_end =
				410	std::remove_if(token_list->begin(), token_list->end(), predicate);
				411	token_list->erase(new_end, token_list->end());
				412
				413	assert(token_list_->size() > token_list->size() && "Nothing was actually removed!");
				414
				415	return TokenRange(token_list);
				416	}
				417
				418	return *this;
				419	}
				420
				421	const std::shared_ptr<std::vector<std::string>> token_list_;
				422	const iterator begin_;
				423	const iterator end_;
				424	};
				425	} // namespace art
				426
				427	#endif // ART_CMDLINE_TOKEN_RANGE_H_