blob: e28ead92dfb85c54d977121be4ff9a9ce3480853 [file] [log] [blame]
Igor Murashkinaaebaa02015-01-26 10:55:53 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_CMDLINE_TOKEN_RANGE_H_
18#define ART_CMDLINE_TOKEN_RANGE_H_
19
20#include <assert.h>
Igor Murashkinaaebaa02015-01-26 10:55:53 -080021#include <algorithm>
22#include <memory>
Andreas Gampe8cf9cb32017-07-19 09:28:38 -070023#include <string>
24#include <vector>
Igor Murashkinaaebaa02015-01-26 10:55:53 -080025
Andreas Gampe9186ced2016-12-12 14:28:21 -080026#include "android-base/strings.h"
27
Igor Murashkinaaebaa02015-01-26 10:55:53 -080028namespace art {
29// A range of tokens to make token matching algorithms easier.
30//
31// We try really hard to avoid copying and store only a pointer and iterators to the
32// interiors of the vector, so a typical copy constructor never ends up doing a deep copy.
33// It is up to the user to play nice and not to mutate the strings in-place.
34//
35// Tokens are only copied if a mutating operation is performed (and even then only
36// if it *actually* mutates the token).
37struct TokenRange {
38 // Short-hand for a vector of strings. A single string and a token is synonymous.
39 using TokenList = std::vector<std::string>;
40
41 // Copying-from-vector constructor.
42 explicit TokenRange(const TokenList& token_list)
43 : token_list_(new TokenList(token_list)),
44 begin_(token_list_->begin()),
45 end_(token_list_->end())
46 {}
47
48 // Copying-from-iterator constructor
49 template <typename ForwardIterator>
Roland Levillain3887c462015-08-12 18:15:42 +010050 TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
Igor Murashkinaaebaa02015-01-26 10:55:53 -080051 : token_list_(new TokenList(it_begin, it_end)),
52 begin_(token_list_->begin()),
53 end_(token_list_->end())
54 {}
55
56#if 0
57 // Copying-from-vector constructor.
58 TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED,
59 TokenList::const_iterator it_begin,
60 TokenList::const_iterator it_end)
61 : token_list_(new TokenList(it_begin, it_end)),
62 begin_(token_list_->begin()),
63 end_(token_list_->end()) {
64 assert(it_begin >= token_list.begin());
65 assert(it_end <= token_list.end());
66 }
67#endif
68
69 // Copying from char array constructor, convertings into tokens (strings) along the way.
70 TokenRange(const char* token_list[], size_t length)
71 : token_list_(new TokenList(&token_list[0], &token_list[length])),
72 begin_(token_list_->begin()),
73 end_(token_list_->end())
74 {}
75
76 // Non-copying move-from-vector constructor. Takes over the token vector.
77 explicit TokenRange(TokenList&& token_list)
78 : token_list_(new TokenList(std::forward<TokenList>(token_list))),
79 begin_(token_list_->begin()),
80 end_(token_list_->end())
81 {}
82
83 // Non-copying constructor. Retain reference to existing list of tokens.
84 TokenRange(std::shared_ptr<TokenList> token_list,
85 TokenList::const_iterator it_begin,
86 TokenList::const_iterator it_end)
87 : token_list_(token_list),
88 begin_(it_begin),
89 end_(it_end) {
90 assert(it_begin >= token_list->begin());
91 assert(it_end <= token_list->end());
92 }
93
94 // Non-copying copy constructor.
Andreas Gampec801f0d2015-02-24 20:55:16 -080095 TokenRange(const TokenRange&) = default;
Igor Murashkinaaebaa02015-01-26 10:55:53 -080096
97 // Non-copying move constructor.
Andreas Gampec801f0d2015-02-24 20:55:16 -080098 TokenRange(TokenRange&&) = default;
Igor Murashkinaaebaa02015-01-26 10:55:53 -080099
100 // Non-copying constructor. Retains reference to an existing list of tokens, with offset.
101 explicit TokenRange(std::shared_ptr<TokenList> token_list)
102 : token_list_(token_list),
103 begin_(token_list_->begin()),
104 end_(token_list_->end())
105 {}
106
107 // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
108 using iterator = TokenList::const_iterator;
109
110 // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
111 using const_iterator = iterator;
112
113 // Create a token range by splitting a string. Each separator gets their own token.
114 // Since the separator are retained as tokens, it might be useful to call
115 // RemoveToken afterwards.
116 static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
117 TokenList new_token_list;
118
119 std::string tok;
120 for (auto&& c : string) {
121 for (char sep : separators) {
122 if (c == sep) {
123 // We spotted a separator character.
124 // Push back everything before the last separator as a new token.
125 // Push back the separator as a token.
126 if (!tok.empty()) {
127 new_token_list.push_back(tok);
128 tok = "";
129 }
130 new_token_list.push_back(std::string() + sep);
131 } else {
132 // Build up the token with another character.
133 tok += c;
134 }
135 }
136 }
137
138 if (!tok.empty()) {
139 new_token_list.push_back(tok);
140 }
141
142 return TokenRange(std::move(new_token_list));
143 }
144
145 // A RandomAccessIterator to the first element in this range.
146 iterator begin() const {
147 return begin_;
148 }
149
150 // A RandomAccessIterator to one past the last element in this range.
151 iterator end() const {
152 return end_;
153 }
154
155 // The size of the range, i.e. how many tokens are in it.
156 size_t Size() const {
157 return std::distance(begin_, end_);
158 }
159
160 // Are there 0 tokens in this range?
161 bool IsEmpty() const {
162 return Size() > 0;
163 }
164
165 // Look up a token by it's offset.
166 const std::string& GetToken(size_t offset) const {
167 assert(offset < Size());
168 return *(begin_ + offset);
169 }
170
171 // Does this token range equal the other range?
172 // Equality is defined as having both the same size, and
173 // each corresponding token being equal.
174 bool operator==(const TokenRange& other) const {
175 if (this == &other) {
176 return true;
177 }
178
179 if (Size() != other.Size()) {
180 return false;
181 }
182
183 return std::equal(begin(), end(), other.begin());
184 }
185
186 // Look up the token at the requested index.
187 const std::string& operator[](int index) const {
188 assert(index >= 0 && static_cast<size_t>(index) < Size());
189 return *(begin() + index);
190 }
191
192 // Does this current range start with the other range?
193 bool StartsWith(const TokenRange& other) const {
194 if (this == &other) {
195 return true;
196 }
197
198 if (Size() < other.Size()) {
199 return false;
200 }
201
202 auto& smaller = Size() < other.Size() ? *this : other;
203 auto& greater = Size() < other.Size() ? other : *this;
204
205 return std::equal(smaller.begin(), smaller.end(), greater.begin());
206 }
207
208 // Remove all characters 'c' from each token, potentially copying the underlying tokens.
209 TokenRange RemoveCharacter(char c) const {
210 TokenList new_token_list(begin(), end());
211
212 bool changed = false;
213 for (auto&& token : new_token_list) {
214 auto it = std::remove_if(token.begin(), token.end(), [&](char ch) {
215 if (ch == c) {
216 changed = true;
217 return true;
218 }
219 return false;
220 });
221 token.erase(it, token.end());
222 }
223
224 if (!changed) {
225 return *this;
226 }
227
228 return TokenRange(std::move(new_token_list));
229 }
230
231 // Remove all tokens matching this one, potentially copying the underlying tokens.
232 TokenRange RemoveToken(const std::string& token) {
233 return RemoveIf([&](const std::string& tok) { return tok == token; });
234 }
235
236 // Discard all empty tokens, potentially copying the underlying tokens.
237 TokenRange DiscardEmpty() const {
238 return RemoveIf([](const std::string& token) { return token.empty(); });
239 }
240
241 // Create a non-copying subset of this range.
242 // Length is trimmed so that the Slice does not go out of range.
243 TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
244 assert(offset < Size());
245
246 if (length != std::string::npos && offset + length > Size()) {
247 length = Size() - offset;
248 }
249
250 iterator it_end;
251 if (length == std::string::npos) {
252 it_end = end();
253 } else {
254 it_end = begin() + offset + length;
255 }
256
257 return TokenRange(token_list_, begin() + offset, it_end);
258 }
259
260 // Try to match the string with tokens from this range.
261 // Each token is used to match exactly once (after which the next token is used, and so on).
262 // The matching happens from left-to-right in a non-greedy fashion.
263 // If the currently-matched token is the wildcard, then the new outputted token will
264 // contain as much as possible until the next token is matched.
265 //
266 // For example, if this == ["a:", "_", "b:] and "_" is the match string, then
267 // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
268 //
269 // Since the string matching can fail (e.g. ["foo"] against "bar"), then this
270 // function can fail, in which cause it will return null.
271 std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
272 const std::string& wildcard) const {
273 TokenList new_token_list;
274
275 size_t wildcard_idx = std::string::npos;
276 size_t string_idx = 0;
277
278 // Function to push all the characters matched as a wildcard so far
279 // as a brand new token. It resets the wildcard matching.
280 // Empty wildcards are possible and ok, but only if wildcard matching was on.
281 auto maybe_push_wildcard_token = [&]() {
282 if (wildcard_idx != std::string::npos) {
283 size_t wildcard_length = string_idx - wildcard_idx;
284 std::string wildcard_substr = string.substr(wildcard_idx, wildcard_length);
285 new_token_list.push_back(std::move(wildcard_substr));
286
287 wildcard_idx = std::string::npos;
288 }
289 };
290
291 for (iterator it = begin(); it != end(); ++it) {
292 const std::string& tok = *it;
293
294 if (tok == wildcard) {
295 maybe_push_wildcard_token();
296 wildcard_idx = string_idx;
297 continue;
298 }
299
300 size_t next_token_idx = string.find(tok);
301 if (next_token_idx == std::string::npos) {
302 // Could not find token at all
303 return nullptr;
304 } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
305 // Found the token at a non-starting location, and we weren't
306 // trying to parse the wildcard.
307 return nullptr;
308 }
309
310 new_token_list.push_back(string.substr(next_token_idx, tok.size()));
311 maybe_push_wildcard_token();
312 string_idx += tok.size();
313 }
314
315 size_t remaining = string.size() - string_idx;
316 if (remaining > 0) {
317 if (wildcard_idx == std::string::npos) {
318 // Some characters were still remaining in the string,
319 // but it wasn't trying to match a wildcard.
320 return nullptr;
321 }
322 }
323
324 // If some characters are remaining, the rest must be a wildcard.
325 string_idx += remaining;
326 maybe_push_wildcard_token();
327
Yi Kongc57c6802018-10-29 14:28:56 -0700328 return std::make_unique<TokenRange>(std::move(new_token_list));
Igor Murashkinaaebaa02015-01-26 10:55:53 -0800329 }
330
331 // Do a quick match token-by-token, and see if they match.
332 // Any tokens with a wildcard in them are only matched up until the wildcard.
333 // If this is true, then the wildcard matching later on can still fail, so this is not
334 // a guarantee that the argument is correct, it's more of a strong hint that the
335 // user-provided input *probably* was trying to match this argument.
336 //
337 // Returns how many tokens were either matched (or ignored because there was a
338 // wildcard present). 0 means no match. If the size() tokens are returned.
339 size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
340 auto token_it = token_list.begin();
341 auto token_end = token_list.end();
342 auto name_it = begin();
343 auto name_end = end();
344
345 size_t matched_tokens = 0;
346
347 while (token_it != token_end && name_it != name_end) {
348 // Skip token matching when the corresponding name has a wildcard in it.
349 const std::string& name = *name_it;
350
351 size_t wildcard_idx = name.find(wildcard);
352 if (wildcard_idx == std::string::npos) { // No wildcard present
353 // Did the definition token match the user token?
354 if (name != *token_it) {
355 return matched_tokens;
356 }
357 } else {
358 std::string name_prefix = name.substr(0, wildcard_idx);
359
360 // Did the user token start with the up-to-the-wildcard prefix?
361 if (!StartsWith(*token_it, name_prefix)) {
362 return matched_tokens;
363 }
364 }
365
366 ++token_it;
367 ++name_it;
368 ++matched_tokens;
369 }
370
371 // If we got this far, it's either a full match or the token list was too short.
372 return matched_tokens;
373 }
374
375 // Flatten the token range by joining every adjacent token with the separator character.
376 // e.g. ["hello", "world"].join('$') == "hello$world"
377 std::string Join(char separator) const {
378 TokenList tmp(begin(), end());
Andreas Gampe9186ced2016-12-12 14:28:21 -0800379 return android::base::Join(tmp, separator);
Igor Murashkinaaebaa02015-01-26 10:55:53 -0800380 // TODO: Join should probably take an offset or iterators
381 }
382
383 private:
384 static bool StartsWith(const std::string& larger, const std::string& smaller) {
385 if (larger.size() >= smaller.size()) {
386 return std::equal(smaller.begin(), smaller.end(), larger.begin());
387 }
388
389 return false;
390 }
391
392 template <typename TPredicate>
393 TokenRange RemoveIf(const TPredicate& predicate) const {
394 // If any of the tokens in the token lists are empty, then
395 // we need to remove them and compress the token list into a smaller one.
396 bool remove = false;
397 for (auto it = begin_; it != end_; ++it) {
398 auto&& token = *it;
399
400 if (predicate(token)) {
401 remove = true;
402 break;
403 }
404 }
405
406 // Actually copy the token list and remove the tokens that don't match our predicate.
407 if (remove) {
408 auto token_list = std::make_shared<TokenList>(begin(), end());
409 TokenList::iterator new_end =
410 std::remove_if(token_list->begin(), token_list->end(), predicate);
411 token_list->erase(new_end, token_list->end());
412
413 assert(token_list_->size() > token_list->size() && "Nothing was actually removed!");
414
415 return TokenRange(token_list);
416 }
417
418 return *this;
419 }
420
421 const std::shared_ptr<std::vector<std::string>> token_list_;
422 const iterator begin_;
423 const iterator end_;
424};
425} // namespace art
426
427#endif // ART_CMDLINE_TOKEN_RANGE_H_