blob: 335806795a4d01551d4b65b83c8e7c8849009b23 [file] [log] [blame]
Igor Murashkinaaebaa02015-01-26 10:55:53 -08001/*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17#ifndef ART_CMDLINE_TOKEN_RANGE_H_
18#define ART_CMDLINE_TOKEN_RANGE_H_
19
20#include <assert.h>
21#include <vector>
22#include <string>
23#include <algorithm>
24#include <memory>
25
26namespace art {
// A range of tokens to make token matching algorithms easier.
//
// A TokenRange is a cheap read-only view: it shares ownership of a vector of
// strings and stores a pair of iterators into that vector. Copying a range or
// taking a Slice() never deep-copies the tokens.
// It is up to the user to play nice and not to mutate the strings in-place.
//
// Tokens are only copied if a mutating operation is performed (and even then only
// if it *actually* mutates the token).
struct TokenRange {
  // Short-hand for a vector of strings. A single string and a token are synonymous.
  using TokenList = std::vector<std::string>;

  // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
  using iterator = TokenList::const_iterator;

  // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
  using const_iterator = iterator;

  // Copying-from-vector constructor. The range covers a private copy of token_list.
  explicit TokenRange(const TokenList& token_list)
    : token_list_(std::make_shared<TokenList>(token_list)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Copying-from-iterator constructor. The range covers a private copy of [it_begin, it_end).
  template <typename ForwardIterator>
  TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
    : token_list_(std::make_shared<TokenList>(it_begin, it_end)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Copying from char array constructor, converting the first 'length' C strings
  // into tokens (strings) along the way.
  TokenRange(const char* token_list[], size_t length)
    : token_list_(std::make_shared<TokenList>(token_list, token_list + length)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Non-copying move-from-vector constructor. Takes over the token vector.
  explicit TokenRange(TokenList&& token_list)
    : token_list_(std::make_shared<TokenList>(std::move(token_list))),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Non-copying constructor. Retains a reference to an existing list of tokens and
  // exposes only [it_begin, it_end), which must be iterators into that list.
  TokenRange(std::shared_ptr<TokenList> token_list,
             TokenList::const_iterator it_begin,
             TokenList::const_iterator it_end)
    : token_list_(std::move(token_list)),
      begin_(it_begin),
      end_(it_end) {
    assert(it_begin >= token_list_->begin());
    assert(it_end <= token_list_->end());
    assert(it_begin <= it_end);
  }

  // Non-copying copy constructor: the new range shares the same token vector.
  TokenRange(const TokenRange&) = default;

  // Non-copying move constructor.
  TokenRange(TokenRange&&) = default;

  // Non-copying constructor. Retains a reference to an existing list of tokens
  // and covers all of it.
  explicit TokenRange(std::shared_ptr<TokenList> token_list)
    : token_list_(std::move(token_list)),
      begin_(token_list_->begin()),
      end_(token_list_->end())
  {}

  // Create a token range by splitting a string. Each separator gets its own token.
  // Since the separators are retained as tokens, it might be useful to call
  // RemoveToken afterwards.
  //
  // e.g. Split("a b:c", {' ', ':'}) == ["a", " ", "b", ":", "c"]
  static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
    TokenList new_token_list;

    std::string tok;
    for (char c : string) {
      // Classify the character exactly once, no matter how many separators were
      // given; otherwise it would be appended once per non-matching separator.
      const bool is_separator =
          std::find(separators.begin(), separators.end(), c) != separators.end();

      if (!is_separator) {
        // Build up the token with another character.
        tok += c;
        continue;
      }

      // Push back everything accumulated since the last separator as a new token,
      // then push back the separator itself as a token.
      if (!tok.empty()) {
        new_token_list.push_back(std::move(tok));
        tok.clear();  // Bring the moved-from string back to a known (empty) state.
      }
      new_token_list.push_back(std::string(1, c));
    }

    if (!tok.empty()) {
      new_token_list.push_back(std::move(tok));
    }

    return TokenRange(std::move(new_token_list));
  }

  // A RandomAccessIterator to the first element in this range.
  iterator begin() const {
    return begin_;
  }

  // A RandomAccessIterator to one past the last element in this range.
  iterator end() const {
    return end_;
  }

  // The size of the range, i.e. how many tokens are in it.
  size_t Size() const {
    return static_cast<size_t>(end_ - begin_);
  }

  // Are there 0 tokens in this range?
  bool IsEmpty() const {
    return begin_ == end_;
  }

  // Look up a token by its offset. The offset must be less than Size().
  const std::string& GetToken(size_t offset) const {
    assert(offset < Size());
    return *(begin_ + offset);
  }

  // Does this token range equal the other range?
  // Equality is defined as having both the same size, and
  // each corresponding token being equal.
  bool operator==(const TokenRange& other) const {
    if (this == &other) {
      return true;
    }

    return Size() == other.Size() && std::equal(begin_, end_, other.begin_);
  }

  // Look up the token at the requested index. The index must be in [0, Size()).
  const std::string& operator[](int index) const {
    assert(index >= 0 && static_cast<size_t>(index) < Size());
    return *(begin_ + index);
  }

  // Does this current range start with the other range?
  bool StartsWith(const TokenRange& other) const {
    if (this == &other) {
      return true;
    }

    // A shorter range can never start with a longer one.
    if (Size() < other.Size()) {
      return false;
    }

    return std::equal(other.begin_, other.end_, begin_);
  }

  // Remove all characters 'c' from each token. The tokens are only copied if at
  // least one of them actually contains 'c'; otherwise this range is returned as-is.
  TokenRange RemoveCharacter(char c) const {
    const bool present = std::any_of(begin_, end_, [c](const std::string& token) {
      return token.find(c) != std::string::npos;
    });

    if (!present) {
      return *this;
    }

    TokenList stripped(begin_, end_);
    for (std::string& token : stripped) {
      token.erase(std::remove(token.begin(), token.end(), c), token.end());
    }

    return TokenRange(std::move(stripped));
  }

  // Remove all tokens matching this one, potentially copying the underlying tokens.
  TokenRange RemoveToken(const std::string& token) const {
    return RemoveIf([&](const std::string& tok) { return tok == token; });
  }

  // Discard all empty tokens, potentially copying the underlying tokens.
  TokenRange DiscardEmpty() const {
    return RemoveIf([](const std::string& token) { return token.empty(); });
  }

  // Create a non-copying subset of this range, starting at 'offset'.
  // Length is trimmed so that the Slice does not go out of range; the default
  // length means "everything up to the end". An offset equal to Size() is allowed
  // and yields an empty range.
  TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
    assert(offset <= Size());

    // Clamp against what is left rather than testing 'offset + length', which
    // could wrap around for very large lengths.
    const size_t available = Size() - offset;
    const size_t count = std::min(length, available);

    const iterator slice_begin = begin_ + offset;
    return TokenRange(token_list_, slice_begin, slice_begin + count);
  }

  // Try to match the string with tokens from this range.
  // Each token is used to match exactly once (after which the next token is used, and so on).
  // The matching happens from left-to-right in a non-greedy fashion.
  // If the currently-matched token is the wildcard, then the new outputted token will
  // contain everything up to the point where the next token is matched (or the rest
  // of the string if the wildcard is the last token).
  //
  // For example, if this == ["a:", "_", "b:"] and "_" is the wildcard, then
  // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
  //
  // Since the string matching can fail (e.g. ["foo"] against "bar"), this
  // function can fail, in which case it will return null.
  std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
                                              const std::string& wildcard) const {
    TokenList new_token_list;

    // Start of the text absorbed by a pending wildcard, or npos if none is pending.
    size_t wildcard_idx = std::string::npos;
    // How much of 'string' has been consumed so far.
    size_t string_idx = 0;

    // Function to push all the characters matched as a wildcard so far
    // as a brand new token. It resets the wildcard matching.
    // Empty wildcards are possible and ok, but only if wildcard matching was on.
    auto maybe_push_wildcard_token = [&]() {
      if (wildcard_idx != std::string::npos) {
        new_token_list.push_back(string.substr(wildcard_idx, string_idx - wildcard_idx));
        wildcard_idx = std::string::npos;
      }
    };

    for (iterator it = begin_; it != end_; ++it) {
      const std::string& tok = *it;

      if (tok == wildcard) {
        // Back-to-back wildcards: the earlier one matches the empty string.
        maybe_push_wildcard_token();
        wildcard_idx = string_idx;
        continue;
      }

      // Only look at the part of the string that has not been consumed yet.
      const size_t next_token_idx = string.find(tok, string_idx);
      if (next_token_idx == std::string::npos) {
        // Could not find token at all
        return nullptr;
      } else if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
        // Found the token at a non-starting location, and we weren't
        // trying to parse the wildcard.
        return nullptr;
      }

      // Whatever lies between the wildcard start and this token belongs to the
      // wildcard; emit it first so that the output keeps the input order.
      string_idx = next_token_idx;
      maybe_push_wildcard_token();

      new_token_list.push_back(tok);
      string_idx += tok.size();
    }

    if (string_idx < string.size() && wildcard_idx == std::string::npos) {
      // Some characters were still remaining in the string,
      // but it wasn't trying to match a wildcard.
      return nullptr;
    }

    // If some characters are remaining, the rest must be a wildcard.
    string_idx = string.size();
    maybe_push_wildcard_token();

    return std::unique_ptr<TokenRange>(new TokenRange(std::move(new_token_list)));
  }

  // Do a quick match token-by-token, and see if they match.
  // Any tokens with a wildcard in them are only matched up until the wildcard.
  // If this is true, then the wildcard matching later on can still fail, so this is not
  // a guarantee that the argument is correct, it's more of a strong hint that the
  // user-provided input *probably* was trying to match this argument.
  //
  // 'this' holds the definition (name) tokens and 'token_list' the user tokens.
  //
  // Returns how many leading tokens were either matched or accepted because the
  // name contained a wildcard. 0 means no match; the count is at most the size
  // of the shorter of the two ranges.
  size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
    size_t matched_tokens = 0;

    iterator token_it = token_list.begin();
    const iterator token_end = token_list.end();

    for (iterator name_it = begin_;
         name_it != end_ && token_it != token_end;
         ++name_it, ++token_it) {
      const std::string& name = *name_it;
      const size_t wildcard_idx = name.find(wildcard);

      // Without a wildcard the tokens must be identical; with one, the user token
      // only has to start with the up-to-the-wildcard prefix of the name.
      const bool matches = (wildcard_idx == std::string::npos)
          ? (name == *token_it)
          : StartsWith(*token_it, name.substr(0, wildcard_idx));

      if (!matches) {
        break;
      }

      ++matched_tokens;
    }

    // If we got this far, it's a mismatch, a full match, or one list was too short.
    return matched_tokens;
  }

  // Flatten the token range by joining every adjacent token with the separator character.
  // e.g. ["hello", "world"].join('$') == "hello$world"
  // An empty range joins to the empty string.
  std::string Join(char separator) const {
    std::string joined;
    for (iterator it = begin_; it != end_; ++it) {
      if (it != begin_) {
        joined += separator;
      }
      joined += *it;
    }
    return joined;
  }

 private:
  // Does the string 'larger' begin with the string 'smaller'?
  static bool StartsWith(const std::string& larger, const std::string& smaller) {
    return larger.size() >= smaller.size() &&
        std::equal(smaller.begin(), smaller.end(), larger.begin());
  }

  // Return a range without the tokens for which the predicate is true.
  // The tokens are only copied if at least one of them has to be removed;
  // otherwise this range is returned as-is.
  template <typename TPredicate>
  TokenRange RemoveIf(const TPredicate& predicate) const {
    if (!std::any_of(begin_, end_, predicate)) {
      return *this;
    }

    // Copy just the visible tokens, then compact away the ones that match.
    auto token_list = std::make_shared<TokenList>(begin_, end_);
    token_list->erase(std::remove_if(token_list->begin(), token_list->end(), predicate),
                      token_list->end());

    assert(Size() > token_list->size() && "Nothing was actually removed!");

    return TokenRange(token_list);
  }

  // Shared owner of the underlying tokens; keeps begin_ and end_ valid.
  const std::shared_ptr<std::vector<std::string>> token_list_;
  // First token visible through this range.
  const iterator begin_;
  // One past the last token visible through this range.
  const iterator end_;
};
423} // namespace art
424
425#endif // ART_CMDLINE_TOKEN_RANGE_H_