| /* |
| * Copyright (C) 2015 The Android Open Source Project |
| * |
| * Licensed under the Apache License, Version 2.0 (the "License"); |
| * you may not use this file except in compliance with the License. |
| * You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| #ifndef ART_CMDLINE_TOKEN_RANGE_H_ |
| #define ART_CMDLINE_TOKEN_RANGE_H_ |
| |
| #include <assert.h> |
| #include <vector> |
| #include <string> |
| #include <algorithm> |
| #include <memory> |
| |
| namespace art { |
// A range of tokens to make token matching algorithms easier.
//
// A TokenRange stores a shared pointer to a vector of tokens plus a pair of iterators
// into that vector, so copying a TokenRange copies only the pointer and the iterators
// and never ends up doing a deep copy of the tokens.
// It is up to the user to play nice and not to mutate the strings in-place.
//
// Tokens are only copied if a mutating operation is performed (and even then only
// if it *actually* mutates the token).
struct TokenRange {
  // Short-hand for a vector of strings. A single string and a token is synonymous.
  using TokenList = std::vector<std::string>;

  // Iterator type for begin() and end(). Guaranteed to be a RandomAccessIterator.
  using iterator = TokenList::const_iterator;

  // Iterator type for const begin() and const end(). Guaranteed to be a RandomAccessIterator.
  using const_iterator = iterator;

  // Copying-from-vector constructor.
  explicit TokenRange(const TokenList& token_list)
      : token_list_(std::make_shared<TokenList>(token_list)),
        begin_(token_list_->begin()),
        end_(token_list_->end()) {}

  // Copying-from-iterator constructor.
  template <typename ForwardIterator>
  TokenRange(ForwardIterator it_begin, ForwardIterator it_end)
      : token_list_(std::make_shared<TokenList>(it_begin, it_end)),
        begin_(token_list_->begin()),
        end_(token_list_->end()) {}

#if 0
  // Copying-from-vector constructor.
  TokenRange(const TokenList& token_list ATTRIBUTE_UNUSED,
             TokenList::const_iterator it_begin,
             TokenList::const_iterator it_end)
    : token_list_(new TokenList(it_begin, it_end)),
      begin_(token_list_->begin()),
      end_(token_list_->end()) {
    assert(it_begin >= token_list.begin());
    assert(it_end <= token_list.end());
  }
#endif

  // Copying-from-char-array constructor, converting the C strings into tokens (strings)
  // along the way. Reads token_list[0] .. token_list[length - 1].
  TokenRange(const char* token_list[], size_t length)
      : token_list_(std::make_shared<TokenList>(token_list, token_list + length)),
        begin_(token_list_->begin()),
        end_(token_list_->end()) {}

  // Non-copying move-from-vector constructor. Takes over the token vector.
  explicit TokenRange(TokenList&& token_list)
      : token_list_(std::make_shared<TokenList>(std::move(token_list))),
        begin_(token_list_->begin()),
        end_(token_list_->end()) {}

  // Non-copying constructor. Retains a reference to an existing list of tokens and
  // covers only [it_begin, it_end), which must lie inside that list.
  TokenRange(std::shared_ptr<TokenList> token_list,
             TokenList::const_iterator it_begin,
             TokenList::const_iterator it_end)
      : token_list_(std::move(token_list)),
        begin_(it_begin),
        end_(it_end) {
    assert(it_begin >= token_list_->cbegin());
    assert(it_end <= token_list_->cend());
  }

  // Non-copying copy constructor.
  TokenRange(const TokenRange&) = default;

  // Non-copying move constructor.
  TokenRange(TokenRange&&) = default;

  // Non-copying constructor. Retains a reference to an existing list of tokens and
  // covers the whole list.
  explicit TokenRange(std::shared_ptr<TokenList> token_list)
      : token_list_(std::move(token_list)),
        begin_(token_list_->begin()),
        end_(token_list_->end()) {}

  // Create a token range by splitting a string. Each separator gets their own token.
  // Since the separators are retained as tokens, it might be useful to call
  // RemoveToken afterwards.
  //
  // A character is a separator if it equals any element of 'separators'; every other
  // character is appended (exactly once) to the token currently being built.
  static TokenRange Split(const std::string& string, std::initializer_list<char> separators) {
    TokenList new_token_list;

    std::string tok;
    for (char c : string) {
      const bool is_separator =
          std::find(separators.begin(), separators.end(), c) != separators.end();

      if (!is_separator) {
        // Build up the token with another character.
        tok += c;
        continue;
      }

      // We spotted a separator character.
      // Push back everything since the last separator as a new token.
      if (!tok.empty()) {
        new_token_list.push_back(std::move(tok));
        tok.clear();  // Bring the moved-from string back to a known (empty) state.
      }
      // Push back the separator as a token of its own.
      new_token_list.push_back(std::string(1, c));
    }

    if (!tok.empty()) {
      new_token_list.push_back(std::move(tok));
    }

    return TokenRange(std::move(new_token_list));
  }

  // A RandomAccessIterator to the first element in this range.
  iterator begin() const {
    return begin_;
  }

  // A RandomAccessIterator to one past the last element in this range.
  iterator end() const {
    return end_;
  }

  // The size of the range, i.e. how many tokens are in it.
  size_t Size() const {
    return static_cast<size_t>(std::distance(begin_, end_));
  }

  // Are there 0 tokens in this range?
  bool IsEmpty() const {
    return Size() == 0;
  }

  // Look up a token by its offset from the start of this range.
  const std::string& GetToken(size_t offset) const {
    assert(offset < Size());
    return *(begin_ + offset);
  }

  // Does this token range equal the other range?
  // Equality is defined as having both the same size, and
  // each corresponding token being equal.
  bool operator==(const TokenRange& other) const {
    if (this == &other) {
      return true;
    }

    return Size() == other.Size() && std::equal(begin(), end(), other.begin());
  }

  // Look up the token at the requested index.
  const std::string& operator[](int index) const {
    assert(index >= 0 && static_cast<size_t>(index) < Size());
    return *(begin() + index);
  }

  // Does this current range start with the other range?
  // Always false when 'other' has more tokens than this range.
  bool StartsWith(const TokenRange& other) const {
    if (this == &other) {
      return true;
    }

    if (Size() < other.Size()) {
      return false;
    }

    // 'other' is known to be no longer than this range, so comparing over its
    // extent never reads past end().
    return std::equal(other.begin(), other.end(), begin());
  }

  // Remove all characters 'c' from each token. The tokens are copied only when at
  // least one of them contains 'c'; otherwise a (non-copying) copy of this range
  // is returned.
  TokenRange RemoveCharacter(char c) const {
    const bool contains_c = std::any_of(begin(), end(), [c](const std::string& token) {
      return token.find(c) != std::string::npos;
    });

    if (!contains_c) {
      return *this;
    }

    TokenList new_token_list(begin(), end());
    for (std::string& token : new_token_list) {
      token.erase(std::remove(token.begin(), token.end(), c), token.end());
    }

    return TokenRange(std::move(new_token_list));
  }

  // Remove all tokens matching this one, potentially copying the underlying tokens.
  TokenRange RemoveToken(const std::string& token) const {
    return RemoveIf([&](const std::string& tok) { return tok == token; });
  }

  // Discard all empty tokens, potentially copying the underlying tokens.
  TokenRange DiscardEmpty() const {
    return RemoveIf([](const std::string& token) { return token.empty(); });
  }

  // Create a non-copying subset of this range starting at 'offset'.
  // Length is trimmed so that the Slice does not go out of range; the default
  // (npos) means "everything up to the end". 'offset' may equal Size(), which
  // yields an empty range.
  TokenRange Slice(size_t offset, size_t length = std::string::npos) const {
    const size_t size = Size();
    assert(offset <= size);

    // Clamp against what is left rather than computing offset + length, which
    // could wrap around for very large lengths.
    const size_t count = std::min(length, size - offset);

    const iterator it_begin = begin() + offset;
    return TokenRange(token_list_, it_begin, it_begin + count);
  }

  // Try to match the string with tokens from this range.
  // Each token is used to match exactly once (after which the next token is used, and so on).
  // The matching happens from left-to-right in a non-greedy fashion.
  // If the currently-matched token is the wildcard, then the new outputted token will
  // contain as much as possible until the next token is matched.
  //
  // For example, if this == ["a:", "_", "b:"] and "_" is the match string, then
  // MatchSubstrings on "a:foob:" will yield: ["a:", "foo", "b:"]
  //
  // Since the string matching can fail (e.g. ["foo"] against "bar"), then this
  // function can fail, in which case it will return null.
  std::unique_ptr<TokenRange> MatchSubstrings(const std::string& string,
                                              const std::string& wildcard) const {
    TokenList new_token_list;

    // Start of the text being absorbed by a wildcard, or npos if no wildcard is pending.
    size_t wildcard_idx = std::string::npos;
    // How much of 'string' has been consumed so far.
    size_t string_idx = 0;

    // Function to push all the characters matched as a wildcard so far
    // as a brand new token. It resets the wildcard matching.
    // Empty wildcards are possible and ok, but only if wildcard matching was on.
    auto maybe_push_wildcard_token = [&]() {
      if (wildcard_idx != std::string::npos) {
        new_token_list.push_back(string.substr(wildcard_idx, string_idx - wildcard_idx));
        wildcard_idx = std::string::npos;
      }
    };

    for (const std::string& tok : *this) {
      if (tok == wildcard) {
        // Two wildcards in a row: the earlier one matches the empty string.
        maybe_push_wildcard_token();
        wildcard_idx = string_idx;
        continue;
      }

      // Only look at the part of the string that has not been consumed yet.
      const size_t next_token_idx = string.find(tok, string_idx);
      if (next_token_idx == std::string::npos) {
        // Could not find token at all
        return nullptr;
      }
      if (next_token_idx != string_idx && wildcard_idx == std::string::npos) {
        // Found the token at a non-starting location, and we weren't
        // trying to parse the wildcard.
        return nullptr;
      }

      // Everything between the wildcard start and this token belongs to the
      // wildcard; emit it first so the output keeps the left-to-right order.
      string_idx = next_token_idx;
      maybe_push_wildcard_token();

      new_token_list.push_back(tok);
      string_idx += tok.size();
    }

    if (string_idx < string.size()) {
      if (wildcard_idx == std::string::npos) {
        // Some characters were still remaining in the string,
        // but it wasn't trying to match a wildcard.
        return nullptr;
      }

      // If some characters are remaining, the rest must be a wildcard.
      string_idx = string.size();
    }
    maybe_push_wildcard_token();

    return std::unique_ptr<TokenRange>(new TokenRange(std::move(new_token_list)));
  }

  // Do a quick match token-by-token, and see if they match.
  // Any tokens with a wildcard in them are only matched up until the wildcard.
  // If this is true, then the wildcard matching later on can still fail, so this is not
  // a guarantee that the argument is correct, it's more of a strong hint that the
  // user-provided input *probably* was trying to match this argument.
  //
  // The tokens of this range are the names being matched against; 'token_list' holds
  // the tokens being tested.
  //
  // Returns how many tokens were either matched (or ignored because there was a
  // wildcard present). 0 means no match. If Size() is returned, every token of
  // this range was matched.
  size_t MaybeMatches(const TokenRange& token_list, const std::string& wildcard) const {
    auto token_it = token_list.begin();
    const auto token_end = token_list.end();
    auto name_it = begin();
    const auto name_end = end();

    size_t matched_tokens = 0;

    for (; token_it != token_end && name_it != name_end; ++token_it, ++name_it) {
      const std::string& name = *name_it;

      const size_t wildcard_idx = name.find(wildcard);
      if (wildcard_idx == std::string::npos) {
        // No wildcard present: the name must equal the tested token exactly.
        if (name != *token_it) {
          return matched_tokens;
        }
      } else {
        // Only the part of the name before the wildcard has to match.
        const std::string name_prefix = name.substr(0, wildcard_idx);
        if (!StartsWith(*token_it, name_prefix)) {
          return matched_tokens;
        }
      }

      ++matched_tokens;
    }

    // If we got this far, it's either a full match or one of the lists was too short.
    return matched_tokens;
  }

  // Flatten the token range by joining every adjacent token with the separator character.
  // e.g. ["hello", "world"].join('$') == "hello$world"
  // An empty range yields an empty string.
  std::string Join(char separator) const {
    std::string joined;
    for (iterator it = begin(); it != end(); ++it) {
      if (it != begin()) {
        joined += separator;
      }
      joined += *it;
    }
    return joined;
  }

 private:
  // Does 'larger' begin with the characters of 'smaller'?
  static bool StartsWith(const std::string& larger, const std::string& smaller) {
    return larger.size() >= smaller.size() &&
           larger.compare(0, smaller.size(), smaller) == 0;
  }

  // Returns a range without the tokens for which 'predicate' returns true.
  // The tokens are copied only if at least one of them has to be removed;
  // otherwise a (non-copying) copy of this range is returned.
  template <typename TPredicate>
  TokenRange RemoveIf(const TPredicate& predicate) const {
    if (std::none_of(begin_, end_, predicate)) {
      return *this;
    }

    // Copy the tokens of this range and drop the ones that satisfy the predicate.
    auto kept = std::make_shared<TokenList>(begin(), end());
    kept->erase(std::remove_if(kept->begin(), kept->end(), predicate), kept->end());

    assert(Size() > kept->size() && "Nothing was actually removed!");

    return TokenRange(kept);
  }

  // Backing storage, shared between all ranges derived from the same list.
  const std::shared_ptr<std::vector<std::string>> token_list_;
  // First token of this range (points into *token_list_).
  const iterator begin_;
  // One past the last token of this range (points into *token_list_).
  const iterator end_;
};
| } // namespace art |
| |
| #endif // ART_CMDLINE_TOKEN_RANGE_H_ |