| /* |
| * Copyright 2018 Google Inc. |
| * |
| * Use of this source code is governed by a BSD-style license that can be |
| * found in the LICENSE file. |
| */ |
| |
| #ifndef textParser_DEFINED |
| #define textParser_DEFINED |
| |
| #include <functional> |
| |
| #include "bookmaker.h" |
| |
| class BmhParser; |
| class Definition; |
| |
| class TextParser : public NonAssignable { |
| TextParser() {} // only for ParserCommon, TextParserSave |
| friend class ParserCommon; |
| friend class TextParserSave; |
| public: |
| virtual ~TextParser() {} |
| |
| TextParser(string fileName, const char* start, const char* end, int lineCount) |
| : fFileName(fileName) |
| , fStart(start) |
| , fLine(start) |
| , fChar(start) |
| , fEnd(end) |
| , fLineCount(lineCount) |
| { |
| } |
| |
| TextParser(const Definition* ); |
| |
| const char* anyOf(const char* str) const { |
| const char* ptr = fChar; |
| while (ptr < fEnd) { |
| if (strchr(str, ptr[0])) { |
| return ptr; |
| } |
| ++ptr; |
| } |
| return nullptr; |
| } |
| |
| const char* anyOf(const char* wordStart, const char* wordList[], size_t wordListCount) const { |
| const char** wordPtr = wordList; |
| const char** wordEnd = wordPtr + wordListCount; |
| const size_t matchLen = fChar - wordStart; |
| while (wordPtr < wordEnd) { |
| const char* word = *wordPtr++; |
| if (strlen(word) == matchLen && !strncmp(wordStart, word, matchLen)) { |
| return word; |
| } |
| } |
| return nullptr; |
| } |
| |
| // words must be alpha only |
| string anyWord(const vector<string>& wordList, int spaces) const { |
| const char* matchStart = fChar; |
| do { |
| int count = spaces; |
| while (matchStart < fEnd && !isalpha(matchStart[0])) { |
| ++matchStart; |
| } |
| const char* matchEnd = matchStart; |
| const char* nextWord = nullptr; |
| while (matchEnd < fEnd) { |
| if (isalpha(matchEnd[0])) { |
| } else if (' ' == matchEnd[0] && --count >= 0) { |
| if (!nextWord) { |
| nextWord = matchEnd; |
| } |
| } else { |
| break; |
| } |
| ++matchEnd; |
| } |
| size_t matchLen = matchEnd - matchStart; |
| for (auto word : wordList) { |
| if (word.length() != matchLen) { |
| continue; |
| } |
| for (unsigned index = 0; index < matchLen; ++index) { |
| if (tolower(matchStart[index]) != word[index]) { |
| goto nextWord; |
| } |
| } |
| return word; |
| nextWord: ; |
| } |
| matchStart = nextWord ? nextWord : matchEnd; |
| } while (matchStart < fEnd); |
| return ""; |
| } |
| |
| bool back(const char* pattern) { |
| size_t len = strlen(pattern); |
| const char* start = fChar - len; |
| if (start <= fStart) { |
| return false; |
| } |
| if (strncmp(start, pattern, len)) { |
| return false; |
| } |
| fChar = start; |
| return true; |
| } |
| |
| char backup(const char* pattern) const { |
| size_t len = strlen(pattern); |
| const char* start = fChar - len; |
| if (start <= fStart) { |
| return '\0'; |
| } |
| if (strncmp(start, pattern, len)) { |
| return '\0'; |
| } |
| return start[-1]; |
| } |
| |
| void backupWord() { |
| while (fChar > fStart && isalpha(fChar[-1])) { |
| --fChar; |
| } |
| } |
| |
| bool contains(const char* match, const char* lineEnd, const char** loc) const { |
| const char* result = this->strnstr(match, lineEnd); |
| if (loc) { |
| *loc = result; |
| } |
| return result; |
| } |
| |
| bool containsWord(const char* match, const char* lineEnd, const char** loc) { |
| size_t len = strlen(match); |
| do { |
| const char* result = this->strnstr(match, lineEnd); |
| if (!result) { |
| return false; |
| } |
| if ((result > fStart && isalnum(result[-1])) || (result + len < fEnd |
| && isalnum(result[len]))) { |
| fChar = result + len; |
| continue; |
| } |
| if (loc) { |
| *loc = result; |
| } |
| return true; |
| } while (true); |
| } |
| |
| // either /n/n or /n# will stop parsing a typedef |
| const char* doubleLF() const { |
| const char* ptr = fChar - 1; |
| const char* doubleStart = nullptr; |
| while (++ptr < fEnd) { |
| if (!doubleStart) { |
| if ('\n' == ptr[0]) { |
| doubleStart = ptr; |
| } |
| continue; |
| } |
| if ('\n' == ptr[0] || '#' == ptr[0]) { |
| return doubleStart; |
| } |
| if (' ' < ptr[0]) { |
| doubleStart = nullptr; |
| } |
| } |
| return nullptr; |
| } |
| |
| bool endsWith(const char* match) { |
| int matchLen = strlen(match); |
| if (matchLen > fChar - fLine) { |
| return false; |
| } |
| return !strncmp(fChar - matchLen, match, matchLen); |
| } |
| |
| bool eof() const { return fChar >= fEnd; } |
| |
| const char* lineEnd() const { |
| const char* ptr = fChar; |
| do { |
| if (ptr >= fEnd) { |
| return ptr; |
| } |
| char test = *ptr++; |
| if (test == '\n' || test == '\0') { |
| break; |
| } |
| } while (true); |
| return ptr; |
| } |
| |
| ptrdiff_t lineLength() const { |
| return this->lineEnd() - fLine; |
| } |
| |
| bool match(TextParser* ); |
| |
| char next() { |
| SkASSERT(fChar < fEnd); |
| char result = *fChar++; |
| if ('\n' == result) { |
| ++fLineCount; |
| fLine = fChar; |
| } |
| return result; |
| } |
| |
| char peek() const { SkASSERT(fChar < fEnd); return *fChar; } |
| |
| void restorePlace(const TextParser& save) { |
| fChar = save.fChar; |
| fLine = save.fLine; |
| fLineCount = save.fLineCount; |
| } |
| |
| void savePlace(TextParser* save) { |
| save->fChar = fChar; |
| save->fLine = fLine; |
| save->fLineCount = fLineCount; |
| } |
| |
| void reportError(const char* errorStr) const; |
| static string ReportFilename(string file); |
| void reportWarning(const char* errorStr) const; |
| |
| template <typename T> T reportError(const char* errorStr) const { |
| this->reportError(errorStr); |
| return T(); |
| } |
| |
| bool sentenceEnd(const char* check) const { |
| while (check > fStart) { |
| --check; |
| if (' ' < check[0] && '.' != check[0]) { |
| return false; |
| } |
| if ('.' == check[0]) { |
| return ' ' >= check[1]; |
| } |
| if ('\n' == check[0] && '\n' == check[1]) { |
| return true; |
| } |
| } |
| return true; |
| } |
| |
| void setForErrorReporting(const Definition* , const char* ); |
| |
| bool skipToBalancedEndBracket(char startB, char endB) { |
| SkASSERT(fChar < fEnd); |
| SkASSERT(startB == fChar[0]); |
| int startCount = 0; |
| do { |
| char test = this->next(); |
| startCount += startB == test; |
| startCount -= endB == test; |
| } while (startCount && fChar < fEnd); |
| return !startCount; |
| } |
| |
| bool skipToEndBracket(char endBracket, const char* end = nullptr) { |
| if (nullptr == end) { |
| end = fEnd; |
| } |
| while (fChar[0] != endBracket) { |
| if (fChar >= end) { |
| return false; |
| } |
| (void) this->next(); |
| } |
| return true; |
| } |
| |
| bool skipToEndBracket(const char* endBracket) { |
| size_t len = strlen(endBracket); |
| while (strncmp(fChar, endBracket, len)) { |
| if (fChar >= fEnd) { |
| return false; |
| } |
| (void) this->next(); |
| } |
| return true; |
| } |
| |
| bool skipLine() { |
| return skipToEndBracket('\n'); |
| } |
| |
| void skipTo(const char* skip) { |
| while (fChar < skip) { |
| this->next(); |
| } |
| } |
| |
| void skipToAlpha() { |
| while (fChar < fEnd && !isalpha(fChar[0])) { |
| fChar++; |
| } |
| } |
| |
| // returns true if saw close brace |
| bool skipToAlphaNum() { |
| bool sawCloseBrace = false; |
| while (fChar < fEnd && !isalnum(fChar[0])) { |
| sawCloseBrace |= '}' == *fChar++; |
| } |
| return sawCloseBrace; |
| } |
| |
| bool skipExact(const char* pattern) { |
| if (!this->startsWith(pattern)) { |
| return false; |
| } |
| this->skipName(pattern); |
| return true; |
| } |
| |
| // differs from skipToNonAlphaNum in that a.b isn't considered a full name, |
| // since a.b can't be found as a named definition |
| void skipFullName() { |
| do { |
| char last = '\0'; |
| while (fChar < fEnd && (isalnum(fChar[0]) |
| || '_' == fChar[0] /* || '-' == fChar[0] */ |
| || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]))) { |
| if (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) { |
| fChar++; |
| } |
| last = fChar[0]; |
| fChar++; |
| } |
| if (fChar + 1 >= fEnd || '/' != fChar[0] || !isalpha(last) || !isalpha(fChar[1])) { |
| break; // stop unless pattern is xxx/xxx as in I/O |
| } |
| fChar++; // skip slash |
| } while (true); |
| } |
| |
| int skipToLineBalance(char open, char close) { |
| int match = 0; |
| while (!this->eof() && '\n' != fChar[0]) { |
| match += open == this->peek(); |
| match -= close == this->next(); |
| } |
| return match; |
| } |
| |
| bool skipToLineStart() { |
| if (!this->skipLine()) { |
| return false; |
| } |
| if (!this->eof()) { |
| return this->skipWhiteSpace(); |
| } |
| return true; |
| } |
| |
| void skipToLineStart(int* indent, bool* sawReturn) { |
| SkAssertResult(this->skipLine()); |
| this->skipWhiteSpace(indent, sawReturn); |
| } |
| |
| void skipLower() { |
| while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) { |
| fChar++; |
| } |
| } |
| |
| void skipToNonAlphaNum() { |
| while (fChar < fEnd && (isalnum(fChar[0]) || '_' == fChar[0])) { |
| fChar++; |
| } |
| } |
| |
| void skipToNonName() { |
| while (fChar < fEnd && (isalnum(fChar[0]) |
| || '_' == fChar[0] || '-' == fChar[0] |
| || (':' == fChar[0] && fChar + 1 < fEnd && ':' == fChar[1]) |
| || ('.' == fChar[0] && fChar + 1 < fEnd && isalpha(fChar[1])))) { |
| if (':' == fChar[0] && fChar +1 < fEnd && ':' == fChar[1]) { |
| fChar++; |
| } |
| fChar++; |
| } |
| } |
| |
| void skipPhraseName() { |
| while (fChar < fEnd && (islower(fChar[0]) || '_' == fChar[0])) { |
| fChar++; |
| } |
| } |
| |
| void skipToSpace() { |
| while (fChar < fEnd && ' ' != fChar[0]) { |
| fChar++; |
| } |
| } |
| |
| void skipToWhiteSpace() { |
| while (fChar < fEnd && ' ' < fChar[0]) { |
| fChar++; |
| } |
| } |
| |
| bool skipName(const char* word) { |
| size_t len = strlen(word); |
| if (len <= (size_t) (fEnd - fChar) && !strncmp(word, fChar, len)) { |
| for (size_t i = 0; i < len; ++i) { |
| this->next(); |
| } |
| } |
| return this->eof() || ' ' >= fChar[0]; |
| } |
| |
| bool skipSpace() { |
| while (' ' == this->peek()) { |
| (void) this->next(); |
| if (fChar >= fEnd) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool skipWord(const char* word) { |
| if (!this->skipWhiteSpace()) { |
| return false; |
| } |
| const char* save = fChar; |
| if (!this->skipName(word)) { |
| fChar = save; |
| return false; |
| } |
| if (!this->skipWhiteSpace()) { |
| return false; |
| } |
| return true; |
| } |
| |
| bool skipWhiteSpace() { |
| while (' ' >= this->peek()) { |
| (void) this->next(); |
| if (fChar >= fEnd) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool skipWhiteSpace(int* indent, bool* skippedReturn) { |
| while (' ' >= this->peek()) { |
| *indent = *skippedReturn ? *indent + 1 : 1; |
| if ('\n' == this->peek()) { |
| *skippedReturn |= true; |
| *indent = 0; |
| } |
| (void) this->next(); |
| if (fChar >= fEnd) { |
| return false; |
| } |
| } |
| return true; |
| } |
| |
| bool startsWith(const char* str) const { |
| size_t len = strlen(str); |
| ptrdiff_t lineLen = fEnd - fChar; |
| return len <= (size_t) lineLen && 0 == strncmp(str, fChar, len); |
| } |
| |
| // ignores minor white space differences |
| bool startsWith(const char* str, size_t oLen) const { |
| size_t tIndex = 0; |
| size_t tLen = fEnd - fChar; |
| size_t oIndex = 0; |
| while (oIndex < oLen && tIndex < tLen) { |
| bool tSpace = ' ' >= fChar[tIndex]; |
| bool oSpace = ' ' >= str[oIndex]; |
| if (tSpace != oSpace) { |
| break; |
| } |
| if (tSpace) { |
| do { |
| ++tIndex; |
| } while (tIndex < tLen && ' ' >= fChar[tIndex]); |
| do { |
| ++oIndex; |
| } while (oIndex < oLen && ' ' >= str[oIndex]); |
| continue; |
| } |
| if (fChar[tIndex] != str[oIndex]) { |
| break; |
| } |
| ++tIndex; |
| ++oIndex; |
| } |
| return oIndex >= oLen; |
| } |
| |
| const char* strnchr(char ch, const char* end) const { |
| const char* ptr = fChar; |
| while (ptr < end) { |
| if (ptr[0] == ch) { |
| return ptr; |
| } |
| ++ptr; |
| } |
| return nullptr; |
| } |
| |
| const char* strnstr(const char *match, const char* end) const { |
| size_t matchLen = strlen(match); |
| SkASSERT(matchLen > 0); |
| ptrdiff_t len = end - fChar; |
| SkASSERT(len >= 0); |
| if ((size_t) len < matchLen ) { |
| return nullptr; |
| } |
| size_t count = len - matchLen; |
| for (size_t index = 0; index <= count; index++) { |
| if (0 == strncmp(&fChar[index], match, matchLen)) { |
| return &fChar[index]; |
| } |
| } |
| return nullptr; |
| } |
| |
| const char* trimmedBracketEnd(const char bracket) const { |
| int max = (int) (this->lineLength()); |
| int index = 0; |
| while (index < max && bracket != fChar[index]) { |
| ++index; |
| } |
| SkASSERT(index < max); |
| while (index > 0 && ' ' >= fChar[index - 1]) { |
| --index; |
| } |
| return fChar + index; |
| } |
| |
| const char* trimmedBracketEnd(string bracket) const { |
| size_t max = (size_t) (this->lineLength()); |
| string line(fChar, max); |
| size_t index = line.find(bracket); |
| SkASSERT(index < max); |
| while (index > 0 && ' ' >= fChar[index - 1]) { |
| --index; |
| } |
| return fChar + index; |
| } |
| |
| const char* trimmedBracketNoEnd(const char bracket) const { |
| int max = (int) (fEnd - fChar); |
| int index = 0; |
| while (index < max && bracket != fChar[index]) { |
| ++index; |
| } |
| SkASSERT(index < max); |
| while (index > 0 && ' ' >= fChar[index - 1]) { |
| --index; |
| } |
| return fChar + index; |
| } |
| |
| const char* trimmedLineEnd() const { |
| const char* result = this->lineEnd(); |
| while (result > fChar && ' ' >= result[-1]) { |
| --result; |
| } |
| return result; |
| } |
| |
| void trimEnd() { |
| while (fEnd > fStart && ' ' >= fEnd[-1]) { |
| --fEnd; |
| } |
| } |
| |
| // FIXME: nothing else in TextParser knows from C++ -- |
| // there could be a class between TextParser and ParserCommon |
| virtual string typedefName(); |
| |
| const char* wordEnd() const { |
| const char* end = fChar; |
| while (isalnum(end[0]) || '_' == end[0] || '-' == end[0]) { |
| ++end; |
| } |
| return end; |
| } |
| |
| string fFileName; |
| const char* fStart; |
| const char* fLine; |
| const char* fChar; |
| const char* fEnd; |
| size_t fLineCount; |
| }; |
| |
| class TextParserSave { |
| public: |
| TextParserSave(TextParser* parser) { |
| fParser = parser; |
| fSave.fFileName = parser->fFileName; |
| fSave.fStart = parser->fStart; |
| fSave.fLine = parser->fLine; |
| fSave.fChar = parser->fChar; |
| fSave.fEnd = parser->fEnd; |
| fSave.fLineCount = parser->fLineCount; |
| } |
| |
| void restore() const { |
| fParser->fFileName = fSave.fFileName; |
| fParser->fStart = fSave.fStart; |
| fParser->fLine = fSave.fLine; |
| fParser->fChar = fSave.fChar; |
| fParser->fEnd = fSave.fEnd; |
| fParser->fLineCount = fSave.fLineCount; |
| } |
| |
| private: |
| TextParser* fParser; |
| TextParser fSave; |
| }; |
| |
| static inline bool has_nonwhitespace(string s) { |
| bool nonwhite = false; |
| for (const char& c : s) { |
| if (' ' < c) { |
| nonwhite = true; |
| break; |
| } |
| } |
| return nonwhite; |
| } |
| |
| static inline void trim_end(string &s) { |
| s.erase(std::find_if(s.rbegin(), s.rend(), |
| std::not1(std::ptr_fun<int, int>(std::isspace))).base(), s.end()); |
| } |
| |
| static inline void trim_end_spaces(string &s) { |
| while (s.length() > 0 && ' ' == s.back()) { |
| s.pop_back(); |
| } |
| } |
| |
| static inline void trim_start(string &s) { |
| s.erase(s.begin(), std::find_if(s.begin(), s.end(), |
| std::not1(std::ptr_fun<int, int>(std::isspace)))); |
| } |
| |
| static inline void trim_start_end(string& s) { |
| trim_start(s); |
| trim_end(s); |
| } |
| |
| static inline string trim_inline_spaces(string s) { |
| bool lastSpace = false; |
| string trimmed; |
| for (const char* ptr = &s.front(); ptr <= &s.back(); ++ptr) { |
| char c = *ptr; |
| if (' ' >= c) { |
| if (!lastSpace) { |
| trimmed += ' '; |
| } |
| lastSpace = true; |
| continue; |
| } |
| lastSpace = false; |
| trimmed += c; |
| } |
| return trimmed; |
| } |
| |
| class EscapeParser : public TextParser { |
| public: |
| EscapeParser(const char* start, const char* end) : |
| TextParser("", start, end, 0) { |
| const char* reader = fStart; |
| fStorage = new char[end - start]; |
| char* writer = fStorage; |
| while (reader < fEnd) { |
| char ch = *reader++; |
| if (ch != '\\') { |
| *writer++ = ch; |
| } else { |
| char ctrl = *reader++; |
| if (ctrl == 'u') { |
| unsigned unicode = 0; |
| for (int i = 0; i < 4; ++i) { |
| unicode <<= 4; |
| SkASSERT((reader[0] >= '0' && reader[0] <= '9') || |
| (reader[0] >= 'A' && reader[0] <= 'F') || |
| (reader[0] >= 'a' && reader[0] <= 'f')); |
| int nibble = *reader++ - '0'; |
| if (nibble > 9) { |
| nibble = (nibble & ~('a' - 'A')) - 'A' + '9' + 1; |
| } |
| unicode |= nibble; |
| } |
| SkASSERT(unicode < 256); |
| *writer++ = (unsigned char) unicode; |
| } else { |
| SkASSERT(ctrl == 'n'); |
| *writer++ = '\n'; |
| } |
| } |
| } |
| fStart = fLine = fChar = fStorage; |
| fEnd = writer; |
| } |
| |
| ~EscapeParser() override { |
| delete fStorage; |
| } |
| private: |
| char* fStorage; |
| }; |
| |
| // some methods cannot be trivially parsed; look for class-name / ~ / operator |
| class MethodParser : public TextParser { |
| public: |
| MethodParser(string className, string fileName, |
| const char* start, const char* end, int lineCount) |
| : TextParser(fileName, start, end, lineCount) |
| , fClassName(className) { |
| size_t doubleColons = className.find_last_of("::"); |
| if (string::npos != doubleColons) { |
| fLocalName = className.substr(doubleColons + 1); |
| SkASSERT(fLocalName.length() > 0); |
| } |
| } |
| |
| ~MethodParser() override {} |
| |
| string localName() const { |
| return fLocalName; |
| } |
| |
| void setLocalName(string name) { |
| if (name == fClassName) { |
| fLocalName = ""; |
| } else { |
| fLocalName = name; |
| } |
| } |
| |
| // returns true if close brace was skipped |
| int skipToMethodStart() { |
| if (!fClassName.length()) { |
| return this->skipToAlphaNum(); |
| } |
| int braceCount = 0; |
| while (!this->eof() && !isalnum(this->peek()) && '~' != this->peek()) { |
| braceCount += '{' == this->peek(); |
| braceCount -= '}' == this->peek(); |
| this->next(); |
| } |
| return braceCount; |
| } |
| |
| void skipToMethodEnd(Resolvable resolvable); |
| |
| bool wordEndsWith(const char* str) const { |
| const char* space = this->strnchr(' ', fEnd); |
| if (!space) { |
| return false; |
| } |
| size_t len = strlen(str); |
| if (space < fChar + len) { |
| return false; |
| } |
| return !strncmp(str, space - len, len); |
| } |
| |
| private: |
| string fClassName; |
| string fLocalName; |
| typedef TextParser INHERITED; |
| }; |
| |
| #endif |