Improve SkSL lexer performance
Bug: skia:
Change-Id: Ibbb427e511118a0a1819094a59c17cc0f966008c
Reviewed-on: https://skia-review.googlesource.com/145336
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp
index 9a30b8b..04f266b 100644
--- a/src/sksl/lex/Main.cpp
+++ b/src/sksl/lex/Main.cpp
@@ -54,7 +54,7 @@
out << " , fOffset(-1)\n";
out << " , fLength(-1) {}\n";
out << "\n";
- out << " " << token << "(Kind kind, int offset, int length)\n";
+ out << " " << token << "(Kind kind, int32_t offset, int32_t length)\n";
out << " : fKind(kind)\n";
out << " , fOffset(offset)\n";
out << " , fLength(length) {}\n";
@@ -66,7 +66,7 @@
out << "\n";
out << "class " << lexer << " {\n";
out << "public:\n";
- out << " void start(const char* text, size_t length) {\n";
+ out << " void start(const char* text, int32_t length) {\n";
out << " fText = text;\n";
out << " fLength = length;\n";
out << " fOffset = 0;\n";
@@ -76,8 +76,8 @@
out << "\n";
out << "private:\n";
out << " const char* fText;\n";
- out << " int fLength;\n";
- out << " int fOffset;\n";
+ out << " int32_t fLength;\n";
+ out << " int32_t fOffset;\n";
out << "};\n";
out << "\n";
out << "} // namespace\n";
@@ -98,7 +98,7 @@
for (const auto& row : dfa.fTransitions) {
states = std::max(states, row.size());
}
- out << "static int16_t mappings[" << dfa.fCharMappings.size() << "] = {\n ";
+ out << "static int8_t mappings[" << dfa.fCharMappings.size() << "] = {\n ";
const char* separator = "";
for (int m : dfa.fCharMappings) {
out << separator << std::to_string(m);
@@ -131,32 +131,31 @@
out << " };\n";
out << "\n";
- out << token << " " << lexer << "::next() {\n";;
- out << " int startOffset = fOffset;\n";
+ out << token << " " << lexer << "::next() {\n";
+ out << " // note that we cheat here: normally a lexer needs to worry about the case\n";
+ out << " // where a token has a prefix which is not itself a valid token - for instance, \n";
+ out << " // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid\n";
+ out << " // tokens. Our grammar doesn't have this property, so we can simplify the logic\n";
+ out << " // a bit.\n";
+ out << " int32_t startOffset = fOffset;\n";
out << " if (startOffset == fLength) {\n";
out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
out << " }\n";
- out << " int offset = startOffset;\n";
- out << " int state = 1;\n";
- out << " " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n";
- out << " int lastAcceptEnd = startOffset + 1;\n";
- out << " while (offset < fLength) {\n";
- out << " if ((uint8_t) fText[offset] >= " << dfa.fCharMappings.size() << ") {";
+ out << " int16_t state = 1;\n";
+ out << " while (fOffset < fLength) {\n";
+ out << " if ((uint8_t) fText[fOffset] >= " << dfa.fCharMappings.size() << ") {";
+ out << " ++fOffset;\n";
out << " break;";
out << " }";
- out << " state = transitions[mappings[(int) fText[offset]]][state];\n";
- out << " ++offset;\n";
- out << " if (!state) {\n";
+ out << " int16_t newState = transitions[mappings[(int) fText[fOffset]]][state];\n";
+ out << " if (!newState) {\n";
out << " break;\n";
out << " }\n";
- out << " // We seem to be getting away without doing this check.\n";
- out << " /*if (accepts[state] != -1)*/ {\n";
- out << " lastAccept = (" << token << "::Kind) accepts[state];\n";
- out << " lastAcceptEnd = offset;\n";
- out << " }\n";
+ out << " state = newState;";
+ out << " ++fOffset;\n";
out << " }\n";
- out << " fOffset = lastAcceptEnd;\n";
- out << " return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n";
+ out << " Token::Kind kind = (" << token << "::Kind) accepts[state];\n";
+ out << " return " << token << "(kind, startOffset, fOffset - startOffset);\n";
out << "}\n";
out << "\n";
out << "} // namespace\n";