improved SkSL lexer performance
Bug: skia:
Change-Id: Ibbb427e511118a0a1819094a59c17cc0f966008c
Reviewed-on: https://skia-review.googlesource.com/145336
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
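
The core of the change is the scan loop in the generated Lexer::next(): it no longer tracks a separate lastAccept/lastAcceptEnd pair, because no SkSL token has a prefix that stops being a valid token partway through. The lexer can therefore just walk the DFA until it hits a dead state and accept whatever state it stopped in, and the table/offset types are narrowed (int8_t mappings, int32_t offsets). The sketch below is only an illustration of that simplified table-driven loop: the token kinds and the tiny mappings/transitions/accepts tables are made-up stand-ins for the tables that src/sksl/lex/Main.cpp actually generates.

// Illustrative sketch only: toy tables standing in for the generated
// mappings/transitions/accepts data, with the same loop shape as the new
// Lexer::next().
#include <cstdint>
#include <cstdio>
#include <cstring>

enum class Kind : int8_t { END_OF_FILE, IDENTIFIER, NUMBER, WHITESPACE, INVALID };

// Toy character classes: 0 = other, 1 = letter, 2 = digit, 3 = space.
static int8_t charClass(char c) {
    if (c >= 'a' && c <= 'z') { return 1; }
    if (c >= '0' && c <= '9') { return 2; }
    if (c == ' ')             { return 3; }
    return 0;
}

// transitions[class][state]; states: 0 = dead, 1 = start, 2 = identifier,
// 3 = number, 4 = whitespace, 5 = single invalid character.
static const int16_t transitions[4][6] = {
    /* other  */ {0, 5, 0, 0, 0, 0},
    /* letter */ {0, 2, 2, 0, 0, 0},
    /* digit  */ {0, 3, 2, 3, 0, 0},
    /* space  */ {0, 4, 0, 0, 4, 0},
};

// Token kind accepted by each state.
static const Kind accepts[6] = {Kind::INVALID, Kind::INVALID, Kind::IDENTIFIER,
                                Kind::NUMBER,  Kind::WHITESPACE, Kind::INVALID};

struct Token {
    Kind    fKind;
    int32_t fOffset;
    int32_t fLength;
};

struct ToyLexer {
    const char* fText;
    int32_t     fLength;
    int32_t     fOffset;

    Token next() {
        // Same "no backtracking" simplification as the generated lexer: run the
        // DFA until it dies, then accept whatever state we ended in.
        int32_t startOffset = fOffset;
        if (startOffset == fLength) {
            return {Kind::END_OF_FILE, startOffset, 0};
        }
        int16_t state = 1;
        while (fOffset < fLength) {
            int16_t newState = transitions[charClass(fText[fOffset])][state];
            if (!newState) {
                break;
            }
            state = newState;
            ++fOffset;
        }
        return {accepts[state], startOffset, fOffset - startOffset};
    }
};

int main() {
    const char* src = "foo 123 bar42";
    ToyLexer lexer{src, (int32_t) strlen(src), 0};
    for (Token t = lexer.next(); t.fKind != Kind::END_OF_FILE; t = lexer.next()) {
        printf("kind=%d offset=%d length=%d\n", (int) t.fKind, t.fOffset, t.fLength);
    }
    return 0;
}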
diff --git a/src/sksl/SkSLLexer.cpp b/src/sksl/SkSLLexer.cpp
index 5bb970a..b514462 100644
--- a/src/sksl/SkSLLexer.cpp
+++ b/src/sksl/SkSLLexer.cpp
@@ -11,7 +11,7 @@
namespace SkSL {
-static int16_t mappings[127] = {
+static int8_t mappings[127] = {
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 3, 1, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 4, 3, 5, 6, 7, 8, 3, 9, 10, 11, 12,
13, 14, 15, 16, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, 20, 21, 22, 23, 24, 25, 26,
@@ -940,31 +940,30 @@
};
Token Lexer::next() {
- int startOffset = fOffset;
+ // note that we cheat here: normally a lexer needs to worry about the case
+ // where a token has a prefix which is not itself a valid token - for instance,
+ // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
+ // tokens. Our grammar doesn't have this property, so we can simplify the logic
+ // a bit.
+ int32_t startOffset = fOffset;
if (startOffset == fLength) {
return Token(Token::END_OF_FILE, startOffset, 0);
}
- int offset = startOffset;
- int state = 1;
- Token::Kind lastAccept = Token::Kind::INVALID;
- int lastAcceptEnd = startOffset + 1;
- while (offset < fLength) {
- if ((uint8_t)fText[offset] >= 127) {
+ int16_t state = 1;
+ while (fOffset < fLength) {
+ if ((uint8_t)fText[fOffset] >= 127) {
+ ++fOffset;
break;
}
- state = transitions[mappings[(int)fText[offset]]][state];
- ++offset;
- if (!state) {
+ int16_t newState = transitions[mappings[(int)fText[fOffset]]][state];
+ if (!newState) {
break;
}
- // We seem to be getting away without doing this check.
- /*if (accepts[state] != -1)*/ {
- lastAccept = (Token::Kind)accepts[state];
- lastAcceptEnd = offset;
- }
+ state = newState;
+ ++fOffset;
}
- fOffset = lastAcceptEnd;
- return Token(lastAccept, startOffset, lastAcceptEnd - startOffset);
+ Token::Kind kind = (Token::Kind)accepts[state];
+ return Token(kind, startOffset, fOffset - startOffset);
}
-} // namespace SkSL
+} // namespace
diff --git a/src/sksl/SkSLLexer.h b/src/sksl/SkSLLexer.h
index 2700287..1d090f2 100644
--- a/src/sksl/SkSLLexer.h
+++ b/src/sksl/SkSLLexer.h
@@ -215,7 +215,8 @@
Token() : fKind(Kind::INVALID), fOffset(-1), fLength(-1) {}
- Token(Kind kind, int offset, int length) : fKind(kind), fOffset(offset), fLength(length) {}
+ Token(Kind kind, int32_t offset, int32_t length)
+ : fKind(kind), fOffset(offset), fLength(length) {}
Kind fKind;
int fOffset;
@@ -224,7 +225,7 @@
class Lexer {
public:
- void start(const char* text, size_t length) {
+ void start(const char* text, int32_t length) {
fText = text;
fLength = length;
fOffset = 0;
@@ -234,9 +235,9 @@
private:
const char* fText;
- int fLength;
- int fOffset;
+ int32_t fLength;
+ int32_t fOffset;
};
-} // namespace SkSL
+} // namespace
#endif
diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp
index 9a30b8b..04f266b 100644
--- a/src/sksl/lex/Main.cpp
+++ b/src/sksl/lex/Main.cpp
@@ -54,7 +54,7 @@
out << " , fOffset(-1)\n";
out << " , fLength(-1) {}\n";
out << "\n";
- out << " " << token << "(Kind kind, int offset, int length)\n";
+ out << " " << token << "(Kind kind, int32_t offset, int32_t length)\n";
out << " : fKind(kind)\n";
out << " , fOffset(offset)\n";
out << " , fLength(length) {}\n";
@@ -66,7 +66,7 @@
out << "\n";
out << "class " << lexer << " {\n";
out << "public:\n";
- out << " void start(const char* text, size_t length) {\n";
+ out << " void start(const char* text, int32_t length) {\n";
out << " fText = text;\n";
out << " fLength = length;\n";
out << " fOffset = 0;\n";
@@ -76,8 +76,8 @@
out << "\n";
out << "private:\n";
out << " const char* fText;\n";
- out << " int fLength;\n";
- out << " int fOffset;\n";
+ out << " int32_t fLength;\n";
+ out << " int32_t fOffset;\n";
out << "};\n";
out << "\n";
out << "} // namespace\n";
@@ -98,7 +98,7 @@
for (const auto& row : dfa.fTransitions) {
states = std::max(states, row.size());
}
- out << "static int16_t mappings[" << dfa.fCharMappings.size() << "] = {\n ";
+ out << "static int8_t mappings[" << dfa.fCharMappings.size() << "] = {\n ";
const char* separator = "";
for (int m : dfa.fCharMappings) {
out << separator << std::to_string(m);
@@ -131,32 +131,31 @@
out << " };\n";
out << "\n";
- out << token << " " << lexer << "::next() {\n";;
- out << " int startOffset = fOffset;\n";
+ out << token << " " << lexer << "::next() {\n";
+ out << " // note that we cheat here: normally a lexer needs to worry about the case\n";
+ out << " // where a token has a prefix which is not itself a valid token - for instance, \n";
+ out << " // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid\n";
+ out << " // tokens. Our grammar doesn't have this property, so we can simplify the logic\n";
+ out << " // a bit.\n";
+ out << " int32_t startOffset = fOffset;\n";
out << " if (startOffset == fLength) {\n";
out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
out << " }\n";
- out << " int offset = startOffset;\n";
- out << " int state = 1;\n";
- out << " " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n";
- out << " int lastAcceptEnd = startOffset + 1;\n";
- out << " while (offset < fLength) {\n";
- out << " if ((uint8_t) fText[offset] >= " << dfa.fCharMappings.size() << ") {";
+ out << " int16_t state = 1;\n";
+ out << " while (fOffset < fLength) {\n";
+ out << " if ((uint8_t) fText[fOffset] >= " << dfa.fCharMappings.size() << ") {";
+ out << " ++fOffset;\n";
out << " break;";
out << " }";
- out << " state = transitions[mappings[(int) fText[offset]]][state];\n";
- out << " ++offset;\n";
- out << " if (!state) {\n";
+ out << " int16_t newState = transitions[mappings[(int) fText[fOffset]]][state];\n";
+ out << " if (!newState) {\n";
out << " break;\n";
out << " }\n";
- out << " // We seem to be getting away without doing this check.\n";
- out << " /*if (accepts[state] != -1)*/ {\n";
- out << " lastAccept = (" << token << "::Kind) accepts[state];\n";
- out << " lastAcceptEnd = offset;\n";
- out << " }\n";
+ out << " state = newState;";
+ out << " ++fOffset;\n";
out << " }\n";
- out << " fOffset = lastAcceptEnd;\n";
- out << " return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n";
+ out << " Token::Kind kind = (" << token << "::Kind) accepts[state];\n";
+ out << " return " << token << "(kind, startOffset, fOffset - startOffset);\n";
out << "}\n";
out << "\n";
out << "} // namespace\n";