Improve SkSL lexer performance

Bug: skia:
Change-Id: Ibbb427e511118a0a1819094a59c17cc0f966008c
Reviewed-on: https://skia-review.googlesource.com/145336
Reviewed-by: Greg Daniel <egdaniel@google.com>
Commit-Queue: Ethan Nicholas <ethannicholas@google.com>
diff --git a/src/sksl/lex/Main.cpp b/src/sksl/lex/Main.cpp
index 9a30b8b..04f266b 100644
--- a/src/sksl/lex/Main.cpp
+++ b/src/sksl/lex/Main.cpp
@@ -54,7 +54,7 @@
     out << "    , fOffset(-1)\n";
     out << "    , fLength(-1) {}\n";
     out << "\n";
-    out << "    " << token << "(Kind kind, int offset, int length)\n";
+    out << "    " << token << "(Kind kind, int32_t offset, int32_t length)\n";
     out << "    : fKind(kind)\n";
     out << "    , fOffset(offset)\n";
     out << "    , fLength(length) {}\n";
@@ -66,7 +66,7 @@
     out << "\n";
     out << "class " << lexer << " {\n";
     out << "public:\n";
-    out << "    void start(const char* text, size_t length) {\n";
+    out << "    void start(const char* text, int32_t length) {\n";
     out << "        fText = text;\n";
     out << "        fLength = length;\n";
     out << "        fOffset = 0;\n";
@@ -76,8 +76,8 @@
     out << "\n";
     out << "private:\n";
     out << "    const char* fText;\n";
-    out << "    int fLength;\n";
-    out << "    int fOffset;\n";
+    out << "    int32_t fLength;\n";
+    out << "    int32_t fOffset;\n";
     out << "};\n";
     out << "\n";
     out << "} // namespace\n";
@@ -98,7 +98,7 @@
     for (const auto& row : dfa.fTransitions) {
         states = std::max(states, row.size());
     }
-    out << "static int16_t mappings[" << dfa.fCharMappings.size() << "] = {\n    ";
+    out << "static int8_t mappings[" << dfa.fCharMappings.size() << "] = {\n    ";
     const char* separator = "";
     for (int m : dfa.fCharMappings) {
         out << separator << std::to_string(m);
@@ -131,32 +131,31 @@
     out << " };\n";
     out << "\n";
 
-    out << token << " " << lexer << "::next() {\n";;
-    out << "    int startOffset = fOffset;\n";
+    out << token << " " << lexer << "::next() {\n";
+    out << "    // note that we cheat here: normally a lexer needs to worry about the case\n";
+    out << "    // where a token has a prefix which is not itself a valid token - for instance, \n";
+    out << "    // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid\n";
+    out << "    // tokens. Our grammar doesn't have this property, so we can simplify the logic\n";
+    out << "    // a bit.\n";
+    out << "    int32_t startOffset = fOffset;\n";
     out << "    if (startOffset == fLength) {\n";
     out << "        return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
     out << "    }\n";
-    out << "    int offset = startOffset;\n";
-    out << "    int state = 1;\n";
-    out << "    " << token << "::Kind lastAccept = " << token << "::Kind::INVALID;\n";
-    out << "    int lastAcceptEnd = startOffset + 1;\n";
-    out << "    while (offset < fLength) {\n";
-    out << "        if ((uint8_t) fText[offset] >= " << dfa.fCharMappings.size() << ") {";
+    out << "    int16_t state = 1;\n";
+    out << "    while (fOffset < fLength) {\n";
+    out << "        if ((uint8_t) fText[fOffset] >= " << dfa.fCharMappings.size() << ") {";
+    out << "            ++fOffset;\n";
     out << "            break;";
     out << "        }";
-    out << "        state = transitions[mappings[(int) fText[offset]]][state];\n";
-    out << "        ++offset;\n";
-    out << "        if (!state) {\n";
+    out << "        int16_t newState = transitions[mappings[(int) fText[fOffset]]][state];\n";
+    out << "        if (!newState) {\n";
     out << "            break;\n";
     out << "        }\n";
-    out << "        // We seem to be getting away without doing this check.\n";
-    out << "        /*if (accepts[state] != -1)*/ {\n";
-    out << "            lastAccept = (" << token << "::Kind) accepts[state];\n";
-    out << "            lastAcceptEnd = offset;\n";
-    out << "        }\n";
+    out << "        state = newState;";
+    out << "        ++fOffset;\n";
     out << "    }\n";
-    out << "    fOffset = lastAcceptEnd;\n";
-    out << "    return " << token << "(lastAccept, startOffset, lastAcceptEnd - startOffset);\n";
+    out << "    Token::Kind kind = (" << token << "::Kind) accepts[state];\n";
+    out << "    return " << token << "(kind, startOffset, fOffset - startOffset);\n";
     out << "}\n";
     out << "\n";
     out << "} // namespace\n";