blob: 37cc312e6cb1a5e6b1c4859a77b2b25f3f5bc769 [file] [log] [blame]
Ethan Nicholasca82a922017-09-07 09:39:50 -04001/*
2 * Copyright 2017 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
Mike Kleinc0bd9f92019-04-23 12:05:21 -05008#include "src/sksl/lex/NFAtoDFA.h"
9#include "src/sksl/lex/RegexParser.h"
Ethan Nicholasca82a922017-09-07 09:39:50 -040010
11#include <fstream>
12#include <sstream>
13#include <string>
14
/**
 * Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
 * file is a text file with one token definition per line. Each line is of the form:
 * <TOKEN_NAME> = <pattern>
 * where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.
 * Empty lines and lines beginning with "//" are skipped.
 */
21
// Banner written verbatim at the top of both generated files: the license notice followed by a
// "generated file, do not edit" marker. Kept as a single raw literal so the text reads exactly as
// it will appear in the output.
static constexpr const char* HEADER = R"(/*
 * Copyright 2017 Google Inc.
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
/*****************************************************************************************
 ******************** This file was generated by sksllex. Do not edit. *******************
 *****************************************************************************************/
)";
32
33void writeH(const DFA& dfa, const char* lexer, const char* token,
34 const std::vector<std::string>& tokens, const char* hPath) {
35 std::ofstream out(hPath);
Ethan Nicholasd9d33c32018-06-12 11:05:59 -040036 SkASSERT(out.good());
Ethan Nicholasca82a922017-09-07 09:39:50 -040037 out << HEADER;
38 out << "#ifndef SKSL_" << lexer << "\n";
39 out << "#define SKSL_" << lexer << "\n";
Ethan Nicholas6823b502021-06-15 11:42:07 -040040 out << "#include \"include/core/SkStringView.h\"\n";
Ethan Nicholasca82a922017-09-07 09:39:50 -040041 out << "#include <cstddef>\n";
42 out << "#include <cstdint>\n";
43 out << "namespace SkSL {\n";
44 out << "\n";
45 out << "struct " << token << " {\n";
Ethan Nicholas5a9e7fb2020-04-17 12:45:51 -040046 out << " enum class Kind {\n";
Ethan Nicholasca82a922017-09-07 09:39:50 -040047 for (const std::string& t : tokens) {
Ethan Nicholas5a9e7fb2020-04-17 12:45:51 -040048 out << " TK_" << t << ",\n";
Ethan Nicholasca82a922017-09-07 09:39:50 -040049 }
John Stilescc6961b2021-01-22 09:49:45 -050050 out << " TK_NONE,";
51 out << R"(
52 };
53
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040054 )" << token << "() {}";
John Stilescc6961b2021-01-22 09:49:45 -050055
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040056 out << token << R"((Kind kind, int32_t offset, int32_t length, int32_t line)
John Stilescc6961b2021-01-22 09:49:45 -050057 : fKind(kind)
58 , fOffset(offset)
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040059 , fLength(length)
60 , fLine(line) {}
John Stilescc6961b2021-01-22 09:49:45 -050061
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040062 Kind fKind = Kind::TK_NONE;
63 int32_t fOffset = -1;
64 int32_t fLength = -1;
65 int32_t fLine = -1;
John Stilescc6961b2021-01-22 09:49:45 -050066};
67
68class )" << lexer << R"( {
69public:
Ethan Nicholas6823b502021-06-15 11:42:07 -040070 void start(skstd::string_view text) {
John Stilescc6961b2021-01-22 09:49:45 -050071 fText = text;
John Stilescc6961b2021-01-22 09:49:45 -050072 fOffset = 0;
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040073 fLine = 1;
John Stilescc6961b2021-01-22 09:49:45 -050074 }
75
76 )" << token << R"( next();
77
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040078 struct Checkpoint {
79 int32_t fOffset;
80 int32_t fLine;
81 };
82
83 Checkpoint getCheckpoint() const {
84 return {fOffset, fLine};
John Stilescc6961b2021-01-22 09:49:45 -050085 }
86
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040087 void rewindToCheckpoint(Checkpoint checkpoint) {
88 fOffset = checkpoint.fOffset;
89 fLine = checkpoint.fLine;
John Stilescc6961b2021-01-22 09:49:45 -050090 }
91
92private:
Ethan Nicholas6823b502021-06-15 11:42:07 -040093 skstd::string_view fText;
John Stilescc6961b2021-01-22 09:49:45 -050094 int32_t fOffset;
Ethan Nicholas5fad2b82021-09-27 10:39:18 -040095 int32_t fLine;
John Stilescc6961b2021-01-22 09:49:45 -050096};
97
98} // namespace
99#endif
100)";
Ethan Nicholasca82a922017-09-07 09:39:50 -0400101}
102
103void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
104 const char* cppPath) {
105 std::ofstream out(cppPath);
Ethan Nicholasd9d33c32018-06-12 11:05:59 -0400106 SkASSERT(out.good());
Ethan Nicholasca82a922017-09-07 09:39:50 -0400107 out << HEADER;
108 out << "#include \"" << include << "\"\n";
109 out << "\n";
110 out << "namespace SkSL {\n";
111 out << "\n";
112
113 size_t states = 0;
114 for (const auto& row : dfa.fTransitions) {
115 states = std::max(states, row.size());
116 }
Brian Osmanbfcd7822021-02-18 14:47:15 -0500117 out << "using State = " << (states <= 256 ? "uint8_t" : "int16_t") << ";\n";
Ethan Nicholas10be9d52019-03-29 14:16:50 -0400118 // arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual
119 // input
120 out << "static const uint8_t INVALID_CHAR = 18;";
John Stiles31e48452021-09-13 13:47:04 -0400121 out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n ";
Ethan Nicholas906126e2017-09-19 14:38:40 -0400122 const char* separator = "";
123 for (int m : dfa.fCharMappings) {
124 out << separator << std::to_string(m);
125 separator = ", ";
126 }
127 out << "\n};\n";
John Stiles31e48452021-09-13 13:47:04 -0400128 out << "static const State kTransitions[" << dfa.fTransitions.size() << "]["
129 << states << "] = {\n";
Ethan Nicholas906126e2017-09-19 14:38:40 -0400130 for (size_t c = 0; c < dfa.fTransitions.size(); ++c) {
Ethan Nicholasca82a922017-09-07 09:39:50 -0400131 out << " {";
Ethan Nicholas906126e2017-09-19 14:38:40 -0400132 for (size_t j = 0; j < states; ++j) {
133 if ((size_t) c < dfa.fTransitions.size() && j < dfa.fTransitions[c].size()) {
134 out << " " << dfa.fTransitions[c][j] << ",";
Ethan Nicholasca82a922017-09-07 09:39:50 -0400135 } else {
136 out << " 0,";
137 }
138 }
139 out << " },\n";
140 }
141 out << "};\n";
142 out << "\n";
143
John Stiles31e48452021-09-13 13:47:04 -0400144 out << "static const int8_t kAccepts[" << states << "] = {";
Ethan Nicholasca82a922017-09-07 09:39:50 -0400145 for (size_t i = 0; i < states; ++i) {
146 if (i < dfa.fAccepts.size()) {
147 out << " " << dfa.fAccepts[i] << ",";
148 } else {
Ethan Nicholas906126e2017-09-19 14:38:40 -0400149 out << " " << INVALID << ",";
Ethan Nicholasca82a922017-09-07 09:39:50 -0400150 }
151 }
152 out << " };\n";
153 out << "\n";
154
John Stilescc6961b2021-01-22 09:49:45 -0500155 out << token << " " << lexer << "::next() {";
156 out << R"(
157 // note that we cheat here: normally a lexer needs to worry about the case
158 // where a token has a prefix which is not itself a valid token - for instance,
159 // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid
160 // tokens. Our grammar doesn't have this property, so we can simplify the logic
161 // a bit.
162 int32_t startOffset = fOffset;
Ethan Nicholas6823b502021-06-15 11:42:07 -0400163 if (startOffset == (int32_t)fText.length()) {
Ethan Nicholas5fad2b82021-09-27 10:39:18 -0400164 return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
John Stilescc6961b2021-01-22 09:49:45 -0500165 }
Brian Osmanbfcd7822021-02-18 14:47:15 -0500166 State state = 1;
John Stilescc6961b2021-01-22 09:49:45 -0500167 for (;;) {
Ethan Nicholas6823b502021-06-15 11:42:07 -0400168 if (fOffset >= (int32_t)fText.length()) {
John Stiles31e48452021-09-13 13:47:04 -0400169 if (kAccepts[state] == -1) {
Ethan Nicholas5fad2b82021-09-27 10:39:18 -0400170 return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0, fLine);
John Stilescc6961b2021-01-22 09:49:45 -0500171 }
172 break;
173 }
174 uint8_t c = (uint8_t) fText[fOffset];
175 if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {
176 c = INVALID_CHAR;
177 }
John Stiles31e48452021-09-13 13:47:04 -0400178 State newState = kTransitions[kMappings[c]][state];
John Stilescc6961b2021-01-22 09:49:45 -0500179 if (!newState) {
180 break;
181 }
182 state = newState;
183 ++fOffset;
Ethan Nicholas5fad2b82021-09-27 10:39:18 -0400184 if (c == '\n') {
185 ++fLine;
186 }
John Stilescc6961b2021-01-22 09:49:45 -0500187 }
John Stiles31e48452021-09-13 13:47:04 -0400188 Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state];
Ethan Nicholas5fad2b82021-09-27 10:39:18 -0400189 return )" << token << R"((kind, startOffset, fOffset - startOffset, fLine);
John Stilescc6961b2021-01-22 09:49:45 -0500190}
191
192} // namespace
193)";
Ethan Nicholasca82a922017-09-07 09:39:50 -0400194}
195
196void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
197 const char* cppPath) {
198 NFA nfa;
199 std::vector<std::string> tokens;
200 tokens.push_back("END_OF_FILE");
201 std::string line;
202 std::ifstream in(inPath);
203 while (std::getline(in, line)) {
Ethan Nicholasf3c8f5d2020-08-20 13:09:14 +0000204 if (line.length() == 0) {
205 continue;
206 }
207 if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {
208 continue;
209 }
Ethan Nicholasca82a922017-09-07 09:39:50 -0400210 std::istringstream split(line);
211 std::string name, delimiter, pattern;
212 if (split >> name >> delimiter >> pattern) {
Ethan Nicholasd9d33c32018-06-12 11:05:59 -0400213 SkASSERT(split.eof());
214 SkASSERT(name != "");
215 SkASSERT(delimiter == "=");
216 SkASSERT(pattern != "");
Ethan Nicholasca82a922017-09-07 09:39:50 -0400217 tokens.push_back(name);
218 if (pattern[0] == '"') {
Ethan Nicholasd9d33c32018-06-12 11:05:59 -0400219 SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
Ethan Nicholasca82a922017-09-07 09:39:50 -0400220 RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
221 for (size_t i = 2; i < pattern.size() - 1; ++i) {
222 node = RegexNode(RegexNode::kConcat_Kind, node,
223 RegexNode(RegexNode::kChar_Kind, pattern[i]));
224 }
225 nfa.addRegex(node);
226 }
227 else {
228 nfa.addRegex(RegexParser().parse(pattern));
229 }
230 }
231 }
232 NFAtoDFA converter(&nfa);
233 DFA dfa = converter.convert();
234 writeH(dfa, lexer, token, tokens, hPath);
Mike Kleinc0bd9f92019-04-23 12:05:21 -0500235 writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);
Ethan Nicholasca82a922017-09-07 09:39:50 -0400236}
237
238int main(int argc, const char** argv) {
239 if (argc != 6) {
240 printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
241 exit(1);
242 }
243 process(argv[1], argv[2], argv[3], argv[4], argv[5]);
244 return 0;
245}