Blame - src/sksl/lex/Main.cpp - platform/external/skia

blob: 04f266bf553bc8dc79ab6da27410f2c09a1f49a4 [file] [log] [blame]

Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	1	/*
				2	* Copyright 2017 Google Inc.
				3	*
				4	* Use of this source code is governed by a BSD-style license that can be
				5	* found in the LICENSE file.
				6	*/
				7
				8	#include "NFAtoDFA.h"
				9	#include "RegexParser.h"
				10
				11	#include <fstream>
				12	#include <sstream>
				13	#include <string>
				14
				15	/**
				16	* Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex
				17	* file is a text file with one token definition per line. Each line is of the form:
				18	* <TOKEN_NAME> = <pattern>
				19	* where <pattern> is either a regular expression (e.g [0-9]) or a double-quoted literal string.
				20	*/
				21
				22	static constexpr const char* HEADER =
				23	"/*\n"
				24	" * Copyright 2017 Google Inc.\n"
				25	" *\n"
				26	" * Use of this source code is governed by a BSD-style license that can be\n"
				27	" * found in the LICENSE file.\n"
				28	" */\n"
				29	"/*****************************************************************************************\n"
				30	" ****************** This file was generated by sksllex. Do not edit. *****************\n"
				31	" *****************************************************************************************/\n";
				32
				33	void writeH(const DFA& dfa, const char* lexer, const char* token,
				34	const std::vector<std::string>& tokens, const char* hPath) {
				35	std::ofstream out(hPath);
Ethan Nicholas	d9d33c3	2018-06-12 11:05:59 -0400	[diff] [blame]	36	SkASSERT(out.good());
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	37	out << HEADER;
				38	out << "#ifndef SKSL_" << lexer << "\n";
				39	out << "#define SKSL_" << lexer << "\n";
				40	out << "#include <cstddef>\n";
				41	out << "#include <cstdint>\n";
				42	out << "namespace SkSL {\n";
				43	out << "\n";
				44	out << "struct " << token << " {\n";
				45	out << " enum Kind {\n";
				46	for (const std::string& t : tokens) {
				47	out << " #undef " << t << "\n";
				48	out << " " << t << ",\n";
				49	}
				50	out << " };\n";
				51	out << "\n";
				52	out << " " << token << "()\n";
				53	out << " : fKind(Kind::INVALID)\n";
				54	out << " , fOffset(-1)\n";
				55	out << " , fLength(-1) {}\n";
				56	out << "\n";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	57	out << " " << token << "(Kind kind, int32_t offset, int32_t length)\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	58	out << " : fKind(kind)\n";
				59	out << " , fOffset(offset)\n";
				60	out << " , fLength(length) {}\n";
				61	out << "\n";
				62	out << " Kind fKind;\n";
				63	out << " int fOffset;\n";
				64	out << " int fLength;\n";
				65	out << "};\n";
				66	out << "\n";
				67	out << "class " << lexer << " {\n";
				68	out << "public:\n";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	69	out << " void start(const char* text, int32_t length) {\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	70	out << " fText = text;\n";
				71	out << " fLength = length;\n";
				72	out << " fOffset = 0;\n";
				73	out << " }\n";
				74	out << "\n";
				75	out << " " << token << " next();\n";
				76	out << "\n";
				77	out << "private:\n";
				78	out << " const char* fText;\n";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	79	out << " int32_t fLength;\n";
				80	out << " int32_t fOffset;\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	81	out << "};\n";
				82	out << "\n";
				83	out << "} // namespace\n";
				84	out << "#endif\n";
				85	}
				86
				87	void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,
				88	const char* cppPath) {
				89	std::ofstream out(cppPath);
Ethan Nicholas	d9d33c3	2018-06-12 11:05:59 -0400	[diff] [blame]	90	SkASSERT(out.good());
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	91	out << HEADER;
				92	out << "#include \"" << include << "\"\n";
				93	out << "\n";
				94	out << "namespace SkSL {\n";
				95	out << "\n";
				96
				97	size_t states = 0;
				98	for (const auto& row : dfa.fTransitions) {
				99	states = std::max(states, row.size());
				100	}
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	101	out << "static int8_t mappings[" << dfa.fCharMappings.size() << "] = {\n ";
Ethan Nicholas	906126e	2017-09-19 14:38:40 -0400	[diff] [blame]	102	const char* separator = "";
				103	for (int m : dfa.fCharMappings) {
				104	out << separator << std::to_string(m);
				105	separator = ", ";
				106	}
				107	out << "\n};\n";
				108	out << "static int16_t transitions[" << dfa.fTransitions.size() << "][" << states << "] = {\n";
				109	for (size_t c = 0; c < dfa.fTransitions.size(); ++c) {
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	110	out << " {";
Ethan Nicholas	906126e	2017-09-19 14:38:40 -0400	[diff] [blame]	111	for (size_t j = 0; j < states; ++j) {
				112	if ((size_t) c < dfa.fTransitions.size() && j < dfa.fTransitions[c].size()) {
				113	out << " " << dfa.fTransitions[c][j] << ",";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	114	} else {
				115	out << " 0,";
				116	}
				117	}
				118	out << " },\n";
				119	}
				120	out << "};\n";
				121	out << "\n";
				122
Ethan Nicholas	906126e	2017-09-19 14:38:40 -0400	[diff] [blame]	123	out << "static int8_t accepts[" << states << "] = {";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	124	for (size_t i = 0; i < states; ++i) {
				125	if (i < dfa.fAccepts.size()) {
				126	out << " " << dfa.fAccepts[i] << ",";
				127	} else {
Ethan Nicholas	906126e	2017-09-19 14:38:40 -0400	[diff] [blame]	128	out << " " << INVALID << ",";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	129	}
				130	}
				131	out << " };\n";
				132	out << "\n";
				133
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	134	out << token << " " << lexer << "::next() {\n";
				135	out << " // note that we cheat here: normally a lexer needs to worry about the case\n";
				136	out << " // where a token has a prefix which is not itself a valid token - for instance, \n";
				137	out << " // maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid\n";
				138	out << " // tokens. Our grammar doesn't have this property, so we can simplify the logic\n";
				139	out << " // a bit.\n";
				140	out << " int32_t startOffset = fOffset;\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	141	out << " if (startOffset == fLength) {\n";
				142	out << " return " << token << "(" << token << "::END_OF_FILE, startOffset, 0);\n";
				143	out << " }\n";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	144	out << " int16_t state = 1;\n";
				145	out << " while (fOffset < fLength) {\n";
				146	out << " if ((uint8_t) fText[fOffset] >= " << dfa.fCharMappings.size() << ") {";
				147	out << " ++fOffset;\n";
Ethan Nicholas	e148bf7	2017-10-06 14:22:22 -0400	[diff] [blame]	148	out << " break;";
				149	out << " }";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	150	out << " int16_t newState = transitions[mappings[(int) fText[fOffset]]][state];\n";
				151	out << " if (!newState) {\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	152	out << " break;\n";
				153	out << " }\n";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	154	out << " state = newState;";
				155	out << " ++fOffset;\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	156	out << " }\n";
Ethan Nicholas	c7735f1	2018-08-03 11:48:20 -0400	[diff] [blame]	157	out << " Token::Kind kind = (" << token << "::Kind) accepts[state];\n";
				158	out << " return " << token << "(kind, startOffset, fOffset - startOffset);\n";
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	159	out << "}\n";
				160	out << "\n";
				161	out << "} // namespace\n";
				162	}
				163
				164	void process(const char* inPath, const char* lexer, const char* token, const char* hPath,
				165	const char* cppPath) {
				166	NFA nfa;
				167	std::vector<std::string> tokens;
				168	tokens.push_back("END_OF_FILE");
				169	std::string line;
				170	std::ifstream in(inPath);
				171	while (std::getline(in, line)) {
				172	std::istringstream split(line);
				173	std::string name, delimiter, pattern;
				174	if (split >> name >> delimiter >> pattern) {
Ethan Nicholas	d9d33c3	2018-06-12 11:05:59 -0400	[diff] [blame]	175	SkASSERT(split.eof());
				176	SkASSERT(name != "");
				177	SkASSERT(delimiter == "=");
				178	SkASSERT(pattern != "");
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	179	tokens.push_back(name);
				180	if (pattern[0] == '"') {
Ethan Nicholas	d9d33c3	2018-06-12 11:05:59 -0400	[diff] [blame]	181	SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');
Ethan Nicholas	ca82a92	2017-09-07 09:39:50 -0400	[diff] [blame]	182	RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);
				183	for (size_t i = 2; i < pattern.size() - 1; ++i) {
				184	node = RegexNode(RegexNode::kConcat_Kind, node,
				185	RegexNode(RegexNode::kChar_Kind, pattern[i]));
				186	}
				187	nfa.addRegex(node);
				188	}
				189	else {
				190	nfa.addRegex(RegexParser().parse(pattern));
				191	}
				192	}
				193	}
				194	NFAtoDFA converter(&nfa);
				195	DFA dfa = converter.convert();
				196	writeH(dfa, lexer, token, tokens, hPath);
				197	writeCPP(dfa, lexer, token, (std::string("SkSL") + lexer + ".h").c_str(), cppPath);
				198	}
				199
				200	int main(int argc, const char** argv) {
				201	if (argc != 6) {
				202	printf("usage: sksllex <input.lex> <lexername> <tokenname> <output.h> <output.cpp>\n");
				203	exit(1);
				204	}
				205	process(argv[1], argv[2], argv[3], argv[4], argv[5]);
				206	return 0;
				207	}