Blame - src/sksl/lex/Main.cpp - platform/external/skia

2017-09-07 09:39:50 -0400

[diff] [blame]

/*

*

* Use of this source code is governed by a BSD-style license that can be

5

* found in the LICENSE file.

6

*/

7

Mike Klein

c0bd9f9

2019-04-23 12:05:21 -0500

[diff] [blame]

8

#include "src/sksl/lex/NFAtoDFA.h"

9

#include "src/sksl/lex/RegexParser.h"

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

#include <fstream>

#include <sstream>

#include <string>

/**

* Processes a .lex file and produces .h and .cpp files which implement a lexical analyzer. The .lex

17

* file is a text file with one token definition per line. Each line is of the form:

18

* <TOKEN_NAME> = <pattern>

19

* where <pattern> is either a regular expression (e.g. [0-9]) or a double-quoted literal string.

20

*/

21

22

// Banner written at the top of every generated file: the license notice followed by a
// "generated by sksllex, do not edit" warning. Spelled as a single raw string literal so the
// text reads exactly as it will appear in the output.
static constexpr const char* HEADER = R"(/*
 *
 * Use of this source code is governed by a BSD-style license that can be
 * found in the LICENSE file.
 */
/*****************************************************************************************
 ******************** This file was generated by sksllex. Do not edit. *******************
 *****************************************************************************************/
)";

32

33

void writeH(const DFA& dfa, const char* lexer, const char* token,

34

const std::vector<std::string>& tokens, const char* hPath) {

35

std::ofstream out(hPath);

Ethan Nicholas

2018-06-12 11:05:59 -0400

[diff] [blame]

36

SkASSERT(out.good());

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

37

out << HEADER;

38

out << "#ifndef SKSL_" << lexer << "\n";

39

out << "#define SKSL_" << lexer << "\n";

Ethan Nicholas

2021-06-15 11:42:07 -0400

[diff] [blame]

40

out << "#include \"include/core/SkStringView.h\"\n";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

41

out << "#include <cstddef>\n";

42

out << "#include <cstdint>\n";

43

out << "namespace SkSL {\n";

44

out << "\n";

45

out << "struct " << token << " {\n";

Ethan Nicholas

5a9e7fb

2020-04-17 12:45:51 -0400

[diff] [blame]

46

out << " enum class Kind {\n";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

47

for (const std::string& t : tokens) {

Ethan Nicholas

5a9e7fb

2020-04-17 12:45:51 -0400

[diff] [blame]

48

out << " TK_" << t << ",\n";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

49

}

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

out << " TK_NONE,";

out << R"(

};

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

54

)" << token << "() {}";

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

55

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

56

out << token << R"((Kind kind, int32_t offset, int32_t length, int32_t line)

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

57

: fKind(kind)

58

, fOffset(offset)

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

59

, fLength(length)

60

, fLine(line) {}

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

61

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

62

Kind fKind = Kind::TK_NONE;

63

int32_t fOffset = -1;

64

int32_t fLength = -1;

65

int32_t fLine = -1;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

66

};

67

68

class )" << lexer << R"( {

69

public:

Ethan Nicholas

2021-06-15 11:42:07 -0400

[diff] [blame]

70

void start(skstd::string_view text) {

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

71

fText = text;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

72

fOffset = 0;

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

73

fLine = 1;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

74

}

75

76

)" << token << R"( next();

77

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

struct Checkpoint {

int32_t fOffset;

int32_t fLine;

};

Checkpoint getCheckpoint() const {

84

return {fOffset, fLine};

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

85

}

86

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

87

void rewindToCheckpoint(Checkpoint checkpoint) {

88

fOffset = checkpoint.fOffset;

89

fLine = checkpoint.fLine;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

90

}

91

92

private:

Ethan Nicholas

2021-06-15 11:42:07 -0400

[diff] [blame]

93

skstd::string_view fText;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

94

int32_t fOffset;

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

95

int32_t fLine;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

};

} // namespace

#endif

)";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

101

}

102

103

void writeCPP(const DFA& dfa, const char* lexer, const char* token, const char* include,

104

const char* cppPath) {

105

std::ofstream out(cppPath);

Ethan Nicholas

2018-06-12 11:05:59 -0400

[diff] [blame]

106

SkASSERT(out.good());

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

107

out << HEADER;

108

out << "#include \"" << include << "\"\n";

109

out << "\n";

110

out << "namespace SkSL {\n";

out << "\n";

size_t states = 0;

for (const auto& row : dfa.fTransitions) {

115

states = std::max(states, row.size());

116

}

Brian Osman

bfcd782

2021-02-18 14:47:15 -0500

[diff] [blame]

117

out << "using State = " << (states <= 256 ? "uint8_t" : "int16_t") << ";\n";

Ethan Nicholas

10be9d5

2019-03-29 14:16:50 -0400

[diff] [blame]

118

// arbitrarily-chosen character which is greater than START_CHAR and should not appear in actual

119

// input

120

out << "static const uint8_t INVALID_CHAR = 18;";

John Stiles

2021-09-13 13:47:04 -0400

[diff] [blame]

121

out << "static const int8_t kMappings[" << dfa.fCharMappings.size() << "] = {\n ";

Ethan Nicholas

2017-09-19 14:38:40 -0400

[diff] [blame]

122

const char* separator = "";

123

for (int m : dfa.fCharMappings) {

124

out << separator << std::to_string(m);

125

separator = ", ";

126

}

127

out << "\n};\n";

John Stiles

2021-09-13 13:47:04 -0400

[diff] [blame]

128

out << "static const State kTransitions[" << dfa.fTransitions.size() << "]["

129

<< states << "] = {\n";

Ethan Nicholas

2017-09-19 14:38:40 -0400

[diff] [blame]

130

for (size_t c = 0; c < dfa.fTransitions.size(); ++c) {

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

131

out << " {";

Ethan Nicholas

2017-09-19 14:38:40 -0400

[diff] [blame]

132

for (size_t j = 0; j < states; ++j) {

133

if ((size_t) c < dfa.fTransitions.size() && j < dfa.fTransitions[c].size()) {

134

out << " " << dfa.fTransitions[c][j] << ",";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

} else {

out << " 0,";

}

}

out << " },\n";

}

out << "};\n";

out << "\n";

John Stiles

2021-09-13 13:47:04 -0400

[diff] [blame]

144

out << "static const int8_t kAccepts[" << states << "] = {";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

145

for (size_t i = 0; i < states; ++i) {

146

if (i < dfa.fAccepts.size()) {

147

out << " " << dfa.fAccepts[i] << ",";

148

} else {

Ethan Nicholas

2017-09-19 14:38:40 -0400

[diff] [blame]

149

out << " " << INVALID << ",";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

}

}

out << " };\n";

out << "\n";

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

155

out << token << " " << lexer << "::next() {";

156

out << R"(

157

// note that we cheat here: normally a lexer needs to worry about the case

158

// where a token has a prefix which is not itself a valid token - for instance,

159

// maybe we have a valid token 'while', but 'w', 'wh', etc. are not valid

160

// tokens. Our grammar doesn't have this property, so we can simplify the logic

161

// a bit.

162

int32_t startOffset = fOffset;

Ethan Nicholas

2021-06-15 11:42:07 -0400

[diff] [blame]

163

if (startOffset == (int32_t)fText.length()) {

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

164

return )" << token << "(" << token << R"(::Kind::TK_END_OF_FILE, startOffset, 0, fLine);

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

165

}

Brian Osman

bfcd782

2021-02-18 14:47:15 -0500

[diff] [blame]

166

State state = 1;

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

167

for (;;) {

Ethan Nicholas

2021-06-15 11:42:07 -0400

[diff] [blame]

168

if (fOffset >= (int32_t)fText.length()) {

John Stiles

2021-09-13 13:47:04 -0400

[diff] [blame]

169

if (kAccepts[state] == -1) {

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

170

return Token(Token::Kind::TK_END_OF_FILE, startOffset, 0, fLine);

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

}

break;

}

uint8_t c = (uint8_t) fText[fOffset];

175

if (c <= 8 || c >= )" << dfa.fCharMappings.size() << R"() {

176

c = INVALID_CHAR;

177

}

John Stiles

2021-09-13 13:47:04 -0400

[diff] [blame]

178

State newState = kTransitions[kMappings[c]][state];

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

if (!newState) {

break;

}

state = newState;

++fOffset;

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

184

if (c == '\n') {

185

++fLine;

186

}

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

187

}

John Stiles

2021-09-13 13:47:04 -0400

[diff] [blame]

188

Token::Kind kind = ()" << token << R"(::Kind) kAccepts[state];

Ethan Nicholas

2021-09-27 10:39:18 -0400

[diff] [blame^]

189

return )" << token << R"((kind, startOffset, fOffset - startOffset, fLine);

John Stiles

2021-01-22 09:49:45 -0500

[diff] [blame]

}

} // namespace

)";

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

194

}

195

196

void process(const char* inPath, const char* lexer, const char* token, const char* hPath,

197

const char* cppPath) {

198

NFA nfa;

199

std::vector<std::string> tokens;

200

tokens.push_back("END_OF_FILE");

201

std::string line;

202

std::ifstream in(inPath);

203

while (std::getline(in, line)) {

Ethan Nicholas

f3c8f5d

2020-08-20 13:09:14 +0000

[diff] [blame]

204

if (line.length() == 0) {

205

continue;

206

}

207

if (line.length() >= 2 && line[0] == '/' && line[1] == '/') {

208

continue;

209

}

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

210

std::istringstream split(line);

211

std::string name, delimiter, pattern;

212

if (split >> name >> delimiter >> pattern) {

Ethan Nicholas

2018-06-12 11:05:59 -0400

[diff] [blame]

213

SkASSERT(split.eof());

214

SkASSERT(name != "");

215

SkASSERT(delimiter == "=");

216

SkASSERT(pattern != "");

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

217

tokens.push_back(name);

218

if (pattern[0] == '"') {

Ethan Nicholas

2018-06-12 11:05:59 -0400

[diff] [blame]

219

SkASSERT(pattern.size() > 2 && pattern[pattern.size() - 1] == '"');

Ethan Nicholas

2017-09-07 09:39:50 -0400

[diff] [blame]

220

RegexNode node = RegexNode(RegexNode::kChar_Kind, pattern[1]);

221

for (size_t i = 2; i < pattern.size() - 1; ++i) {

222

node = RegexNode(RegexNode::kConcat_Kind, node,

223

RegexNode(RegexNode::kChar_Kind, pattern[i]));

}

nfa.addRegex(node);

}

else {

nfa.addRegex(RegexParser().parse(pattern));

}

}

}

NFAtoDFA converter(&nfa);

233

DFA dfa = converter.convert();

234

writeH(dfa, lexer, token, tokens, hPath);

Mike Klein

c0bd9f9

2019-04-23 12:05:21 -0500

[diff] [blame]

235

writeCPP(dfa, lexer, token, (std::string("src/sksl/SkSL") + lexer + ".h").c_str(), cppPath);

Ethan Nicholas