// Source blob: 4a41aebb7cda5e1b534eb2e3029e74125bdf851c
Gavin Howard8a596d42018-01-15 15:46:01 -07001#include <ctype.h>
2#include <stdbool.h>
3#include <stdint.h>
4#include <stdio.h>
5#include <stdlib.h>
6#include <string.h>
7
Gavin Howardf6aa3292018-01-15 16:03:03 -07008#include <bc/bc.h>
9#include <bc/lex.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070010
11static const char* const token_type_strs[] = {
Gavin Howard4bc73ee2018-01-26 11:39:20 -070012 BC_LEX_TOKEN_FOREACH(BC_LEX_GEN_STR)
Gavin Howard8a596d42018-01-15 15:46:01 -070013};
14
// Reserved words recognized by bc_lex_name(). The position of a word in
// this table is added to BC_LEX_KEY_AUTO to obtain its token type, and
// the same position indexes keyword_lens, so the order is significant.
static const char* const keywords[] = {
  "auto",   "break",  "continue", "define", "else",
  "for",    "halt",   "ibase",    "if",     "last",
  "length", "limits", "obase",    "print",  "quit",
  "read",   "return", "scale",    "sqrt",   "while",
};
39
// Length, in characters, of each reserved word, in the same order as the
// keywords table. Each entry is written as the size of the word's string
// literal minus its terminating NUL.
static const uint32_t keyword_lens[] = {
  sizeof("auto") - 1,
  sizeof("break") - 1,
  sizeof("continue") - 1,
  sizeof("define") - 1,
  sizeof("else") - 1,
  sizeof("for") - 1,
  sizeof("halt") - 1,
  sizeof("ibase") - 1,
  sizeof("if") - 1,
  sizeof("last") - 1,
  sizeof("length") - 1,
  sizeof("limits") - 1,
  sizeof("obase") - 1,
  sizeof("print") - 1,
  sizeof("quit") - 1,
  sizeof("read") - 1,
  sizeof("return") - 1,
  sizeof("scale") - 1,
  sizeof("sqrt") - 1,
  sizeof("while") - 1,
};
64
65static BcStatus bc_lex_token(BcLex* lex, BcLexToken* token);
66static BcStatus bc_lex_whitespace(BcLex* lex, BcLexToken* token);
67static BcStatus bc_lex_string(BcLex* lex, BcLexToken* token);
68static BcStatus bc_lex_comment(BcLex* lex, BcLexToken* token);
69static BcStatus bc_lex_number(BcLex* lex, BcLexToken* token, char start);
70static BcStatus bc_lex_name(BcLex* lex, BcLexToken* token);
71
72BcStatus bc_lex_printToken(BcLexToken* token) {
73
Gavin Howard4bc73ee2018-01-26 11:39:20 -070074 printf("<%s", token_type_strs[token->type]);
Gavin Howard8a596d42018-01-15 15:46:01 -070075
Gavin Howard4bc73ee2018-01-26 11:39:20 -070076 switch (token->type) {
Gavin Howard8a596d42018-01-15 15:46:01 -070077
Gavin Howard4bc73ee2018-01-26 11:39:20 -070078 case BC_LEX_STRING:
79 case BC_LEX_NAME:
80 case BC_LEX_NUMBER:
81 printf(":%s", token->string);
82 break;
Gavin Howard8a596d42018-01-15 15:46:01 -070083
Gavin Howard4bc73ee2018-01-26 11:39:20 -070084 default:
85 break;
86 }
Gavin Howard8a596d42018-01-15 15:46:01 -070087
Gavin Howard4bc73ee2018-01-26 11:39:20 -070088 putchar('>');
89 putchar('\n');
Gavin Howard8a596d42018-01-15 15:46:01 -070090
Gavin Howard4bc73ee2018-01-26 11:39:20 -070091 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -070092}
93
Gavin Howard88ef8d62018-01-15 19:34:10 -070094BcStatus bc_lex_init(BcLex* lex, const char* file) {
Gavin Howard8a596d42018-01-15 15:46:01 -070095
Gavin Howard4bc73ee2018-01-26 11:39:20 -070096 if (lex == NULL ) {
97 return BC_STATUS_INVALID_PARAM;
98 }
Gavin Howard8a596d42018-01-15 15:46:01 -070099
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700100 lex->line = 1;
101 lex->newline = false;
102 lex->file = file;
Gavin Howard8a596d42018-01-15 15:46:01 -0700103
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700104 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700105}
106
107BcStatus bc_lex_text(BcLex* lex, const char* text) {
108
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700109 if (lex == NULL || text == NULL) {
110 return BC_STATUS_INVALID_PARAM;
111 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700112
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700113 lex->buffer = text;
114 lex->idx = 0;
115 lex->len = strlen(text);
Gavin Howard8a596d42018-01-15 15:46:01 -0700116
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700117 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700118}
119
120BcStatus bc_lex_next(BcLex* lex, BcLexToken* token) {
121
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700122 BcStatus status;
Gavin Howard8a596d42018-01-15 15:46:01 -0700123
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700124 if (lex == NULL || token == NULL) {
125 return BC_STATUS_INVALID_PARAM;
126 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700127
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700128 if (lex->idx == lex->len) {
129 token->type = BC_LEX_EOF;
130 return BC_STATUS_LEX_EOF;
131 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700132
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700133 if (lex->newline) {
134 ++lex->line;
135 lex->newline = false;
136 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700137
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700138 // Loop until failure or we don't have whitespace. This
139 // is so the parser doesn't get inundated with whitespace.
140 do {
141 status = bc_lex_token(lex, token);
142 } while (!status && token->type == BC_LEX_WHITESPACE);
Gavin Howard8a596d42018-01-15 15:46:01 -0700143
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700144 return status;
Gavin Howard8a596d42018-01-15 15:46:01 -0700145}
146
147static BcStatus bc_lex_token(BcLex* lex, BcLexToken* token) {
148
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700149 BcStatus status = BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700150
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700151 char c = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700152
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700153 ++lex->idx;
Gavin Howard8a596d42018-01-15 15:46:01 -0700154
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700155 char c2;
Gavin Howard8a596d42018-01-15 15:46:01 -0700156
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700157 // This is the workhorse of the lexer.
158 switch (c) {
Gavin Howard8a596d42018-01-15 15:46:01 -0700159
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700160 case '\0':
161 {
162 token->type = BC_LEX_EOF;
163 break;
164 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700165
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700166 case '\t':
167 {
168 status = bc_lex_whitespace(lex, token);
169 break;
170 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700171
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700172 case '\n':
173 {
174 lex->newline = true;
175 token->type = BC_LEX_NEWLINE;
176 break;
177 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700178
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700179 case '\v':
180 case '\f':
181 case '\r':
182 case ' ':
183 {
184 status = bc_lex_whitespace(lex, token);
185 break;
186 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700187
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700188 case '!':
189 {
190 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700191
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700192 if (c2 == '=') {
193 ++lex->idx;
194 token->type = BC_LEX_OP_REL_NOT_EQ;
195 }
196 else {
197 token->type = BC_LEX_OP_BOOL_NOT;
198 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700199
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700200 break;
201 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700202
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700203 case '"':
204 {
205 status = bc_lex_string(lex, token);
206 break;
207 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700208
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700209 case '%':
210 {
211 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700212
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700213 if (c2 == '=') {
214 ++lex->idx;
215 token->type = BC_LEX_OP_ASSIGN_MODULUS;
216 }
217 else {
218 token->type = BC_LEX_OP_MODULUS;
219 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700220
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700221 break;
222 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700223
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700224 case '&':
225 {
226 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700227
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700228 if (c2 == '&') {
229 ++lex->idx;
230 token->type = BC_LEX_OP_BOOL_AND;
231 }
232 else {
233 token->type = BC_LEX_INVALID;
234 status = BC_STATUS_LEX_INVALID_TOKEN;
235 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700236
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700237 break;
238 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700239
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700240 case '(':
241 {
242 token->type = BC_LEX_LEFT_PAREN;
243 break;
244 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700245
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700246 case ')':
247 {
248 token->type = BC_LEX_RIGHT_PAREN;
249 break;
250 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700251
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700252 case '*':
253 {
254 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700255
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700256 if (c2 == '=') {
257 ++lex->idx;
258 token->type = BC_LEX_OP_ASSIGN_MULTIPLY;
259 }
260 else {
261 token->type = BC_LEX_OP_MULTIPLY;
262 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700263
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700264 break;
265 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700266
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700267 case '+':
268 {
269 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700270
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700271 if (c2 == '=') {
272 ++lex->idx;
273 token->type = BC_LEX_OP_ASSIGN_PLUS;
274 }
275 else if (c2 == '+') {
276 ++lex->idx;
277 token->type = BC_LEX_OP_INC;
278 }
279 else {
280 token->type = BC_LEX_OP_PLUS;
281 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700282
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700283 break;
284 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700285
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700286 case ',':
287 {
288 token->type = BC_LEX_COMMA;
289 break;
290 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700291
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700292 case '-':
293 {
294 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700295
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700296 if (c2 == '=') {
297 ++lex->idx;
298 token->type = BC_LEX_OP_ASSIGN_MINUS;
299 }
300 else if (c2 == '-') {
301 ++lex->idx;
302 token->type = BC_LEX_OP_DEC;
303 }
304 else {
305 token->type = BC_LEX_OP_MINUS;
306 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700307
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700308 break;
309 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700310
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700311 case '.':
312 {
313 status = bc_lex_number(lex, token, c);
314 break;
315 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700316
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700317 case '/':
318 {
319 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700320
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700321 if (c2 == '=') {
322 ++lex->idx;
323 token->type = BC_LEX_OP_ASSIGN_DIVIDE;
324 }
325 else if (c2 == '*') {
326 status = bc_lex_comment(lex, token);
327 }
328 else {
329 token->type = BC_LEX_OP_DIVIDE;
330 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700331
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700332 break;
333 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700334
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700335 case '0':
336 case '1':
337 case '2':
338 case '3':
339 case '4':
340 case '5':
341 case '6':
342 case '7':
343 case '8':
344 case '9':
345 {
346 status = bc_lex_number(lex, token, c);
347 break;
348 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700349
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700350 case ';':
351 {
352 token->type = BC_LEX_SEMICOLON;
353 break;
354 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700355
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700356 case '<':
357 {
358 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700359
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700360 if (c2 == '=') {
361 ++lex->idx;
362 token->type = BC_LEX_OP_REL_LESS_EQ;
363 }
364 else {
365 token->type = BC_LEX_OP_REL_LESS;
366 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700367
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700368 break;
369 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700370
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700371 case '=':
372 {
373 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700374
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700375 if (c2 == '=') {
376 ++lex->idx;
377 token->type = BC_LEX_OP_REL_EQUAL;
378 }
379 else {
380 token->type = BC_LEX_OP_ASSIGN;
381 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700382
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700383 break;
384 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700385
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700386 case '>':
387 {
388 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700389
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700390 if (c2 == '=') {
391 ++lex->idx;
392 token->type = BC_LEX_OP_REL_GREATER_EQ;
393 }
394 else {
395 token->type = BC_LEX_OP_REL_GREATER;
396 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700397
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700398 break;
399 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700400
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700401 case 'A':
402 case 'B':
403 case 'C':
404 case 'D':
405 case 'E':
406 case 'F':
407 {
408 status = bc_lex_number(lex, token, c);
409 break;
410 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700411
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700412 case '[':
413 {
414 token->type = BC_LEX_LEFT_BRACKET;
415 break;
416 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700417
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700418 case '\\':
419 {
420 status = bc_lex_whitespace(lex, token);
421 break;
422 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700423
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700424 case ']':
425 {
426 token->type = BC_LEX_RIGHT_BRACKET;
427 break;
428 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700429
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700430 case '^':
431 {
432 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700433
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700434 if (c2 == '=') {
435 ++lex->idx;
436 token->type = BC_LEX_OP_ASSIGN_POWER;
437 }
438 else {
439 token->type = BC_LEX_OP_POWER;
440 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700441
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700442 break;
443 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700444
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700445 case 'a':
446 case 'b':
447 case 'c':
448 case 'd':
449 case 'e':
450 case 'f':
451 case 'g':
452 case 'h':
453 case 'i':
454 case 'j':
455 case 'k':
456 case 'l':
457 case 'm':
458 case 'n':
459 case 'o':
460 case 'p':
461 case 'q':
462 case 'r':
463 case 's':
464 case 't':
465 case 'u':
466 case 'v':
467 case 'w':
468 case 'x':
469 case 'y':
470 case 'z':
471 {
472 status = bc_lex_name(lex, token);
473 break;
474 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700475
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700476 case '{':
477 {
478 token->type = BC_LEX_LEFT_BRACE;
479 break;
480 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700481
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700482 case '|':
483 {
484 c2 = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700485
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700486 if (c2 == '|') {
487 ++lex->idx;
488 token->type = BC_LEX_OP_BOOL_OR;
489 }
490 else {
491 token->type = BC_LEX_INVALID;
492 status = BC_STATUS_LEX_INVALID_TOKEN;
493 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700494
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700495 break;
496 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700497
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700498 case '}':
499 {
500 token->type = BC_LEX_RIGHT_BRACE;
501 break;
502 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700503
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700504 default:
505 {
506 token->type = BC_LEX_INVALID;
507 status = BC_STATUS_LEX_INVALID_TOKEN;
508 break;
509 }
510 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700511
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700512 return status;
Gavin Howard8a596d42018-01-15 15:46:01 -0700513}
514
515static BcStatus bc_lex_whitespace(BcLex* lex, BcLexToken* token) {
516
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700517 token->type = BC_LEX_WHITESPACE;
Gavin Howard8a596d42018-01-15 15:46:01 -0700518
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700519 char c = lex->buffer[lex->idx];
Gavin Howard8a596d42018-01-15 15:46:01 -0700520
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700521 while ((isspace(c) && c != '\n') || c == '\\') {
522 ++lex->idx;
523 c = lex->buffer[lex->idx];
524 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700525
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700526 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700527}
528
529static BcStatus bc_lex_string(BcLex* lex, BcLexToken* token) {
530
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700531 uint32_t newlines;
Gavin Howardbdd97f02018-01-17 12:05:04 -0700532
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700533 newlines = 0;
Gavin Howardbdd97f02018-01-17 12:05:04 -0700534
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700535 token->type = BC_LEX_STRING;
Gavin Howard8a596d42018-01-15 15:46:01 -0700536
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700537 size_t i = lex->idx;
538 char c = lex->buffer[i];
Gavin Howard8a596d42018-01-15 15:46:01 -0700539
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700540 while (c != '"' && c != '\0') {
Gavin Howardbdd97f02018-01-17 12:05:04 -0700541
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700542 if (c == '\n') {
543 ++newlines;
544 }
Gavin Howardbdd97f02018-01-17 12:05:04 -0700545
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700546 c = lex->buffer[++i];
547 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700548
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700549 if (c == '\0') {
550 lex->idx = i;
551 return BC_STATUS_LEX_NO_STRING_END;
552 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700553
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700554 size_t len = i - lex->idx;
Gavin Howard8a596d42018-01-15 15:46:01 -0700555
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700556 token->string = malloc(len + 1);
Gavin Howard8a596d42018-01-15 15:46:01 -0700557
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700558 if (token->string == NULL) {
559 return BC_STATUS_MALLOC_FAIL;
560 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700561
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700562 const char* start = lex->buffer + lex->idx;
Gavin Howard8a596d42018-01-15 15:46:01 -0700563
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700564 for (size_t j = 0; j < len; ++j) {
565 token->string[j] = start[j];
566 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700567
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700568 token->string[len] = '\0';
Gavin Howard8a596d42018-01-15 15:46:01 -0700569
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700570 lex->idx = i + 1;
571 lex->line += newlines;
Gavin Howard8a596d42018-01-15 15:46:01 -0700572
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700573 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700574}
575
576static BcStatus bc_lex_comment(BcLex* lex, BcLexToken* token) {
577
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700578 uint32_t newlines;
Gavin Howardbdd97f02018-01-17 12:05:04 -0700579
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700580 newlines = 0;
Gavin Howardbdd97f02018-01-17 12:05:04 -0700581
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700582 token->type = BC_LEX_WHITESPACE;
Gavin Howard8a596d42018-01-15 15:46:01 -0700583
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700584 ++lex->idx;
Gavin Howard8a596d42018-01-15 15:46:01 -0700585
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700586 size_t i = lex->idx;
587 const char* buffer = lex->buffer;
588 char c = buffer[i];
Gavin Howard8a596d42018-01-15 15:46:01 -0700589
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700590 if (c == '\n') {
591 ++newlines;
592 }
Gavin Howardbdd97f02018-01-17 12:05:04 -0700593
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700594 int end = 0;
Gavin Howard8a596d42018-01-15 15:46:01 -0700595
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700596 while (!end) {
Gavin Howard8a596d42018-01-15 15:46:01 -0700597
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700598 while (c != '*' && c != '\0') {
599 c = buffer[++i];
600 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700601
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700602 if (c == '\n') {
603 ++newlines;
604 }
Gavin Howardbdd97f02018-01-17 12:05:04 -0700605
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700606 if (c == '\0' || buffer[i + 1] == '\0') {
607 lex->idx = i;
608 return BC_STATUS_LEX_NO_COMMENT_END;
609 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700610
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700611 end = buffer[i + 1] == '/';
612 i += end ? 0 : 1;
613 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700614
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700615 lex->idx = i + 2;
616 lex->line += newlines;
Gavin Howard8a596d42018-01-15 15:46:01 -0700617
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700618 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700619}
620
621static BcStatus bc_lex_number(BcLex* lex, BcLexToken* token, char start) {
622
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700623 token->type = BC_LEX_NUMBER;
Gavin Howard8a596d42018-01-15 15:46:01 -0700624
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700625 int point = start == '.';
Gavin Howard8a596d42018-01-15 15:46:01 -0700626
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700627 const char* buffer = lex->buffer + lex->idx;
Gavin Howard8a596d42018-01-15 15:46:01 -0700628
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700629 size_t backslashes = 0;
630 size_t i = 0;
631 char c = buffer[i];
Gavin Howard8a596d42018-01-15 15:46:01 -0700632
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700633 while (c && ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') ||
634 (c == '.' && !point) || (c == '\\' && buffer[i + 1] == '\n')))
635 {
636 if (c == '\\') {
637 ++i;
638 backslashes += 1;
639 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700640
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700641 c = buffer[++i];
642 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700643
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700644 size_t len = i + 1;
Gavin Howard8a596d42018-01-15 15:46:01 -0700645
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700646 token->string = malloc(len - backslashes + 1);
Gavin Howard8a596d42018-01-15 15:46:01 -0700647
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700648 if (token->string == NULL) {
649 return BC_STATUS_MALLOC_FAIL;
650 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700651
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700652 token->string[0] = start;
Gavin Howard8a596d42018-01-15 15:46:01 -0700653
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700654 const char* buf = buffer - 1;
655 size_t hits = 0;
Gavin Howard8a596d42018-01-15 15:46:01 -0700656
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700657 for (size_t j = 1; j < len; ++j) {
Gavin Howard8a596d42018-01-15 15:46:01 -0700658
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700659 char c = buf[j];
Gavin Howard8a596d42018-01-15 15:46:01 -0700660
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700661 // If we have hit a backslash, skip it.
662 // We don't have to check for a newline
663 // because it's guaranteed.
664 if (hits < backslashes && c == '\\') {
665 ++hits;
666 ++j;
667 continue;
668 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700669
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700670 token->string[j - (hits * 2)] = c;
671 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700672
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700673 token->string[len] = '\0';
Gavin Howard8a596d42018-01-15 15:46:01 -0700674
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700675 lex->idx += i;
Gavin Howard8a596d42018-01-15 15:46:01 -0700676
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700677 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700678}
679
680static BcStatus bc_lex_name(BcLex* lex, BcLexToken* token) {
681
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700682 const char* buffer = lex->buffer + lex->idx - 1;
Gavin Howard8a596d42018-01-15 15:46:01 -0700683
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700684 for (uint32_t i = 0; i < sizeof(keywords) / sizeof(char*); ++i) {
Gavin Howard8a596d42018-01-15 15:46:01 -0700685
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700686 if (!strncmp(buffer, keywords[i], keyword_lens[i])) {
Gavin Howard8a596d42018-01-15 15:46:01 -0700687
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700688 token->type = BC_LEX_KEY_AUTO + i;
Gavin Howard8a596d42018-01-15 15:46:01 -0700689
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700690 // We need to minus one because the
691 // index has already been incremented.
692 lex->idx += keyword_lens[i] - 1;
Gavin Howard8a596d42018-01-15 15:46:01 -0700693
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700694 return BC_STATUS_SUCCESS;
695 }
696 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700697
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700698 token->type = BC_LEX_NAME;
Gavin Howard8a596d42018-01-15 15:46:01 -0700699
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700700 size_t i = 0;
701 char c = buffer[i];
Gavin Howard8a596d42018-01-15 15:46:01 -0700702
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700703 while ((c >= 'a' && c<= 'z') || (c >= '0' && c <= '9') || c == '_') {
704 ++i;
705 c = buffer[i];
706 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700707
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700708 token->string = malloc(i + 1);
Gavin Howard8a596d42018-01-15 15:46:01 -0700709
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700710 if (token->string == NULL) {
711 return BC_STATUS_MALLOC_FAIL;
712 }
Gavin Howard8a596d42018-01-15 15:46:01 -0700713
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700714 strncpy(token->string, buffer, i);
715 token->string[i] = '\0';
Gavin Howard8a596d42018-01-15 15:46:01 -0700716
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700717 // Increment the index. It is minus one
718 // because it has already been incremented.
719 lex->idx += i - 1;
Gavin Howard8a596d42018-01-15 15:46:01 -0700720
Gavin Howard4bc73ee2018-01-26 11:39:20 -0700721 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700722}