blob: e3c4170acb05a074e685fb79f4fdc20277e8d510 [file] [log] [blame]
Gavin Howard5715b042018-02-12 16:11:42 -07001/*
Gavin Howardb5904bf2018-02-20 13:28:18 -07002 * *****************************************************************************
Gavin Howard5715b042018-02-12 16:11:42 -07003 *
Gavin Howardb5904bf2018-02-20 13:28:18 -07004 * Copyright 2018 Gavin D. Howard
Gavin Howard5715b042018-02-12 16:11:42 -07005 *
6 * Permission to use, copy, modify, and/or distribute this software for any
7 * purpose with or without fee is hereby granted.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
10 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
11 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
12 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
13 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
14 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
15 * PERFORMANCE OF THIS SOFTWARE.
16 *
Gavin Howardb5904bf2018-02-20 13:28:18 -070017 * *****************************************************************************
Gavin Howard5715b042018-02-12 16:11:42 -070018 *
Gavin Howardd2a05252018-09-27 14:00:40 -060019 * Common code for the lexers.
Gavin Howard5715b042018-02-12 16:11:42 -070020 *
21 */
22
Gavin Howard27fdfb92018-03-21 07:56:59 -060023#include <assert.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070024#include <ctype.h>
25#include <stdbool.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070026#include <string.h>
27
Gavin Howard29493062018-03-20 19:57:37 -060028#include <status.h>
Gavin Howard3ba6c8d2018-02-15 12:23:35 -070029#include <lex.h>
Gavin Howardd5551672018-09-22 19:52:42 -060030#include <vm.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070031
Gavin Howard50c8c2d2018-12-27 11:58:34 -070032BcStatus bc_lex_invalidChar(BcLex *l, char c) {
33 l->t = BC_LEX_INVALID;
34 return bc_vm_error(BC_ERROR_PARSE_CHAR, l->line, c);
35}
36
Gavin Howarded5c8312018-09-27 12:04:08 -060037void bc_lex_lineComment(BcLex *l) {
Gavin Howardad477312018-12-24 15:51:35 -070038 l->t = BC_LEX_WHITESPACE;
Gavin Howard53eba8b2018-10-31 15:14:37 -060039 while (l->i < l->len && l->buf[l->i++] != '\n');
Gavin Howarded5c8312018-09-27 12:04:08 -060040}
41
Gavin Howard52446f22018-12-13 11:39:22 -070042BcStatus bc_lex_comment(BcLex *l) {
43
44 size_t i, nlines = 0;
45 const char *buf = l->buf;
46 bool end = false;
47 char c;
48
Gavin Howardad477312018-12-24 15:51:35 -070049 l->t = BC_LEX_WHITESPACE;
Gavin Howard52446f22018-12-13 11:39:22 -070050
51 for (i = ++l->i; !end; i += !end) {
52
53 for (c = buf[i]; c != '*' && c != 0; c = buf[++i]) nlines += c == '\n';
54
55 if (c == 0 || buf[i + 1] == '\0') {
56 l->i = i;
Gavin Howard8e306632018-12-21 12:55:35 -070057 return bc_vm_error(BC_ERROR_PARSE_COMMENT, l->line);
Gavin Howard52446f22018-12-13 11:39:22 -070058 }
59
60 end = buf[i + 1] == '/';
61 }
62
63 l->i = i + 2;
64 l->line += nlines;
65
66 return BC_STATUS_SUCCESS;
67}
68
Gavin Howard364df3b2018-09-28 09:48:19 -060069void bc_lex_whitespace(BcLex *l) {
70 char c;
Gavin Howardad477312018-12-24 15:51:35 -070071 l->t = BC_LEX_WHITESPACE;
Gavin Howard53eba8b2018-10-31 15:14:37 -060072 for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
Gavin Howard364df3b2018-09-28 09:48:19 -060073}
74
Gavin Howard63738202018-09-26 15:34:20 -060075BcStatus bc_lex_number(BcLex *l, char start) {
Gavin Howard8a596d42018-01-15 15:46:01 -070076
Gavin Howard53eba8b2018-10-31 15:14:37 -060077 const char *buf = l->buf + l->i;
Gavin Howardc39fd492018-10-04 10:07:03 -060078 size_t len, hits = 0, bslashes = 0, i = 0, j;
Gavin Howard63738202018-09-26 15:34:20 -060079 char c = buf[i];
80 bool last_pt, pt = start == '.';
Gavin Howardf2a40492018-03-05 11:27:29 -070081
Gavin Howard63738202018-09-26 15:34:20 -060082 last_pt = pt;
Gavin Howardad477312018-12-24 15:51:35 -070083 l->t = BC_LEX_NUMBER;
Gavin Howard07732ec2018-02-27 15:40:02 -070084
Gavin Howard53eba8b2018-10-31 15:14:37 -060085 while (c != 0 && (isdigit(c) || (c >= 'A' && c <= 'F') ||
Gavin Howard63738202018-09-26 15:34:20 -060086 (c == '.' && !pt) || (c == '\\' && buf[i + 1] == '\n')))
87 {
88 if (c != '\\') {
89 last_pt = c == '.';
90 pt = pt || last_pt;
91 }
92 else {
93 ++i;
94 bslashes += 1;
95 }
Gavin Howard07732ec2018-02-27 15:40:02 -070096
Gavin Howard63738202018-09-26 15:34:20 -060097 c = buf[++i];
98 }
Gavin Howard07732ec2018-02-27 15:40:02 -070099
Gavin Howard63738202018-09-26 15:34:20 -0600100 len = i + 1 * !last_pt - bslashes * 2;
Gavin Howard7536dcf2018-12-15 19:27:09 -0700101
102 if (len > BC_MAX_NUM) return bc_vm_error(BC_ERROR_EXEC_NUM_LEN, l->line);
Gavin Howard07732ec2018-02-27 15:40:02 -0700103
Gavin Howardad477312018-12-24 15:51:35 -0700104 bc_vec_npop(&l->str, l->str.len);
105 bc_vec_expand(&l->str, len + 1);
106 bc_vec_push(&l->str, &start);
Gavin Howarda628aa22018-09-12 13:52:45 -0600107
Gavin Howardc39fd492018-10-04 10:07:03 -0600108 for (buf -= 1, j = 1; j < len + hits * 2; ++j) {
Gavin Howard07732ec2018-02-27 15:40:02 -0700109
Gavin Howard63738202018-09-26 15:34:20 -0600110 c = buf[j];
Gavin Howard07732ec2018-02-27 15:40:02 -0700111
Gavin Howard63738202018-09-26 15:34:20 -0600112 // If we have hit a backslash, skip it. We don't have
113 // to check for a newline because it's guaranteed.
114 if (hits < bslashes && c == '\\') {
115 ++hits;
116 ++j;
117 continue;
118 }
Gavin Howard07732ec2018-02-27 15:40:02 -0700119
Gavin Howardad477312018-12-24 15:51:35 -0700120 bc_vec_push(&l->str, &c);
Gavin Howard63738202018-09-26 15:34:20 -0600121 }
Gavin Howard07732ec2018-02-27 15:40:02 -0700122
Gavin Howardad477312018-12-24 15:51:35 -0700123 bc_vec_pushByte(&l->str, '\0');
Gavin Howard53eba8b2018-10-31 15:14:37 -0600124 l->i += i;
Gavin Howard8a596d42018-01-15 15:46:01 -0700125
Gavin Howard63738202018-09-26 15:34:20 -0600126 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700127}
128
Gavin Howard8412ba82018-10-04 13:03:24 -0600129BcStatus bc_lex_name(BcLex *l) {
130
Gavin Howard88c25302018-10-17 13:32:23 -0600131 size_t i = 0;
Gavin Howard53eba8b2018-10-31 15:14:37 -0600132 const char *buf = l->buf + l->i - 1;
Gavin Howard88c25302018-10-17 13:32:23 -0600133 char c = buf[i];
Gavin Howard8412ba82018-10-04 13:03:24 -0600134
Gavin Howardad477312018-12-24 15:51:35 -0700135 l->t = BC_LEX_NAME;
Gavin Howard8412ba82018-10-04 13:03:24 -0600136
Gavin Howard9a4b6cd2018-10-23 15:13:30 -0600137 while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
Gavin Howard8412ba82018-10-04 13:03:24 -0600138
Gavin Howard7536dcf2018-12-15 19:27:09 -0700139 if (i > BC_MAX_STRING) return bc_vm_error(BC_ERROR_EXEC_NAME_LEN, l->line);
140
Gavin Howardad477312018-12-24 15:51:35 -0700141 bc_vec_string(&l->str, i, buf);
Gavin Howard8412ba82018-10-04 13:03:24 -0600142
143 // Increment the index. We minus 1 because it has already been incremented.
Gavin Howard53eba8b2018-10-31 15:14:37 -0600144 l->i += i - 1;
Gavin Howard8412ba82018-10-04 13:03:24 -0600145
146 return BC_STATUS_SUCCESS;
147}
148
Gavin Howardad0ecfe2018-10-30 01:16:01 -0600149void bc_lex_init(BcLex *l, BcLexNext next) {
Gavin Howard63738202018-09-26 15:34:20 -0600150 assert(l);
Gavin Howarded5c8312018-09-27 12:04:08 -0600151 l->next = next;
Gavin Howardad477312018-12-24 15:51:35 -0700152 bc_vec_init(&l->str, sizeof(char), NULL);
Gavin Howard69185042018-09-10 15:46:20 -0600153}
154
Gavin Howard63738202018-09-26 15:34:20 -0600155void bc_lex_free(BcLex *l) {
156 assert(l);
Gavin Howardad477312018-12-24 15:51:35 -0700157 bc_vec_free(&l->str);
Gavin Howard69185042018-09-10 15:46:20 -0600158}
159
Gavin Howard63738202018-09-26 15:34:20 -0600160void bc_lex_file(BcLex *l, const char *file) {
161 assert(l && file);
162 l->line = 1;
Gavin Howard7536dcf2018-12-15 19:27:09 -0700163 vm->file = file;
Gavin Howard8a596d42018-01-15 15:46:01 -0700164}
165
Gavin Howard364df3b2018-09-28 09:48:19 -0600166BcStatus bc_lex_next(BcLex *l) {
167
168 BcStatus s;
169
170 assert(l);
171
Gavin Howardad477312018-12-24 15:51:35 -0700172 l->last = l->t;
Gavin Howardf9b86ee2018-12-28 13:18:27 -0700173 l->line += l->last == BC_LEX_NLINE;
Gavin Howard7536dcf2018-12-15 19:27:09 -0700174
Gavin Howardad477312018-12-24 15:51:35 -0700175 if (l->last == BC_LEX_EOF) return bc_vm_error(BC_ERROR_PARSE_EOF, l->line);
Gavin Howard364df3b2018-09-28 09:48:19 -0600176
Gavin Howardad477312018-12-24 15:51:35 -0700177 l->t = BC_LEX_EOF;
Gavin Howardc39fd492018-10-04 10:07:03 -0600178
Gavin Howardf9b86ee2018-12-28 13:18:27 -0700179 if (l->i == l->len) return BC_STATUS_SUCCESS;
Gavin Howard364df3b2018-09-28 09:48:19 -0600180
Gavin Howard364df3b2018-09-28 09:48:19 -0600181 // Loop until failure or we don't have whitespace. This
182 // is so the parser doesn't get inundated with whitespace.
Gavin Howard53eba8b2018-10-31 15:14:37 -0600183 do {
184 s = l->next(l);
Gavin Howardad477312018-12-24 15:51:35 -0700185 } while (!s && l->t == BC_LEX_WHITESPACE);
Gavin Howard364df3b2018-09-28 09:48:19 -0600186
187 return s;
Gavin Howard35753922018-03-21 19:22:08 -0600188}
Gavin Howardc9a9c472018-10-02 17:23:01 -0600189
190BcStatus bc_lex_text(BcLex *l, const char *text) {
191 assert(l && text);
Gavin Howard890d0c02018-10-30 16:34:50 -0600192 l->buf = text;
Gavin Howard53eba8b2018-10-31 15:14:37 -0600193 l->i = 0;
Gavin Howardc9a9c472018-10-02 17:23:01 -0600194 l->len = strlen(text);
Gavin Howardad477312018-12-24 15:51:35 -0700195 l->t = l->last = BC_LEX_INVALID;
Gavin Howardc9a9c472018-10-02 17:23:01 -0600196 return bc_lex_next(l);
197}