blob: c34edd9e6e34364b7d10bdadf6e3a64bcc39828c [file] [log] [blame]
/*
 * *****************************************************************************
 *
 * Copyright 2018 Gavin D. Howard
 *
 * Permission to use, copy, modify, and/or distribute this software for any
 * purpose with or without fee is hereby granted.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
 * REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY
 * AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
 * INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM
 * LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
 * PERFORMANCE OF THIS SOFTWARE.
 *
 * *****************************************************************************
 *
 * Common code for the lexers.
 *
 */
22
Gavin Howard27fdfb92018-03-21 07:56:59 -060023#include <assert.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070024#include <ctype.h>
25#include <stdbool.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070026#include <string.h>
27
Gavin Howard29493062018-03-20 19:57:37 -060028#include <status.h>
Gavin Howard3ba6c8d2018-02-15 12:23:35 -070029#include <lex.h>
Gavin Howardd5551672018-09-22 19:52:42 -060030#include <vm.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070031
Gavin Howard50c8c2d2018-12-27 11:58:34 -070032BcStatus bc_lex_invalidChar(BcLex *l, char c) {
33 l->t = BC_LEX_INVALID;
Gavin Howard03cd1122018-12-31 14:08:15 -070034 return bc_lex_verr(l, BC_ERROR_PARSE_CHAR, c);
Gavin Howard50c8c2d2018-12-27 11:58:34 -070035}
36
Gavin Howarded5c8312018-09-27 12:04:08 -060037void bc_lex_lineComment(BcLex *l) {
Gavin Howardad477312018-12-24 15:51:35 -070038 l->t = BC_LEX_WHITESPACE;
Gavin Howard5c14da62019-02-16 23:47:48 -070039 while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1;
Gavin Howarded5c8312018-09-27 12:04:08 -060040}
41
Gavin Howard52446f22018-12-13 11:39:22 -070042BcStatus bc_lex_comment(BcLex *l) {
43
44 size_t i, nlines = 0;
45 const char *buf = l->buf;
46 bool end = false;
47 char c;
48
Gavin Howard5c14da62019-02-16 23:47:48 -070049 l->i += 1;
Gavin Howardad477312018-12-24 15:51:35 -070050 l->t = BC_LEX_WHITESPACE;
Gavin Howard52446f22018-12-13 11:39:22 -070051
Gavin Howard5c14da62019-02-16 23:47:48 -070052 for (i = l->i; !end; i += !end) {
Gavin Howard52446f22018-12-13 11:39:22 -070053
Gavin Howard1ab22d22019-01-03 13:32:17 -070054 for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
Gavin Howard52446f22018-12-13 11:39:22 -070055
Gavin Howard1ab22d22019-01-03 13:32:17 -070056 if (!c || buf[i + 1] == '\0') {
Gavin Howard52446f22018-12-13 11:39:22 -070057 l->i = i;
Gavin Howard03cd1122018-12-31 14:08:15 -070058 return bc_lex_err(l, BC_ERROR_PARSE_COMMENT);
Gavin Howard52446f22018-12-13 11:39:22 -070059 }
60
61 end = buf[i + 1] == '/';
62 }
63
64 l->i = i + 2;
65 l->line += nlines;
66
67 return BC_STATUS_SUCCESS;
68}
69
Gavin Howard364df3b2018-09-28 09:48:19 -060070void bc_lex_whitespace(BcLex *l) {
71 char c;
Gavin Howardad477312018-12-24 15:51:35 -070072 l->t = BC_LEX_WHITESPACE;
Gavin Howard53eba8b2018-10-31 15:14:37 -060073 for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
Gavin Howard364df3b2018-09-28 09:48:19 -060074}
75
Gavin Howard3ae1f8e2019-02-15 11:56:25 -070076void bc_lex_commonTokens(BcLex *l, char c) {
77 if (!c) l->t = BC_LEX_EOF;
78 else if (c == '\n') l->t = BC_LEX_NLINE;
79 else bc_lex_whitespace(l);
80}
81
Gavin Howard7ad5a662019-02-19 14:40:46 -070082static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
Gavin Howard8a596d42018-01-15 15:46:01 -070083
Gavin Howard53eba8b2018-10-31 15:14:37 -060084 const char *buf = l->buf + l->i;
Gavin Howard8dd307e2019-01-08 23:05:19 -070085 size_t i;
Gavin Howard7ad5a662019-02-19 14:40:46 -070086 char c;
Gavin Howard94f14102019-01-11 09:39:57 -070087 bool last_pt, pt = (start == '.');
Gavin Howardf2a40492018-03-05 11:27:29 -070088
Gavin Howard7ad5a662019-02-19 14:40:46 -070089 for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
Gavin Howard8dd307e2019-01-08 23:05:19 -070090 (c == '\\' && buf[i + 1] == '\n')); ++i)
91 {
Gavin Howard25f60882019-01-11 09:20:51 -070092 if (c == '\\') {
93
94 if (buf[i + 1] == '\n') {
95
96 i += 2;
97
98 // Make sure to eat whitespace at the beginning of the line.
Gavin Howard7ad5a662019-02-19 14:40:46 -070099 while(isspace(buf[i]) && buf[i] != '\n') i += 1;
Gavin Howard25f60882019-01-11 09:20:51 -0700100
101 c = buf[i];
102
Gavin Howard7ad5a662019-02-19 14:40:46 -0700103 if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
Gavin Howard25f60882019-01-11 09:20:51 -0700104 }
105 else break;
Gavin Howard63738202018-09-26 15:34:20 -0600106 }
Gavin Howard8dd307e2019-01-08 23:05:19 -0700107
Gavin Howard94f14102019-01-11 09:39:57 -0700108 last_pt = (c == '.');
Gavin Howard25f60882019-01-11 09:20:51 -0700109 if (pt && last_pt) break;
Gavin Howard94f14102019-01-11 09:39:57 -0700110 pt = pt || last_pt;
Gavin Howard07732ec2018-02-27 15:40:02 -0700111
Gavin Howardad477312018-12-24 15:51:35 -0700112 bc_vec_push(&l->str, &c);
Gavin Howard63738202018-09-26 15:34:20 -0600113 }
Gavin Howard07732ec2018-02-27 15:40:02 -0700114
Gavin Howard7ad5a662019-02-19 14:40:46 -0700115 return i;
116}
117
118BcStatus bc_lex_number(BcLex *l, char start) {
119
120 l->t = BC_LEX_NUMBER;
121
122 bc_vec_npop(&l->str, l->str.len);
123 bc_vec_push(&l->str, &start);
124
125 l->i += bc_lex_num(l, start, false);
126#if BC_ENABLE_EXTRA_MATH
127 {
128 char c = l->buf[l->i];
129
130 if (c == 'e') {
131
132 bc_vec_push(&l->str, &c);
133 l->i += 1;
134 c = l->buf[l->i];
135
136 if (c == BC_LEX_NEG_CHAR) {
137 bc_vec_push(&l->str, &c);
138 l->i += 1;
139 c = l->buf[l->i];
140 }
141
142 if (!BC_LEX_NUM_CHAR(c, false, true))
143 return bc_lex_verr(l, BC_ERROR_PARSE_CHAR, c);
144
145 l->i += bc_lex_num(l, 0, true);
146 }
147 }
148#endif // BC_ENABLE_EXTRA_MATH
149
Gavin Howardad477312018-12-24 15:51:35 -0700150 bc_vec_pushByte(&l->str, '\0');
Gavin Howard7ad5a662019-02-19 14:40:46 -0700151
152 return BC_STATUS_SUCCESS;
Gavin Howard8a596d42018-01-15 15:46:01 -0700153}
154
Gavin Howard5c14da62019-02-16 23:47:48 -0700155void bc_lex_name(BcLex *l) {
Gavin Howard8412ba82018-10-04 13:03:24 -0600156
Gavin Howard88c25302018-10-17 13:32:23 -0600157 size_t i = 0;
Gavin Howard53eba8b2018-10-31 15:14:37 -0600158 const char *buf = l->buf + l->i - 1;
Gavin Howard88c25302018-10-17 13:32:23 -0600159 char c = buf[i];
Gavin Howard8412ba82018-10-04 13:03:24 -0600160
Gavin Howardad477312018-12-24 15:51:35 -0700161 l->t = BC_LEX_NAME;
Gavin Howard8412ba82018-10-04 13:03:24 -0600162
Gavin Howard9a4b6cd2018-10-23 15:13:30 -0600163 while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
Gavin Howard8412ba82018-10-04 13:03:24 -0600164
Gavin Howardad477312018-12-24 15:51:35 -0700165 bc_vec_string(&l->str, i, buf);
Gavin Howard8412ba82018-10-04 13:03:24 -0600166
167 // Increment the index. We minus 1 because it has already been incremented.
Gavin Howard53eba8b2018-10-31 15:14:37 -0600168 l->i += i - 1;
Gavin Howard8412ba82018-10-04 13:03:24 -0600169}
170
Gavin Howard48354e82019-01-02 18:15:56 -0700171void bc_lex_init(BcLex *l) {
Gavin Howard63738202018-09-26 15:34:20 -0600172 assert(l);
Gavin Howardad477312018-12-24 15:51:35 -0700173 bc_vec_init(&l->str, sizeof(char), NULL);
Gavin Howard69185042018-09-10 15:46:20 -0600174}
175
Gavin Howard63738202018-09-26 15:34:20 -0600176void bc_lex_free(BcLex *l) {
177 assert(l);
Gavin Howardad477312018-12-24 15:51:35 -0700178 bc_vec_free(&l->str);
Gavin Howard69185042018-09-10 15:46:20 -0600179}
180
Gavin Howard63738202018-09-26 15:34:20 -0600181void bc_lex_file(BcLex *l, const char *file) {
182 assert(l && file);
183 l->line = 1;
Gavin Howard7536dcf2018-12-15 19:27:09 -0700184 vm->file = file;
Gavin Howard8a596d42018-01-15 15:46:01 -0700185}
186
Gavin Howard364df3b2018-09-28 09:48:19 -0600187BcStatus bc_lex_next(BcLex *l) {
188
189 BcStatus s;
190
191 assert(l);
192
Gavin Howardad477312018-12-24 15:51:35 -0700193 l->last = l->t;
Gavin Howard56158792019-01-14 12:03:14 -0700194 l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
Gavin Howard7536dcf2018-12-15 19:27:09 -0700195
Gavin Howard03cd1122018-12-31 14:08:15 -0700196 if (l->last == BC_LEX_EOF) return bc_lex_err(l, BC_ERROR_PARSE_EOF);
Gavin Howard364df3b2018-09-28 09:48:19 -0600197
Gavin Howardad477312018-12-24 15:51:35 -0700198 l->t = BC_LEX_EOF;
Gavin Howardc39fd492018-10-04 10:07:03 -0600199
Gavin Howardf9b86ee2018-12-28 13:18:27 -0700200 if (l->i == l->len) return BC_STATUS_SUCCESS;
Gavin Howard364df3b2018-09-28 09:48:19 -0600201
Gavin Howard364df3b2018-09-28 09:48:19 -0600202 // Loop until failure or we don't have whitespace. This
203 // is so the parser doesn't get inundated with whitespace.
Gavin Howard53eba8b2018-10-31 15:14:37 -0600204 do {
Gavin Howard48354e82019-01-02 18:15:56 -0700205 s = vm->next(l);
Gavin Howardad477312018-12-24 15:51:35 -0700206 } while (!s && l->t == BC_LEX_WHITESPACE);
Gavin Howard364df3b2018-09-28 09:48:19 -0600207
208 return s;
Gavin Howard35753922018-03-21 19:22:08 -0600209}
Gavin Howardc9a9c472018-10-02 17:23:01 -0600210
211BcStatus bc_lex_text(BcLex *l, const char *text) {
212 assert(l && text);
Gavin Howard890d0c02018-10-30 16:34:50 -0600213 l->buf = text;
Gavin Howard53eba8b2018-10-31 15:14:37 -0600214 l->i = 0;
Gavin Howardc9a9c472018-10-02 17:23:01 -0600215 l->len = strlen(text);
Gavin Howardad477312018-12-24 15:51:35 -0700216 l->t = l->last = BC_LEX_INVALID;
Gavin Howardc9a9c472018-10-02 17:23:01 -0600217 return bc_lex_next(l);
218}