blob: f383b5437dc419d486e334eca7b5c26618f28904 [file] [log] [blame]
Gavin Howard5715b042018-02-12 16:11:42 -07001/*
Gavin Howardb5904bf2018-02-20 13:28:18 -07002 * *****************************************************************************
Gavin Howard5715b042018-02-12 16:11:42 -07003 *
Gavin Howard29e00ba2020-06-30 09:25:21 -06004 * SPDX-License-Identifier: BSD-2-Clause
5 *
Zach van Rijn6d2cf3f2020-01-14 22:05:02 +00006 * Copyright (c) 2018-2020 Gavin D. Howard and contributors.
Gavin Howard5715b042018-02-12 16:11:42 -07007 *
Gavin Howard7345cb92019-04-08 14:13:43 -06008 * All rights reserved.
Gavin Howard5715b042018-02-12 16:11:42 -07009 *
Gavin Howard7345cb92019-04-08 14:13:43 -060010 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * * Redistributions of source code must retain the above copyright notice, this
14 * list of conditions and the following disclaimer.
15 *
16 * * Redistributions in binary form must reproduce the above copyright notice,
17 * this list of conditions and the following disclaimer in the documentation
18 * and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
21 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
24 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
Gavin Howard5715b042018-02-12 16:11:42 -070031 *
Gavin Howardb5904bf2018-02-20 13:28:18 -070032 * *****************************************************************************
Gavin Howard5715b042018-02-12 16:11:42 -070033 *
Gavin Howardd2a05252018-09-27 14:00:40 -060034 * Common code for the lexers.
Gavin Howard5715b042018-02-12 16:11:42 -070035 *
36 */
37
Gavin Howard27fdfb92018-03-21 07:56:59 -060038#include <assert.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070039#include <ctype.h>
40#include <stdbool.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070041#include <string.h>
42
Gavin Howard29493062018-03-20 19:57:37 -060043#include <status.h>
Gavin Howard3ba6c8d2018-02-15 12:23:35 -070044#include <lex.h>
Gavin Howardd5551672018-09-22 19:52:42 -060045#include <vm.h>
Gavin Howarda6527882019-02-19 20:23:17 -070046#include <bc.h>
Gavin Howard8a596d42018-01-15 15:46:01 -070047
Gavin Howard5a321892020-05-16 17:27:55 -060048void bc_lex_invalidChar(BcLex *l, char c) {
Gavin Howard50c8c2d2018-12-27 11:58:34 -070049 l->t = BC_LEX_INVALID;
Gavin Howard5a321892020-05-16 17:27:55 -060050 bc_lex_verr(l, BC_ERROR_PARSE_CHAR, c);
Gavin Howard50c8c2d2018-12-27 11:58:34 -070051}
52
Gavin Howarded5c8312018-09-27 12:04:08 -060053void bc_lex_lineComment(BcLex *l) {
Gavin Howardad477312018-12-24 15:51:35 -070054 l->t = BC_LEX_WHITESPACE;
Gavin Howard5c14da62019-02-16 23:47:48 -070055 while (l->i < l->len && l->buf[l->i] != '\n') l->i += 1;
Gavin Howarded5c8312018-09-27 12:04:08 -060056}
57
Gavin Howard5a321892020-05-16 17:27:55 -060058void bc_lex_comment(BcLex *l) {
Gavin Howard52446f22018-12-13 11:39:22 -070059
60 size_t i, nlines = 0;
61 const char *buf = l->buf;
62 bool end = false;
63 char c;
64
Gavin Howard5c14da62019-02-16 23:47:48 -070065 l->i += 1;
Gavin Howardad477312018-12-24 15:51:35 -070066 l->t = BC_LEX_WHITESPACE;
Gavin Howard52446f22018-12-13 11:39:22 -070067
Gavin Howard5c14da62019-02-16 23:47:48 -070068 for (i = l->i; !end; i += !end) {
Gavin Howard52446f22018-12-13 11:39:22 -070069
Gavin Howard1ab22d22019-01-03 13:32:17 -070070 for (; (c = buf[i]) && c != '*'; ++i) nlines += (c == '\n');
Gavin Howard52446f22018-12-13 11:39:22 -070071
Gavin Howardecafd4f2019-02-23 09:30:45 -070072 if (BC_ERR(!c || buf[i + 1] == '\0')) {
Gavin Howard52446f22018-12-13 11:39:22 -070073 l->i = i;
Gavin Howard5a321892020-05-16 17:27:55 -060074 bc_lex_err(l, BC_ERROR_PARSE_COMMENT);
Gavin Howard52446f22018-12-13 11:39:22 -070075 }
76
77 end = buf[i + 1] == '/';
78 }
79
80 l->i = i + 2;
81 l->line += nlines;
Gavin Howard52446f22018-12-13 11:39:22 -070082}
83
Gavin Howard364df3b2018-09-28 09:48:19 -060084void bc_lex_whitespace(BcLex *l) {
85 char c;
Gavin Howardad477312018-12-24 15:51:35 -070086 l->t = BC_LEX_WHITESPACE;
Gavin Howard53eba8b2018-10-31 15:14:37 -060087 for (c = l->buf[l->i]; c != '\n' && isspace(c); c = l->buf[++l->i]);
Gavin Howard364df3b2018-09-28 09:48:19 -060088}
89
Gavin Howard3ae1f8e2019-02-15 11:56:25 -070090void bc_lex_commonTokens(BcLex *l, char c) {
91 if (!c) l->t = BC_LEX_EOF;
92 else if (c == '\n') l->t = BC_LEX_NLINE;
93 else bc_lex_whitespace(l);
94}
95
Gavin Howard7ad5a662019-02-19 14:40:46 -070096static size_t bc_lex_num(BcLex *l, char start, bool int_only) {
Gavin Howard8a596d42018-01-15 15:46:01 -070097
Gavin Howard53eba8b2018-10-31 15:14:37 -060098 const char *buf = l->buf + l->i;
Gavin Howard8dd307e2019-01-08 23:05:19 -070099 size_t i;
Gavin Howard7ad5a662019-02-19 14:40:46 -0700100 char c;
Gavin Howard94f14102019-01-11 09:39:57 -0700101 bool last_pt, pt = (start == '.');
Gavin Howardf2a40492018-03-05 11:27:29 -0700102
Gavin Howard7ad5a662019-02-19 14:40:46 -0700103 for (i = 0; (c = buf[i]) && (BC_LEX_NUM_CHAR(c, pt, int_only) ||
Gavin Howard8dd307e2019-01-08 23:05:19 -0700104 (c == '\\' && buf[i + 1] == '\n')); ++i)
105 {
Gavin Howard25f60882019-01-11 09:20:51 -0700106 if (c == '\\') {
107
108 if (buf[i + 1] == '\n') {
109
110 i += 2;
111
112 // Make sure to eat whitespace at the beginning of the line.
Gavin Howard7ad5a662019-02-19 14:40:46 -0700113 while(isspace(buf[i]) && buf[i] != '\n') i += 1;
Gavin Howard25f60882019-01-11 09:20:51 -0700114
115 c = buf[i];
116
Gavin Howard7ad5a662019-02-19 14:40:46 -0700117 if (!BC_LEX_NUM_CHAR(c, pt, int_only)) break;
Gavin Howard25f60882019-01-11 09:20:51 -0700118 }
119 else break;
Gavin Howard63738202018-09-26 15:34:20 -0600120 }
Gavin Howard8dd307e2019-01-08 23:05:19 -0700121
Gavin Howard94f14102019-01-11 09:39:57 -0700122 last_pt = (c == '.');
Gavin Howard25f60882019-01-11 09:20:51 -0700123 if (pt && last_pt) break;
Gavin Howard94f14102019-01-11 09:39:57 -0700124 pt = pt || last_pt;
Gavin Howard07732ec2018-02-27 15:40:02 -0700125
Gavin Howardad477312018-12-24 15:51:35 -0700126 bc_vec_push(&l->str, &c);
Gavin Howard63738202018-09-26 15:34:20 -0600127 }
Gavin Howard07732ec2018-02-27 15:40:02 -0700128
Gavin Howard7ad5a662019-02-19 14:40:46 -0700129 return i;
130}
131
Gavin Howard5a321892020-05-16 17:27:55 -0600132void bc_lex_number(BcLex *l, char start) {
Gavin Howard7ad5a662019-02-19 14:40:46 -0700133
134 l->t = BC_LEX_NUMBER;
135
136 bc_vec_npop(&l->str, l->str.len);
137 bc_vec_push(&l->str, &start);
138
139 l->i += bc_lex_num(l, start, false);
Gavin Howarda6527882019-02-19 20:23:17 -0700140
Gavin Howard7ad5a662019-02-19 14:40:46 -0700141#if BC_ENABLE_EXTRA_MATH
142 {
143 char c = l->buf[l->i];
144
145 if (c == 'e') {
146
Gavin Howarda6527882019-02-19 20:23:17 -0700147#if BC_ENABLED
Gavin Howard5a321892020-05-16 17:27:55 -0600148 if (BC_IS_POSIX) bc_lex_err(l, BC_ERROR_POSIX_EXP_NUM);
Gavin Howarda6527882019-02-19 20:23:17 -0700149#endif // BC_ENABLED
150
Gavin Howard7ad5a662019-02-19 14:40:46 -0700151 bc_vec_push(&l->str, &c);
152 l->i += 1;
153 c = l->buf[l->i];
154
155 if (c == BC_LEX_NEG_CHAR) {
156 bc_vec_push(&l->str, &c);
157 l->i += 1;
158 c = l->buf[l->i];
159 }
160
Gavin Howardecafd4f2019-02-23 09:30:45 -0700161 if (BC_ERR(!BC_LEX_NUM_CHAR(c, false, true)))
Gavin Howard5a321892020-05-16 17:27:55 -0600162 bc_lex_verr(l, BC_ERROR_PARSE_CHAR, c);
Gavin Howard7ad5a662019-02-19 14:40:46 -0700163
164 l->i += bc_lex_num(l, 0, true);
165 }
166 }
167#endif // BC_ENABLE_EXTRA_MATH
168
Gavin Howardad477312018-12-24 15:51:35 -0700169 bc_vec_pushByte(&l->str, '\0');
Gavin Howard8a596d42018-01-15 15:46:01 -0700170}
171
Gavin Howard5c14da62019-02-16 23:47:48 -0700172void bc_lex_name(BcLex *l) {
Gavin Howard8412ba82018-10-04 13:03:24 -0600173
Gavin Howard88c25302018-10-17 13:32:23 -0600174 size_t i = 0;
Gavin Howard53eba8b2018-10-31 15:14:37 -0600175 const char *buf = l->buf + l->i - 1;
Gavin Howard88c25302018-10-17 13:32:23 -0600176 char c = buf[i];
Gavin Howard8412ba82018-10-04 13:03:24 -0600177
Gavin Howardad477312018-12-24 15:51:35 -0700178 l->t = BC_LEX_NAME;
Gavin Howard8412ba82018-10-04 13:03:24 -0600179
Gavin Howard9a4b6cd2018-10-23 15:13:30 -0600180 while ((c >= 'a' && c <= 'z') || isdigit(c) || c == '_') c = buf[++i];
Gavin Howard8412ba82018-10-04 13:03:24 -0600181
Gavin Howardad477312018-12-24 15:51:35 -0700182 bc_vec_string(&l->str, i, buf);
Gavin Howard8412ba82018-10-04 13:03:24 -0600183
184 // Increment the index. We minus 1 because it has already been incremented.
Gavin Howard53eba8b2018-10-31 15:14:37 -0600185 l->i += i - 1;
Gavin Howard8412ba82018-10-04 13:03:24 -0600186}
187
Gavin Howard48354e82019-01-02 18:15:56 -0700188void bc_lex_init(BcLex *l) {
Gavin Howardbbd6f552020-05-22 19:39:13 -0600189 BC_SIG_ASSERT_LOCKED;
Gavin Howardfe9a3022019-06-21 20:40:45 -0600190 assert(l != NULL);
Gavin Howardad477312018-12-24 15:51:35 -0700191 bc_vec_init(&l->str, sizeof(char), NULL);
Gavin Howard69185042018-09-10 15:46:20 -0600192}
193
Gavin Howard63738202018-09-26 15:34:20 -0600194void bc_lex_free(BcLex *l) {
Gavin Howardbbd6f552020-05-22 19:39:13 -0600195 BC_SIG_ASSERT_LOCKED;
Gavin Howardfe9a3022019-06-21 20:40:45 -0600196 assert(l != NULL);
Gavin Howardad477312018-12-24 15:51:35 -0700197 bc_vec_free(&l->str);
Gavin Howard69185042018-09-10 15:46:20 -0600198}
199
Gavin Howard63738202018-09-26 15:34:20 -0600200void bc_lex_file(BcLex *l, const char *file) {
Gavin Howardfe9a3022019-06-21 20:40:45 -0600201 assert(l != NULL && file != NULL);
Gavin Howard63738202018-09-26 15:34:20 -0600202 l->line = 1;
Gavin Howard3e9a8442020-05-15 13:27:18 -0600203 vm.file = file;
Gavin Howard8a596d42018-01-15 15:46:01 -0700204}
205
Gavin Howard5a321892020-05-16 17:27:55 -0600206void bc_lex_next(BcLex *l) {
Gavin Howard364df3b2018-09-28 09:48:19 -0600207
Gavin Howardfe9a3022019-06-21 20:40:45 -0600208 assert(l != NULL);
Gavin Howard364df3b2018-09-28 09:48:19 -0600209
Gavin Howardad477312018-12-24 15:51:35 -0700210 l->last = l->t;
Gavin Howard56158792019-01-14 12:03:14 -0700211 l->line += (l->i != 0 && l->buf[l->i - 1] == '\n');
Gavin Howard7536dcf2018-12-15 19:27:09 -0700212
Gavin Howard5a321892020-05-16 17:27:55 -0600213 if (BC_ERR(l->last == BC_LEX_EOF)) bc_lex_err(l, BC_ERROR_PARSE_EOF);
Gavin Howard364df3b2018-09-28 09:48:19 -0600214
Gavin Howardad477312018-12-24 15:51:35 -0700215 l->t = BC_LEX_EOF;
Gavin Howardc39fd492018-10-04 10:07:03 -0600216
Gavin Howard5a321892020-05-16 17:27:55 -0600217 if (l->i == l->len) return;
Gavin Howard364df3b2018-09-28 09:48:19 -0600218
Gavin Howard364df3b2018-09-28 09:48:19 -0600219 // Loop until failure or we don't have whitespace. This
220 // is so the parser doesn't get inundated with whitespace.
Gavin Howard53eba8b2018-10-31 15:14:37 -0600221 do {
Gavin Howard5a321892020-05-16 17:27:55 -0600222 vm.next(l);
223 } while (l->t == BC_LEX_WHITESPACE);
Gavin Howard35753922018-03-21 19:22:08 -0600224}
Gavin Howardc9a9c472018-10-02 17:23:01 -0600225
Gavin Howard5a321892020-05-16 17:27:55 -0600226void bc_lex_text(BcLex *l, const char *text) {
Gavin Howardfe9a3022019-06-21 20:40:45 -0600227 assert(l != NULL && text != NULL);
Gavin Howard890d0c02018-10-30 16:34:50 -0600228 l->buf = text;
Gavin Howard53eba8b2018-10-31 15:14:37 -0600229 l->i = 0;
Gavin Howardc9a9c472018-10-02 17:23:01 -0600230 l->len = strlen(text);
Gavin Howardad477312018-12-24 15:51:35 -0700231 l->t = l->last = BC_LEX_INVALID;
Gavin Howard5a321892020-05-16 17:27:55 -0600232 bc_lex_next(l);
Gavin Howardc9a9c472018-10-02 17:23:01 -0600233}