blob: 503e41ae2c76b3cd94d496a9a1644917598e8f39 [file] [log] [blame]
/****************************************************************
Copyright (C) Lucent Technologies 1997
All Rights Reserved

Permission to use, copy, modify, and distribute this software and
its documentation for any purpose and without fee is hereby
granted, provided that the above copyright notice appear in all
copies and that both that the copyright notice and this
permission notice and warranty disclaimer appear in supporting
documentation, and that the name Lucent Technologies or any of
its entities not be used in advertising or publicity pertaining
to distribution of the software without specific, written prior
permission.

LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
THIS SOFTWARE.
****************************************************************/
24
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <ctype.h>
29#include "awk.h"
30#include "ytab.h"
31
32extern YYSTYPE yylval;
33extern int infunc;
34
35int lineno = 1;
36int bracecnt = 0;
37int brackcnt = 0;
38int parencnt = 0;
39
/* One reserved word or built-in name recognised by word(). */
typedef struct Keyword {
	const char	*word;	/* spelling as written in the awk program */
	int		sub;	/* value stored in yylval.i when the word is matched */
	int		type;	/* token type returned to the parser */
} Keyword;
45
zoulasc6a877092020-01-24 04:11:59 -050046const Keyword keywords[] = { /* keep sorted: binary searched */
Brian Kernighan87b94932012-12-22 10:35:39 -050047 { "BEGIN", XBEGIN, XBEGIN },
48 { "END", XEND, XEND },
49 { "NF", VARNF, VARNF },
50 { "atan2", FATAN, BLTIN },
51 { "break", BREAK, BREAK },
52 { "close", CLOSE, CLOSE },
53 { "continue", CONTINUE, CONTINUE },
54 { "cos", FCOS, BLTIN },
55 { "delete", DELETE, DELETE },
56 { "do", DO, DO },
57 { "else", ELSE, ELSE },
58 { "exit", EXIT, EXIT },
59 { "exp", FEXP, BLTIN },
60 { "fflush", FFLUSH, BLTIN },
61 { "for", FOR, FOR },
62 { "func", FUNC, FUNC },
63 { "function", FUNC, FUNC },
64 { "getline", GETLINE, GETLINE },
65 { "gsub", GSUB, GSUB },
66 { "if", IF, IF },
67 { "in", IN, IN },
68 { "index", INDEX, INDEX },
69 { "int", FINT, BLTIN },
70 { "length", FLENGTH, BLTIN },
71 { "log", FLOG, BLTIN },
72 { "match", MATCHFCN, MATCHFCN },
73 { "next", NEXT, NEXT },
74 { "nextfile", NEXTFILE, NEXTFILE },
75 { "print", PRINT, PRINT },
76 { "printf", PRINTF, PRINTF },
77 { "rand", FRAND, BLTIN },
78 { "return", RETURN, RETURN },
79 { "sin", FSIN, BLTIN },
80 { "split", SPLIT, SPLIT },
81 { "sprintf", SPRINTF, SPRINTF },
82 { "sqrt", FSQRT, BLTIN },
83 { "srand", FSRAND, BLTIN },
84 { "sub", SUB, SUB },
85 { "substr", SUBSTR, SUBSTR },
86 { "system", FSYSTEM, BLTIN },
87 { "tolower", FTOLOWER, BLTIN },
88 { "toupper", FTOUPPER, BLTIN },
89 { "while", WHILE, WHILE },
90};
91
/*
 * RET(x): return token x from the enclosing lexer function, first printing
 * its name when dbg is set.
 *
 * Wrapped in do { } while (0) so that "RET(x);" is a single statement and
 * is safe in an unbraced if/else.  The argument is evaluated exactly once.
 */
#define	RET(x)	do { \
		int ret_tok_ = (x); \
		if (dbg) \
			printf("lex %s\n", tokname(ret_tok_)); \
		return ret_tok_; \
	} while (0)
93
/*
 * peek: report the next input character without consuming it.
 * Implemented as input() followed by unput(), so it carries whatever
 * side effects those two have.  Returns 0 at end of input.
 */
static int peek(void)
{
	int next;

	next = input();
	unput(next);
	return next;
}
100
/*
 * gettok: read the next raw token into the growable buffer *pbuf.
 *
 * pbuf, psz	address of the buffer pointer and of its size; both are
 *		updated if the buffer had to be enlarged.  The buffer must
 *		hold at least two bytes on entry.
 *
 * Returns
 *	0	at end of input;
 *	'a'	for a name ([A-Za-z_][A-Za-z0-9_]*), text left in the buffer;
 *	'0'	for a number, text left in the buffer;
 *	c	the character itself for anything else (the buffer then
 *		holds that single character).
 */
static int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Reserve room for this character AND the terminating
			 * NUL: if input ends right after the buffer fills up,
			 * the "*bp = 0" after the loop must still be in bounds.
			 */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: character plus NUL */
			if (bp-buf+2 > sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating; doing it the
			 * other way round pushes back an empty string and
			 * silently drops the characters collected above.
			 */
			unputstr(buf+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
163
/* Helpers defined further down in this file. */
int	word(char *);
int	string(void);
int	regexpr(void);

/* One-shot flags examined at the top of yylex(). */
bool	sc  = false;	/* true => return a } right now */
bool	reg = false;	/* true => return a REGEXPR now */
Brian Kernighan87b94932012-12-22 10:35:39 -0500169
170int yylex(void)
171{
172 int c;
pfg52421942016-06-03 21:23:11 +0000173 static char *buf = NULL;
Brian Kernighan87b94932012-12-22 10:35:39 -0500174 static int bufsize = 5; /* BUG: setting this small causes core dump! */
175
zoulasc65892082019-10-24 09:40:15 -0400176 if (buf == NULL && (buf = malloc(bufsize)) == NULL)
Brian Kernighan87b94932012-12-22 10:35:39 -0500177 FATAL( "out of space in yylex" );
178 if (sc) {
Arnold D. Robbins108224b2019-11-10 21:19:18 +0200179 sc = false;
Brian Kernighan87b94932012-12-22 10:35:39 -0500180 RET('}');
181 }
182 if (reg) {
Arnold D. Robbins108224b2019-11-10 21:19:18 +0200183 reg = false;
Brian Kernighan87b94932012-12-22 10:35:39 -0500184 return regexpr();
185 }
186 for (;;) {
187 c = gettok(&buf, &bufsize);
188 if (c == 0)
189 return 0;
190 if (isalpha(c) || c == '_')
191 return word(buf);
192 if (isdigit(c)) {
Arnold D. Robbinsc7eeb572020-01-05 21:18:36 +0200193 char *cp = tostring(buf);
194 yylval.cp = setsymtab(buf, cp, atof(buf), CON|NUM, symtab);
195 free(cp);
Brian Kernighan87b94932012-12-22 10:35:39 -0500196 /* should this also have STR set? */
197 RET(NUMBER);
198 }
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600199
Brian Kernighan87b94932012-12-22 10:35:39 -0500200 yylval.i = c;
201 switch (c) {
202 case '\n': /* {EOL} */
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700203 lineno++;
Brian Kernighan87b94932012-12-22 10:35:39 -0500204 RET(NL);
205 case '\r': /* assume \n is coming */
206 case ' ': /* {WS}+ */
207 case '\t':
208 break;
209 case '#': /* #.* strip comments */
210 while ((c = input()) != '\n' && c != 0)
211 ;
212 unput(c);
213 break;
214 case ';':
215 RET(';');
216 case '\\':
217 if (peek() == '\n') {
218 input();
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700219 lineno++;
Brian Kernighan87b94932012-12-22 10:35:39 -0500220 } else if (peek() == '\r') {
221 input(); input(); /* \n */
222 lineno++;
223 } else {
224 RET(c);
225 }
226 break;
227 case '&':
228 if (peek() == '&') {
229 input(); RET(AND);
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600230 } else
Brian Kernighan87b94932012-12-22 10:35:39 -0500231 RET('&');
232 case '|':
233 if (peek() == '|') {
234 input(); RET(BOR);
235 } else
236 RET('|');
237 case '!':
238 if (peek() == '=') {
239 input(); yylval.i = NE; RET(NE);
240 } else if (peek() == '~') {
241 input(); yylval.i = NOTMATCH; RET(MATCHOP);
242 } else
243 RET(NOT);
244 case '~':
245 yylval.i = MATCH;
246 RET(MATCHOP);
247 case '<':
248 if (peek() == '=') {
249 input(); yylval.i = LE; RET(LE);
250 } else {
251 yylval.i = LT; RET(LT);
252 }
253 case '=':
254 if (peek() == '=') {
255 input(); yylval.i = EQ; RET(EQ);
256 } else {
257 yylval.i = ASSIGN; RET(ASGNOP);
258 }
259 case '>':
260 if (peek() == '=') {
261 input(); yylval.i = GE; RET(GE);
262 } else if (peek() == '>') {
263 input(); yylval.i = APPEND; RET(APPEND);
264 } else {
265 yylval.i = GT; RET(GT);
266 }
267 case '+':
268 if (peek() == '+') {
269 input(); yylval.i = INCR; RET(INCR);
270 } else if (peek() == '=') {
271 input(); yylval.i = ADDEQ; RET(ASGNOP);
272 } else
273 RET('+');
274 case '-':
275 if (peek() == '-') {
276 input(); yylval.i = DECR; RET(DECR);
277 } else if (peek() == '=') {
278 input(); yylval.i = SUBEQ; RET(ASGNOP);
279 } else
280 RET('-');
281 case '*':
282 if (peek() == '=') { /* *= */
283 input(); yylval.i = MULTEQ; RET(ASGNOP);
284 } else if (peek() == '*') { /* ** or **= */
285 input(); /* eat 2nd * */
286 if (peek() == '=') {
287 input(); yylval.i = POWEQ; RET(ASGNOP);
288 } else {
289 RET(POWER);
290 }
291 } else
292 RET('*');
293 case '/':
294 RET('/');
295 case '%':
296 if (peek() == '=') {
297 input(); yylval.i = MODEQ; RET(ASGNOP);
298 } else
299 RET('%');
300 case '^':
301 if (peek() == '=') {
302 input(); yylval.i = POWEQ; RET(ASGNOP);
303 } else
304 RET(POWER);
305
306 case '$':
307 /* BUG: awkward, if not wrong */
308 c = gettok(&buf, &bufsize);
309 if (isalpha(c)) {
310 if (strcmp(buf, "NF") == 0) { /* very special */
311 unputstr("(NF)");
312 RET(INDIRECT);
313 }
314 c = peek();
315 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
316 unputstr(buf);
317 RET(INDIRECT);
318 }
319 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
320 RET(IVAR);
321 } else if (c == 0) { /* */
322 SYNTAX( "unexpected end of input after $" );
323 RET(';');
324 } else {
325 unputstr(buf);
326 RET(INDIRECT);
327 }
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600328
Brian Kernighan87b94932012-12-22 10:35:39 -0500329 case '}':
330 if (--bracecnt < 0)
331 SYNTAX( "extra }" );
Arnold D. Robbins108224b2019-11-10 21:19:18 +0200332 sc = true;
Brian Kernighan87b94932012-12-22 10:35:39 -0500333 RET(';');
334 case ']':
335 if (--brackcnt < 0)
336 SYNTAX( "extra ]" );
337 RET(']');
338 case ')':
339 if (--parencnt < 0)
340 SYNTAX( "extra )" );
341 RET(')');
342 case '{':
343 bracecnt++;
344 RET('{');
345 case '[':
346 brackcnt++;
347 RET('[');
348 case '(':
349 parencnt++;
350 RET('(');
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600351
Brian Kernighan87b94932012-12-22 10:35:39 -0500352 case '"':
353 return string(); /* BUG: should be like tran.c ? */
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600354
Brian Kernighan87b94932012-12-22 10:35:39 -0500355 default:
356 RET(c);
357 }
358 }
359}
360
361int string(void)
362{
363 int c, n;
364 char *s, *bp;
pfg52421942016-06-03 21:23:11 +0000365 static char *buf = NULL;
Brian Kernighan87b94932012-12-22 10:35:39 -0500366 static int bufsz = 500;
367
zoulasc65892082019-10-24 09:40:15 -0400368 if (buf == NULL && (buf = malloc(bufsz)) == NULL)
Brian Kernighan87b94932012-12-22 10:35:39 -0500369 FATAL("out of space for strings");
370 for (bp = buf; (c = input()) != '"'; ) {
371 if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
372 FATAL("out of space for string %.10s...", buf);
373 switch (c) {
374 case '\n':
375 case '\r':
376 case 0:
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700377 *bp = '\0';
Brian Kernighan87b94932012-12-22 10:35:39 -0500378 SYNTAX( "non-terminated string %.10s...", buf );
Brian Kernighan87b94932012-12-22 10:35:39 -0500379 if (c == 0) /* hopeless */
380 FATAL( "giving up" );
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700381 lineno++;
Brian Kernighan87b94932012-12-22 10:35:39 -0500382 break;
383 case '\\':
384 c = input();
385 switch (c) {
386 case '"': *bp++ = '"'; break;
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600387 case 'n': *bp++ = '\n'; break;
Brian Kernighan87b94932012-12-22 10:35:39 -0500388 case 't': *bp++ = '\t'; break;
389 case 'f': *bp++ = '\f'; break;
390 case 'r': *bp++ = '\r'; break;
391 case 'b': *bp++ = '\b'; break;
392 case 'v': *bp++ = '\v'; break;
Arnold D. Robbins944989b2020-01-06 00:01:46 -0700393 case 'a': *bp++ = '\a'; break;
Brian Kernighan87b94932012-12-22 10:35:39 -0500394 case '\\': *bp++ = '\\'; break;
395
396 case '0': case '1': case '2': /* octal: \d \dd \ddd */
397 case '3': case '4': case '5': case '6': case '7':
398 n = c - '0';
399 if ((c = peek()) >= '0' && c < '8') {
400 n = 8 * n + input() - '0';
401 if ((c = peek()) >= '0' && c < '8')
402 n = 8 * n + input() - '0';
403 }
404 *bp++ = n;
405 break;
406
407 case 'x': /* hex \x0-9a-fA-F + */
408 { char xbuf[100], *px;
409 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
410 if (isdigit(c)
411 || (c >= 'a' && c <= 'f')
412 || (c >= 'A' && c <= 'F'))
413 *px++ = c;
414 else
415 break;
416 }
417 *px = 0;
418 unput(c);
419 sscanf(xbuf, "%x", (unsigned int *) &n);
420 *bp++ = n;
421 break;
422 }
423
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600424 default:
Brian Kernighan87b94932012-12-22 10:35:39 -0500425 *bp++ = c;
426 break;
427 }
428 break;
429 default:
430 *bp++ = c;
431 break;
432 }
433 }
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600434 *bp = 0;
Brian Kernighan87b94932012-12-22 10:35:39 -0500435 s = tostring(buf);
Arnold D. Robbinsc7eeb572020-01-05 21:18:36 +0200436 *bp++ = ' '; *bp++ = '\0';
Brian Kernighan87b94932012-12-22 10:35:39 -0500437 yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
Arnold D. Robbinsc7eeb572020-01-05 21:18:36 +0200438 free(s);
Brian Kernighan87b94932012-12-22 10:35:39 -0500439 RET(STRING);
440}
441
442
zoulasc6a877092020-01-24 04:11:59 -0500443static int binsearch(char *w, const Keyword *kp, int n)
Brian Kernighan87b94932012-12-22 10:35:39 -0500444{
445 int cond, low, mid, high;
446
447 low = 0;
448 high = n - 1;
449 while (low <= high) {
450 mid = (low + high) / 2;
451 if ((cond = strcmp(w, kp[mid].word)) < 0)
452 high = mid - 1;
453 else if (cond > 0)
454 low = mid + 1;
455 else
456 return mid;
457 }
458 return -1;
459}
460
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600461int word(char *w)
Brian Kernighan87b94932012-12-22 10:35:39 -0500462{
zoulasc6a877092020-01-24 04:11:59 -0500463 const Keyword *kp;
Brian Kernighan87b94932012-12-22 10:35:39 -0500464 int c, n;
465
466 n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
Brian Kernighan87b94932012-12-22 10:35:39 -0500467 if (n != -1) { /* found in table */
Alexander Richardsonad9bd2f2019-09-10 07:54:53 +0100468 kp = keywords + n;
Brian Kernighan87b94932012-12-22 10:35:39 -0500469 yylval.i = kp->sub;
470 switch (kp->type) { /* special handling */
471 case BLTIN:
472 if (kp->sub == FSYSTEM && safe)
473 SYNTAX( "system is unsafe" );
474 RET(kp->type);
475 case FUNC:
476 if (infunc)
477 SYNTAX( "illegal nested function" );
478 RET(kp->type);
479 case RETURN:
480 if (!infunc)
481 SYNTAX( "return not in function" );
482 RET(kp->type);
483 case VARNF:
484 yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
485 RET(VARNF);
486 default:
487 RET(kp->type);
488 }
489 }
490 c = peek(); /* look for '(' */
491 if (c != '(' && infunc && (n=isarg(w)) >= 0) {
492 yylval.i = n;
493 RET(ARG);
494 } else {
495 yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
496 if (c == '(') {
497 RET(CALL);
498 } else {
499 RET(VAR);
500 }
501 }
502}
503
504void startreg(void) /* next call to yylex will return a regular expression */
505{
Arnold D. Robbins108224b2019-11-10 21:19:18 +0200506 reg = true;
Brian Kernighan87b94932012-12-22 10:35:39 -0500507}
508
509int regexpr(void)
510{
511 int c;
pfg52421942016-06-03 21:23:11 +0000512 static char *buf = NULL;
Brian Kernighan87b94932012-12-22 10:35:39 -0500513 static int bufsz = 500;
514 char *bp;
515
zoulasc65892082019-10-24 09:40:15 -0400516 if (buf == NULL && (buf = malloc(bufsz)) == NULL)
Brian Kernighan87b94932012-12-22 10:35:39 -0500517 FATAL("out of space for rex expr");
518 bp = buf;
519 for ( ; (c = input()) != '/' && c != 0; ) {
520 if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
521 FATAL("out of space for reg expr %.10s...", buf);
522 if (c == '\n') {
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700523 *bp = '\0';
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600524 SYNTAX( "newline in regular expression %.10s...", buf );
Brian Kernighan87b94932012-12-22 10:35:39 -0500525 unput('\n');
526 break;
527 } else if (c == '\\') {
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600528 *bp++ = '\\';
Brian Kernighan87b94932012-12-22 10:35:39 -0500529 *bp++ = input();
530 } else {
531 *bp++ = c;
532 }
533 }
534 *bp = 0;
535 if (c == 0)
536 SYNTAX("non-terminated regular expression %.10s...", buf);
537 yylval.s = tostring(buf);
538 unput('/');
539 RET(REGEXPR);
540}
541
/* low-level lexical stuff, sort of inherited from lex */

char	ebuf[300];		/* ring of the most recently read characters */
char	*ep = ebuf;		/* next free slot in ebuf */
char	yysbuf[100];		/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character */
FILE	*yyin = NULL;		/* not used in this file; presumably the program
				 * source stream read by pgetc() */
Brian Kernighan87b94932012-12-22 10:35:39 -0500549
550int input(void) /* get next lexical input character */
551{
552 int c;
553 extern char *lexprog;
554
555 if (yysptr > yysbuf)
556 c = (uschar)*--yysptr;
557 else if (lexprog != NULL) { /* awk '...' */
558 if ((c = (uschar)*lexprog) != 0)
559 lexprog++;
560 } else /* awk -f ... */
561 c = pgetc();
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700562 if (c == EOF)
Brian Kernighan87b94932012-12-22 10:35:39 -0500563 c = 0;
564 if (ep >= ebuf + sizeof ebuf)
565 ep = ebuf;
Cody Peter Mello6fe0a042018-09-21 11:16:27 -0700566 *ep = c;
567 if (c != 0) {
568 ep++;
569 }
570 return (c);
Brian Kernighan87b94932012-12-22 10:35:39 -0500571}
572
573void unput(int c) /* put lexical character back on input */
574{
zoulasc6a877092020-01-24 04:11:59 -0500575 if (c == '\n')
576 lineno--;
Brian Kernighan87b94932012-12-22 10:35:39 -0500577 if (yysptr >= yysbuf + sizeof(yysbuf))
578 FATAL("pushed back too much: %.20s...", yysbuf);
579 *yysptr++ = c;
580 if (--ep < ebuf)
581 ep = ebuf + sizeof(ebuf) - 1;
582}
583
/*
 * unputstr: push the whole string s back on the input.  Characters are
 * pushed last-to-first so that they are read again in their original
 * order.  An empty string pushes nothing.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p = s + strlen(s);

	while (p > s) {
		--p;
		unput(*p);
	}
}
590}