blob: 8fc170906c48831fc974e624f335dcccae600e4b [file] [log] [blame]
Brian Kernighan87b94932012-12-22 10:35:39 -05001/****************************************************************
2Copyright (C) Lucent Technologies 1997
3All Rights Reserved
4
5Permission to use, copy, modify, and distribute this software and
6its documentation for any purpose and without fee is hereby
7granted, provided that the above copyright notice appear in all
8copies and that both that the copyright notice and this
9permission notice and warranty disclaimer appear in supporting
10documentation, and that the name Lucent Technologies or any of
11its entities not be used in advertising or publicity pertaining
12to distribution of the software without specific, written prior
13permission.
14
15LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22THIS SOFTWARE.
23****************************************************************/
24
25%{
26#include <stdio.h>
27#include <string.h>
28#include "awk.h"
29
30void checkdup(Node *list, Cell *item);
/* Always answers 1; by lex convention a nonzero yywrap() means no more input. */
int yywrap(void)
{
	return 1;
}
32
/* Parser state shared by the grammar actions below. */
/* beginloc/endloc: statement lists accumulated (via linkum) from BEGIN and END blocks */
33Node *beginloc = 0;
34Node *endloc = 0;
Arnold D. Robbins108224b2019-11-10 21:19:18 +020035bool infunc = false; /* = true if in arglist or body of func */
36int inloop = 0; /* >= 1 if in while, for, do; can't be bool, since loops can nest */
Brian Kernighan87b94932012-12-22 10:35:39 -050037char *curfname = 0; /* current function name */
38Node *arglist = 0; /* list of args for current function */
39%}
40
/* Semantic value carried by each grammar symbol; the <tag> on %token/%type picks the member. */
41%union {
42 Node *p; /* parse-tree node: most nonterminals (pattern, stmt, term, ...) */
43 Cell *cp; /* symbol cell: VAR, IVAR, VARNF, CALL, NUMBER, STRING and the string rule */
44 int i; /* integer value: operator, keyword and punctuation tokens */
45 char *s; /* text of a regular expression: REGEXPR and reg_expr */
46}
47
/* Token declarations.  The <tag> names the %union member that holds each token's value. */
48%token <i> FIRSTTOKEN /* must be first */
49%token <p> PROGRAM PASTAT PASTAT2 XBEGIN XEND
50%token <i> NL ',' '{' '(' '|' ';' '/' ')' '}' '[' ']'
51%token <i> ARRAY
52%token <i> MATCH NOTMATCH MATCHOP
Arnold D. Robbinsa3e9e822020-01-01 22:47:29 +020053%token <i> FINAL DOT ALL CCL NCCL CHAR OR STAR QUEST PLUS EMPTYRE ZERO
Brian Kernighan87b94932012-12-22 10:35:39 -050054%token <i> AND BOR APPEND EQ GE GT LE LT NE IN
Arnold D. Robbins795a06b2019-07-28 05:51:52 -060055%token <i> ARG BLTIN BREAK CLOSE CONTINUE DELETE DO EXIT FOR FUNC
Brian Kernighan87b94932012-12-22 10:35:39 -050056%token <i> SUB GSUB IF INDEX LSUBSTR MATCHFCN NEXT NEXTFILE
57%token <i> ADD MINUS MULT DIVIDE MOD
58%token <i> ASSIGN ASGNOP ADDEQ SUBEQ MULTEQ DIVEQ MODEQ POWEQ
59%token <i> PRINT PRINTF SPRINTF
60%token <p> ELSE INTEST CONDEXPR
61%token <i> POSTINCR PREINCR POSTDECR PREDECR
62%token <cp> VAR IVAR VARNF CALL NUMBER STRING
63%token <s> REGEXPR
64
/* Value types of the nonterminals: <p> = Node *, <i> = int, <s> = char *, <cp> = Cell *. */
65%type <p> pas pattern ppattern plist pplist patlist prarg term re
66%type <p> pa_pat pa_stat pa_stats
67%type <s> reg_expr
68%type <p> simple_stmt opt_simple_stmt stmt stmtlist
69%type <p> var varname funcname varlist
70%type <p> for if else while
71%type <i> do st
72%type <i> pst opt_pst lbrace rbrace rparen comma nl opt_nl and bor
73%type <i> subop print
Arnold D. Robbinsc95b9602019-07-28 20:09:24 +030074%type <cp> string
Brian Kernighan87b94932012-12-22 10:35:39 -050075
/*
 * Precedence and associativity table.  In yacc each successive line binds
 * more tightly than the one before it, so ASGNOP is the loosest operator
 * here and INDIRECT the tightest.  CAT and UMINUS are referenced from the
 * rules through %prec.
 */
76%right ASGNOP
77%right '?'
78%right ':'
79%left BOR
80%left AND
81%left GETLINE
82%nonassoc APPEND EQ GE GT LE LT NE MATCHOP IN '|'
Arnold D. Robbins795a06b2019-07-28 05:51:52 -060083%left ARG BLTIN BREAK CALL CLOSE CONTINUE DELETE DO EXIT FOR FUNC
Brian Kernighan87b94932012-12-22 10:35:39 -050084%left GSUB IF INDEX LSUBSTR MATCHFCN NEXT NUMBER
85%left PRINT PRINTF RETURN SPLIT SPRINTF STRING SUB SUBSTR
86%left REGEXPR VAR VARNF IVAR WHILE '('
87%left CAT
88%left '+' '-'
89%left '*' '/' '%'
Arnold D. Robbins32093f52018-08-22 20:40:26 +030090%left NOT UMINUS UPLUS
Brian Kernighan87b94932012-12-22 10:35:39 -050091%right POWER
92%right DECR INCR
93%left INDIRECT
94%token LASTTOKEN /* must be last */
95
96%%
97
/*
 * First rule of the grammar.  When no error has been flagged, the whole
 * program is stored in winner as a PROGRAM node built from beginloc, the
 * pattern-action list ($1) and endloc.  On a syntax error the lookahead is
 * discarded, bracecheck() is called and "bailing out" is reported.
 */
98program:
99 pas { if (errorflag==0)
100 winner = (Node *)stat3(PROGRAM, beginloc, $1, endloc); }
101 | error { yyclearin; bracecheck(); SYNTAX("bailing out"); }
102 ;
103
/* An AND token followed by any number of NL tokens. */
104and:
105 AND | and NL
106 ;
107
/* A BOR token followed by any number of NL tokens. */
108bor:
109 BOR | bor NL
110 ;
111
/* A ',' followed by any number of NL tokens. */
112comma:
113 ',' | comma NL
114 ;
115
/* A DO keyword followed by any number of NL tokens. */
116do:
117 DO | do NL
118 ;
119
/* An ELSE keyword followed by any number of NL tokens. */
120else:
121 ELSE | else NL
122 ;
123
/*
 * The three for-loop forms:
 *   for (init; cond; incr) stmt   -> FOR node, condition wrapped by notnull()
 *   for (init; ; incr) stmt       -> FOR node with NIL as the condition
 *   for (var in array) stmt       -> IN node, second name converted by makearr()
 * The mid-rule action {inloop++;} runs before the body is parsed and counts
 * as a symbol, which is why the body is $12, $10 and $8 respectively; the
 * final action undoes the increment.
 */
124for:
125 FOR '(' opt_simple_stmt ';' opt_nl pattern ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
126 { --inloop; $$ = stat4(FOR, $3, notnull($6), $9, $12); }
127 | FOR '(' opt_simple_stmt ';' ';' opt_nl opt_simple_stmt rparen {inloop++;} stmt
128 { --inloop; $$ = stat4(FOR, $3, NIL, $7, $10); }
129 | FOR '(' varname IN varname rparen {inloop++;} stmt
130 { --inloop; $$ = stat3(IN, $3, makearr($5), $8); }
131 ;
132
/*
 * Name in a function definition, arriving as either a VAR or a CALL token.
 * setfname() validates the cell and records the name in curfname.  No $$ is
 * assigned, so yacc's default ($$ = $1) carries the cell through; pa_stat
 * casts it back to Cell * when calling defn().
 */
133funcname:
134 VAR { setfname($1); }
135 | CALL { setfname($1); }
136 ;
137
/* Head of an if statement; its value is the condition passed through notnull(). */
138if:
139 IF '(' pattern rparen { $$ = notnull($3); }
140 ;
141
/* A '{' followed by any number of NL tokens. */
142lbrace:
143 '{' | lbrace NL
144 ;
145
/* One or more NL tokens. */
146nl:
147 NL | nl NL
148 ;
149
/* Zero or more NL tokens; the empty alternative yields 0. */
150opt_nl:
151 /* empty */ { $$ = 0; }
152 | nl
153 ;
154
/* Optional run of pattern-statement terminators (see pst); empty yields 0. */
155opt_pst:
156 /* empty */ { $$ = 0; }
157 | pst
158 ;
159
160
/* Optional simple statement, used for the init and increment parts of a for loop; empty yields 0. */
161opt_simple_stmt:
162 /* empty */ { $$ = 0; }
163 | simple_stmt
164 ;
165
/*
 * The list of pattern-action statements making up the program, with optional
 * terminators around it.  A program consisting only of terminators yields 0.
 */
166pas:
167 opt_pst { $$ = 0; }
168 | opt_pst pa_stats opt_pst { $$ = $2; }
169 ;
170
/* Pattern part of a pattern-action statement, passed through notnull(). */
171pa_pat:
172 pattern { $$ = notnull($1); }
173 ;
174
/*
 * One top-level item of the program:
 *   pattern                     -> PASTAT whose action is a PRINT of rectonode()
 *   pattern { stmts }           -> PASTAT with the given action
 *   pat1, pat2 [ { stmts } ]    -> built by pa2stat(), same default action
 *   { stmts }                   -> PASTAT with a NIL pattern
 *   BEGIN / END blocks          -> appended to beginloc / endloc; value is 0
 *   function definition         -> registered through defn(); value is 0
 * infunc is set only after the parameter list's closing paren has been
 * parsed, and is cleared together with curfname once the body is complete.
 */
175pa_stat:
176 pa_pat { $$ = stat2(PASTAT, $1, stat2(PRINT, rectonode(), NIL)); }
177 | pa_pat lbrace stmtlist '}' { $$ = stat2(PASTAT, $1, $3); }
178 | pa_pat ',' opt_nl pa_pat { $$ = pa2stat($1, $4, stat2(PRINT, rectonode(), NIL)); }
179 | pa_pat ',' opt_nl pa_pat lbrace stmtlist '}' { $$ = pa2stat($1, $4, $6); }
180 | lbrace stmtlist '}' { $$ = stat2(PASTAT, NIL, $2); }
181 | XBEGIN lbrace stmtlist '}'
182 { beginloc = linkum(beginloc, $3); $$ = 0; }
183 | XEND lbrace stmtlist '}'
184 { endloc = linkum(endloc, $3); $$ = 0; }
Arnold D. Robbins108224b2019-11-10 21:19:18 +0200185 | FUNC funcname '(' varlist rparen {infunc = true;} lbrace stmtlist '}'
186 { infunc = false; curfname=0; defn((Cell *)$2, $4, $8); $$ = 0; }
Brian Kernighan87b94932012-12-22 10:35:39 -0500187 ;
188
/* One or more pa_stat items, optionally separated by terminators, chained with linkum(). */
189pa_stats:
190 pa_stat
191 | pa_stats opt_pst pa_stat { $$ = linkum($1, $3); }
192 ;
193
/* Comma-separated list of one or more patterns, chained with linkum(). */
194patlist:
195 pattern
196 | patlist comma pattern { $$ = linkum($1, $3); }
197 ;
198
/*
 * Restricted form of pattern used for unparenthesized print arguments
 * (prarg -> pplist -> ppattern).  It mirrors pattern but has no comparison
 * operators (EQ GE GT LE LT NE) and no "| GETLINE" forms.
 * NOTE(review): presumably this keeps '>' and '|' after print available as
 * output redirections (see simple_stmt) -- confirm.
 * For MATCHOP with a non-regexp right side, a constant operand is compiled
 * with makedfa() immediately; otherwise (Node *)1 replaces NIL as the first
 * operand and the expression itself is stored.
 */
199ppattern:
200 var ASGNOP ppattern { $$ = op2($2, $1, $3); }
201 | ppattern '?' ppattern ':' ppattern %prec '?'
202 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
203 | ppattern bor ppattern %prec BOR
204 { $$ = op2(BOR, notnull($1), notnull($3)); }
205 | ppattern and ppattern %prec AND
206 { $$ = op2(AND, notnull($1), notnull($3)); }
207 | ppattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
208 | ppattern MATCHOP ppattern
209 { if (constnode($3))
210 $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
211 else
212 $$ = op3($2, (Node *)1, $1, $3); }
213 | ppattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
214 | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
215 | ppattern term %prec CAT { $$ = op2(CAT, $1, $2); }
216 | re
217 | term
218 ;
219
/*
 * General expression: assignment, ?:, logical or/and, comparisons, regexp
 * match, array membership, "cmd | getline", concatenation (two adjacent
 * operands, given the precedence of CAT), a bare regexp, or a term.
 * For MATCHOP with a non-regexp right side, a constant operand is compiled
 * with makedfa() immediately; otherwise (Node *)1 replaces NIL as the first
 * operand and the expression itself is stored.
 * When safe is set, the "| GETLINE" forms report a syntax error instead of
 * building a node; $$ then keeps yacc's default value ($1).
 */
220pattern:
221 var ASGNOP pattern { $$ = op2($2, $1, $3); }
222 | pattern '?' pattern ':' pattern %prec '?'
223 { $$ = op3(CONDEXPR, notnull($1), $3, $5); }
224 | pattern bor pattern %prec BOR
225 { $$ = op2(BOR, notnull($1), notnull($3)); }
226 | pattern and pattern %prec AND
227 { $$ = op2(AND, notnull($1), notnull($3)); }
228 | pattern EQ pattern { $$ = op2($2, $1, $3); }
229 | pattern GE pattern { $$ = op2($2, $1, $3); }
230 | pattern GT pattern { $$ = op2($2, $1, $3); }
231 | pattern LE pattern { $$ = op2($2, $1, $3); }
232 | pattern LT pattern { $$ = op2($2, $1, $3); }
233 | pattern NE pattern { $$ = op2($2, $1, $3); }
234 | pattern MATCHOP reg_expr { $$ = op3($2, NIL, $1, (Node*)makedfa($3, 0)); }
235 | pattern MATCHOP pattern
236 { if (constnode($3))
237 $$ = op3($2, NIL, $1, (Node*)makedfa(strnode($3), 0));
238 else
239 $$ = op3($2, (Node *)1, $1, $3); }
240 | pattern IN varname { $$ = op2(INTEST, $1, makearr($3)); }
241 | '(' plist ')' IN varname { $$ = op2(INTEST, $2, makearr($5)); }
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600242 | pattern '|' GETLINE var {
Brian Kernighan87b94932012-12-22 10:35:39 -0500243 if (safe) SYNTAX("cmd | getline is unsafe");
244 else $$ = op3(GETLINE, $4, itonp($2), $1); }
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600245 | pattern '|' GETLINE {
Brian Kernighan87b94932012-12-22 10:35:39 -0500246 if (safe) SYNTAX("cmd | getline is unsafe");
247 else $$ = op3(GETLINE, (Node*)0, itonp($2), $1); }
248 | pattern term %prec CAT { $$ = op2(CAT, $1, $2); }
249 | re
250 | term
251 ;
252
/* Comma-separated list of two or more patterns, chained with linkum(); used inside parentheses. */
253plist:
254 pattern comma pattern { $$ = linkum($1, $3); }
255 | plist comma pattern { $$ = linkum($1, $3); }
256 ;
257
/* Comma-separated list of one or more ppatterns, chained with linkum(). */
258pplist:
259 ppattern
260 | pplist comma ppattern { $$ = linkum($1, $3); }
261 ;
262
/*
 * Argument list of print/printf: nothing (defaults to rectonode()), an
 * unparenthesized pplist, or a parenthesized plist of two or more items.
 */
263prarg:
264 /* empty */ { $$ = rectonode(); }
265 | pplist
266 | '(' plist ')' { $$ = $2; }
267 ;
268
/* Either output keyword; its token value becomes the statement type in simple_stmt. */
269print:
270 PRINT | PRINTF
271 ;
272
/* One or more terminators between pattern-action statements: NL or ';' in any mix. */
273pst:
274 NL | ';' | pst NL | pst ';'
275 ;
276
/* A '}' followed by any number of NL tokens. */
277rbrace:
278 '}' | rbrace NL
279 ;
280
/*
 * A bare regular expression used as an expression: it becomes a MATCH of
 * rectonode() against the compiled expression.  A leading NOT negates it.
 */
281re:
282 reg_expr
283 { $$ = op3(MATCH, NIL, rectonode(), (Node*)makedfa($1, 0)); }
284 | NOT re { $$ = op1(NOT, notnull($2)); }
285 ;
286
/*
 * Regular-expression literal /.../.  The mid-rule action calls startreg()
 * right after the opening '/' has been seen, before the REGEXPR token is
 * read; the value of the rule is the REGEXPR text ($3).
 */
287reg_expr:
288 '/' {startreg();} REGEXPR '/' { $$ = $3; }
289 ;
290
/* A ')' followed by any number of NL tokens. */
291rparen:
292 ')' | rparen NL
293 ;
294
/*
 * Statements that need no braces or keywords of their own: print/printf
 * with an optional redirection ('|', APPEND or GT followed by a term),
 * delete of an array element or of a whole array, and a plain expression.
 * When safe is set, every redirected print form reports a syntax error
 * instead of building a node.  An unparseable statement discards the
 * lookahead and reports "illegal statement".
 */
295simple_stmt:
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600296 print prarg '|' term {
Brian Kernighan87b94932012-12-22 10:35:39 -0500297 if (safe) SYNTAX("print | is unsafe");
298 else $$ = stat3($1, $2, itonp($3), $4); }
299 | print prarg APPEND term {
300 if (safe) SYNTAX("print >> is unsafe");
301 else $$ = stat3($1, $2, itonp($3), $4); }
302 | print prarg GT term {
303 if (safe) SYNTAX("print > is unsafe");
304 else $$ = stat3($1, $2, itonp($3), $4); }
305 | print prarg { $$ = stat3($1, $2, NIL, NIL); }
306 | DELETE varname '[' patlist ']' { $$ = stat2(DELETE, makearr($2), $4); }
307 | DELETE varname { $$ = stat2(DELETE, makearr($2), 0); }
308 | pattern { $$ = exptostat($1); }
309 | error { yyclearin; SYNTAX("illegal statement"); }
310 ;
311
/* Statement terminator: one or more newlines, or a ';' optionally followed by newlines. */
312st:
313 nl
314 | ';' opt_nl
315 ;
316
/*
 * A single statement.
 *  - break/continue report a syntax error when inloop is 0, but the node is
 *    still built.
 *  - do/while loops raise inloop around the body via mid-rule actions; in
 *    the do form those actions count as symbols, so the body is $3 and the
 *    condition $7.
 *  - next/nextfile report a syntax error when infunc is set, but the node
 *    is still built.
 *  - "simple_stmt st" has no action, so its value is that of simple_stmt
 *    (yacc's default $$ = $1).
 *  - a lone ';' is an empty statement with value 0.
 */
317stmt:
318 BREAK st { if (!inloop) SYNTAX("break illegal outside of loops");
319 $$ = stat1(BREAK, NIL); }
320 | CONTINUE st { if (!inloop) SYNTAX("continue illegal outside of loops");
321 $$ = stat1(CONTINUE, NIL); }
322 | do {inloop++;} stmt {--inloop;} WHILE '(' pattern ')' st
323 { $$ = stat2(DO, $3, notnull($7)); }
324 | EXIT pattern st { $$ = stat1(EXIT, $2); }
325 | EXIT st { $$ = stat1(EXIT, NIL); }
326 | for
327 | if stmt else stmt { $$ = stat3(IF, $1, $2, $4); }
328 | if stmt { $$ = stat3(IF, $1, $2, NIL); }
329 | lbrace stmtlist rbrace { $$ = $2; }
330 | NEXT st { if (infunc)
331 SYNTAX("next is illegal inside a function");
332 $$ = stat1(NEXT, NIL); }
333 | NEXTFILE st { if (infunc)
334 SYNTAX("nextfile is illegal inside a function");
335 $$ = stat1(NEXTFILE, NIL); }
336 | RETURN pattern st { $$ = stat1(RETURN, $2); }
337 | RETURN st { $$ = stat1(RETURN, NIL); }
338 | simple_stmt st
339 | while {inloop++;} stmt { --inloop; $$ = stat2(WHILE, $1, $3); }
340 | ';' opt_nl { $$ = 0; }
341 ;
342
/* One or more statements, chained with linkum(). */
343stmtlist:
344 stmt
345 | stmtlist stmt { $$ = linkum($1, $2); }
346 ;
347
/* Either substitution keyword; its token value becomes the node type in term. */
348subop:
349 SUB | GSUB
350 ;
351
/* One or more adjacent STRING tokens, merged pairwise by catstr(). */
Arnold D. Robbinsc95b9602019-07-28 20:09:24 +0300352string:
353 STRING
354 | string STRING { $$ = catstr($1, $2); }
355 ;
356
/*
 * Operand-level expressions: arithmetic, unary operators, built-in and
 * user function calls, increment/decrement, getline forms, the string
 * functions (index, match, split, sprintf, sub/gsub, substr), constants,
 * parenthesized patterns and variables.
 *  - "term '/' ASGNOP term" is turned into a DIVEQ node.
 *  - unary '-', '+' and NOT all take the precedence of UMINUS.
 *  - BLTIN with no argument list, or an empty one, is applied to rectonode().
 *  - index() with a regexp second argument reports a syntax error but still
 *    builds a node.
 *  - match() and sub/gsub compile a regexp literal or constant pattern
 *    with makedfa() immediately; for any other pattern, (Node *)1 replaces
 *    NIL as the first operand and the expression itself is stored.
 *  - split() records in its last operand whether the separator is a
 *    string (STRING) or a regexp literal (REGEXPR).
 *  - two-argument sub/gsub operate on rectonode().
 */
Brian Kernighan87b94932012-12-22 10:35:39 -0500357term:
358 term '/' ASGNOP term { $$ = op2(DIVEQ, $1, $4); }
359 | term '+' term { $$ = op2(ADD, $1, $3); }
360 | term '-' term { $$ = op2(MINUS, $1, $3); }
361 | term '*' term { $$ = op2(MULT, $1, $3); }
362 | term '/' term { $$ = op2(DIVIDE, $1, $3); }
363 | term '%' term { $$ = op2(MOD, $1, $3); }
364 | term POWER term { $$ = op2(POWER, $1, $3); }
365 | '-' term %prec UMINUS { $$ = op1(UMINUS, $2); }
Arnold D. Robbins32093f52018-08-22 20:40:26 +0300366 | '+' term %prec UMINUS { $$ = op1(UPLUS, $2); }
Brian Kernighan87b94932012-12-22 10:35:39 -0500367 | NOT term %prec UMINUS { $$ = op1(NOT, notnull($2)); }
368 | BLTIN '(' ')' { $$ = op2(BLTIN, itonp($1), rectonode()); }
369 | BLTIN '(' patlist ')' { $$ = op2(BLTIN, itonp($1), $3); }
370 | BLTIN { $$ = op2(BLTIN, itonp($1), rectonode()); }
371 | CALL '(' ')' { $$ = op2(CALL, celltonode($1,CVAR), NIL); }
372 | CALL '(' patlist ')' { $$ = op2(CALL, celltonode($1,CVAR), $3); }
373 | CLOSE term { $$ = op1(CLOSE, $2); }
374 | DECR var { $$ = op1(PREDECR, $2); }
375 | INCR var { $$ = op1(PREINCR, $2); }
376 | var DECR { $$ = op1(POSTDECR, $1); }
377 | var INCR { $$ = op1(POSTINCR, $1); }
378 | GETLINE var LT term { $$ = op3(GETLINE, $2, itonp($3), $4); }
379 | GETLINE LT term { $$ = op3(GETLINE, NIL, itonp($2), $3); }
380 | GETLINE var { $$ = op3(GETLINE, $2, NIL, NIL); }
381 | GETLINE { $$ = op3(GETLINE, NIL, NIL, NIL); }
382 | INDEX '(' pattern comma pattern ')'
383 { $$ = op2(INDEX, $3, $5); }
384 | INDEX '(' pattern comma reg_expr ')'
385 { SYNTAX("index() doesn't permit regular expressions");
386 $$ = op2(INDEX, $3, (Node*)$5); }
387 | '(' pattern ')' { $$ = $2; }
388 | MATCHFCN '(' pattern comma reg_expr ')'
389 { $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa($5, 1)); }
390 | MATCHFCN '(' pattern comma pattern ')'
391 { if (constnode($5))
392 $$ = op3(MATCHFCN, NIL, $3, (Node*)makedfa(strnode($5), 1));
393 else
394 $$ = op3(MATCHFCN, (Node *)1, $3, $5); }
395 | NUMBER { $$ = celltonode($1, CCON); }
396 | SPLIT '(' pattern comma varname comma pattern ')' /* string */
397 { $$ = op4(SPLIT, $3, makearr($5), $7, (Node*)STRING); }
398 | SPLIT '(' pattern comma varname comma reg_expr ')' /* const /regexp/ */
399 { $$ = op4(SPLIT, $3, makearr($5), (Node*)makedfa($7, 1), (Node *)REGEXPR); }
400 | SPLIT '(' pattern comma varname ')'
401 { $$ = op4(SPLIT, $3, makearr($5), NIL, (Node*)STRING); } /* default */
402 | SPRINTF '(' patlist ')' { $$ = op1($1, $3); }
Arnold D. Robbinsc95b9602019-07-28 20:09:24 +0300403 | string { $$ = celltonode($1, CCON); }
Brian Kernighan87b94932012-12-22 10:35:39 -0500404 | subop '(' reg_expr comma pattern ')'
405 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, rectonode()); }
406 | subop '(' pattern comma pattern ')'
407 { if (constnode($3))
408 $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, rectonode());
409 else
410 $$ = op4($1, (Node *)1, $3, $5, rectonode()); }
411 | subop '(' reg_expr comma pattern comma var ')'
412 { $$ = op4($1, NIL, (Node*)makedfa($3, 1), $5, $7); }
413 | subop '(' pattern comma pattern comma var ')'
414 { if (constnode($3))
415 $$ = op4($1, NIL, (Node*)makedfa(strnode($3), 1), $5, $7);
416 else
417 $$ = op4($1, (Node *)1, $3, $5, $7); }
418 | SUBSTR '(' pattern comma pattern comma pattern ')'
419 { $$ = op3(SUBSTR, $3, $5, $7); }
420 | SUBSTR '(' pattern comma pattern ')'
421 { $$ = op3(SUBSTR, $3, $5, NIL); }
422 | var
423 ;
424
/*
 * Something that can be assigned to: a plain name, an array element
 * (ARRAY node over makearr() of the name and the subscript list), an IVAR
 * token, or INDIRECT applied to a term (both of the latter build an
 * INDIRECT node).
 */
425var:
426 varname
427 | varname '[' patlist ']' { $$ = op2(ARRAY, makearr($1), $3); }
428 | IVAR { $$ = op1(INDIRECT, celltonode($1, CVAR)); }
429 | INDIRECT term { $$ = op1(INDIRECT, $2); }
Arnold D. Robbins795a06b2019-07-28 05:51:52 -0600430 ;
Brian Kernighan87b94932012-12-22 10:35:39 -0500431
/*
 * Parameter list of a function definition: empty, or comma-separated VAR
 * tokens.  Every alternative also stores the list built so far in the
 * global arglist.  checkdup() reports a name that already appears in the
 * list; the name is appended regardless.
 */
432varlist:
433 /* nothing */ { arglist = $$ = 0; }
434 | VAR { arglist = $$ = celltonode($1,CVAR); }
435 | varlist comma VAR {
436 checkdup($1, $3);
437 arglist = $$ = linkum($1,celltonode($3,CVAR)); }
438 ;
439
/*
 * A simple variable reference: a VAR cell wrapped as a CVAR node, an ARG
 * node carrying the ARG token's integer value, or a VARNF node carrying
 * the VARNF cell.
 */
440varname:
441 VAR { $$ = celltonode($1, CVAR); }
442 | ARG { $$ = op1(ARG, itonp($1)); }
443 | VARNF { $$ = op1(VARNF, (Node *) $1); }
444 ;
445
446
/* Head of a while loop; its value is the condition passed through notnull(). */
447while:
448 WHILE '(' pattern rparen { $$ = notnull($3); }
449 ;
450
451%%
452
453void setfname(Cell *p)
454{
455 if (isarr(p))
456 SYNTAX("%s is an array, not a function", p->nval);
457 else if (isfcn(p))
458 SYNTAX("you can't define function %s more than once", p->nval);
459 curfname = p->nval;
460}
461
462int constnode(Node *p)
463{
464 return isvalue(p) && ((Cell *) (p->narg[0]))->csub == CCON;
465}
466
467char *strnode(Node *p)
468{
469 return ((Cell *)(p->narg[0]))->sval;
470}
471
472Node *notnull(Node *n)
473{
474 switch (n->nobj) {
475 case LE: case LT: case EQ: case NE: case GT: case GE:
476 case BOR: case AND: case NOT:
477 return n;
478 default:
479 return op2(NE, n, nullnode);
480 }
481}
482
483void checkdup(Node *vl, Cell *cp) /* check if name already in list */
484{
485 char *s = cp->nval;
486 for ( ; vl; vl = vl->nnext) {
487 if (strcmp(s, ((Cell *)(vl->narg[0]))->nval) == 0) {
488 SYNTAX("duplicate argument %s", s);
489 break;
490 }
491 }
492}