blob: 0f8cf94f4e04a39d7d7e34187690f669701a70da [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * awk implementation for busybox
 *
 * Copyright (C) 2002 by Dmitry Zakharov <dmit@crp.bank.gov.ua>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
22
23#include <stdio.h>
24#include <stdlib.h>
25#include <unistd.h>
26#include <errno.h>
27#include <string.h>
28#include <time.h>
29#include <math.h>
30#include <ctype.h>
31#include <getopt.h>
32#include <regex.h>
33
34#include "busybox.h"
35
36
/* buffer size handed to fmt_num() when a number is rendered as text */
#define MAXVARFMT	240
/* minimum number of var slots in a block created by nvalloc() */
#define MINNVBLOCK	64

/* variable flags (stored in var.type) */
#define VF_NUMBER	0x0001	/* 1 = primary type is number */
#define VF_ARRAY	0x0002	/* 1 = it's an array */

#define VF_CACHED	0x0100	/* 1 = num/str value has cached str/num eq */
#define VF_USER		0x0200	/* 1 = user input (may be numeric string) */
#define VF_SPECIAL	0x0400	/* 1 = requires extra handling when changed */
#define VF_WALK		0x0800	/* 1 = variable has alloc'd x.walker list */
#define VF_FSTR		0x1000	/* 1 = string points to fstring buffer */
#define VF_CHILD	0x2000	/* 1 = function arg; x.parent points to source */
#define VF_DIRTY	0x4000	/* 1 = variable was set explicitly */

/* these flags are static: clrvar() preserves them when the value changes */
#define VF_DONTTOUCH (VF_ARRAY | VF_SPECIAL | VF_WALK | VF_CHILD | VF_DIRTY)
54
/* Variable: holds a numeric and/or string value plus VF_* flags */
typedef struct var_s {
	unsigned short type;		/* VF_* flags */
	double number;			/* numeric value */
	char *string;			/* string value, NULL means "" */
	union {
		int aidx;		/* func arg index (on compilation stage) */
		struct xhash_s *array;	/* array ptr */
		struct var_s *parent;	/* for func args, ptr to actual parameter */
		char **walker;		/* list of array elements (for..in) */
	} x;
} var;
67
/* Node chain (pattern-action chain, BEGIN, END, function bodies) */
typedef struct chain_s {
	struct node_s *first;	/* head of the chain */
	struct node_s *last;	/* tail; chain_node() fills it and appends a new one */
	char *programname;	/* source name last recorded for this chain */
} chain;
74
75/* Function */
76typedef struct func_s {
77 unsigned short nargs;
78 struct chain_s body;
79} func;
80
/* I/O stream: a stdio handle plus a read buffer and its bookkeeping */
typedef struct rstream_s {
	FILE *F;
	char *buffer;
	int size;
	int pos;
	unsigned short is_pipe;
} rstream;
89
90typedef struct hash_item_s {
91 union {
92 struct var_s v; /* variable/array hash */
93 struct rstream_s rs; /* redirect streams hash */
94 struct func_s f; /* functions hash */
95 } data;
96 struct hash_item_s *next; /* next in chain */
97 char name[1]; /* really it's longer */
98} hash_item;
99
/* Chained hash table keyed by name */
typedef struct xhash_s {
	unsigned int nel;		/* num of elements */
	unsigned int csize;		/* current hash size */
	unsigned int nprime;		/* next hash size in PRIMES[] */
	unsigned int glen;		/* summary length of item names */
	struct hash_item_s **items;	/* bucket array, csize entries */
} xhash;
107
108/* Tree node */
109typedef struct node_s {
110 unsigned long info;
111 unsigned short lineno;
112 union {
113 struct node_s *n;
114 var *v;
115 int i;
116 char *s;
117 regex_t *re;
118 } l;
119 union {
120 struct node_s *n;
121 regex_t *ire;
122 func *f;
123 int argno;
124 } r;
125 union {
126 struct node_s *n;
127 } a;
128} node;
129
130/* Block of temporary variables */
131typedef struct nvblock_s {
132 int size;
133 var *pos;
134 struct nvblock_s *prev;
135 struct nvblock_s *next;
136 var nv[0];
137} nvblock;
138
139typedef struct tsplitter_s {
140 node n;
141 regex_t re[2];
142} tsplitter;
143
/* simple token classes */
/* Order and hex values are very important!!! See next_token(): it walks
 * tokenlist and shifts the class bit left at every NTC marker. */
#define TC_SEQSTART	1		/* ( */
#define TC_SEQTERM	(1 << 1)	/* ) */
#define TC_REGEXP	(1 << 2)	/* /.../ */
#define TC_OUTRDR	(1 << 3)	/* | > >> */
#define TC_UOPPOST	(1 << 4)	/* unary postfix operator */
#define TC_UOPPRE1	(1 << 5)	/* unary prefix operator */
#define TC_BINOPX	(1 << 6)	/* two-opnd operator */
#define TC_IN		(1 << 7)
#define TC_COMMA	(1 << 8)
#define TC_PIPE		(1 << 9)	/* input redirection pipe */
#define TC_UOPPRE2	(1 << 10)	/* unary prefix operator */
#define TC_ARRTERM	(1 << 11)	/* ] */
#define TC_GRPSTART	(1 << 12)	/* { */
#define TC_GRPTERM	(1 << 13)	/* } */
#define TC_SEMICOL	(1 << 14)
#define TC_NEWLINE	(1 << 15)
#define TC_STATX	(1 << 16)	/* ctl statement (for, next...) */
#define TC_WHILE	(1 << 17)
#define TC_ELSE		(1 << 18)
#define TC_BUILTIN	(1 << 19)
#define TC_GETLINE	(1 << 20)
#define TC_FUNCDECL	(1 << 21)	/* `function' `func' */
#define TC_BEGIN	(1 << 22)
#define TC_END		(1 << 23)
#define TC_EOF		(1 << 24)
#define TC_VARIABLE	(1 << 25)
#define TC_ARRAY	(1 << 26)
#define TC_FUNCTION	(1 << 27)
#define TC_STRING	(1 << 28)
#define TC_NUMBER	(1 << 29)

#define TC_UOPPRE	(TC_UOPPRE1 | TC_UOPPRE2)

/* combined token classes */
#define TC_BINOP	(TC_BINOPX | TC_COMMA | TC_PIPE | TC_IN)
#define TC_UNARYOP	(TC_UOPPRE | TC_UOPPOST)
#define TC_OPERAND	(TC_VARIABLE | TC_ARRAY | TC_FUNCTION | \
			TC_BUILTIN | TC_GETLINE | TC_SEQSTART | TC_STRING | TC_NUMBER)

#define TC_STATEMNT	(TC_STATX | TC_WHILE)
#define TC_OPTERM	(TC_SEMICOL | TC_NEWLINE)

/* word tokens, cannot mean something else if not expected */
#define TC_WORD		(TC_IN | TC_STATEMNT | TC_ELSE | TC_BUILTIN | \
			TC_GETLINE | TC_FUNCDECL | TC_BEGIN | TC_END)

/* discard newlines after these */
#define TC_NOTERM	(TC_COMMA | TC_GRPSTART | TC_GRPTERM | \
			TC_BINOP | TC_OPTERM)

/* what can expression begin with */
#define TC_OPSEQ	(TC_OPERAND | TC_UOPPRE | TC_REGEXP)
/* what can group begin with */
#define TC_GRPSEQ	(TC_OPSEQ | TC_OPTERM | TC_STATEMNT | TC_GRPSTART)

/* if previous token class is CONCAT1 and next is CONCAT2, concatenation */
/* operator is inserted between them */
#define TC_CONCAT1	(TC_VARIABLE | TC_ARRTERM | TC_SEQTERM | \
			TC_STRING | TC_NUMBER | TC_UOPPOST)
#define TC_CONCAT2	(TC_OPERAND | TC_UOPPRE)
206
/* operand handling flags stored in node.info */
#define OF_RES1		0x010000
#define OF_RES2		0x020000
#define OF_STR1		0x040000
#define OF_STR2		0x080000
#define OF_NUM1		0x100000
#define OF_CHECKED	0x200000

/* combined operator flags */
#define xx	0
#define xV	OF_RES2
#define xS	(OF_RES2 | OF_STR2)
#define Vx	OF_RES1
#define VV	(OF_RES1 | OF_RES2)
#define Nx	(OF_RES1 | OF_NUM1)
#define NV	(OF_RES1 | OF_NUM1 | OF_RES2)
#define Sx	(OF_RES1 | OF_STR1)
#define SV	(OF_RES1 | OF_STR1 | OF_RES2)
#define SS	(OF_RES1 | OF_STR1 | OF_RES2 | OF_STR2)

#define OPCLSMASK	0xFF00
#define OPNMASK		0x007F

/* operator priority is a highest byte (even: r->l, odd: l->r grouping)
 * For builtins it has different meaning: n n s3 s2 s1 v3 v2 v1,
 * n - min. number of args, vN - resolve Nth arg to var, sN - resolve to string
 *
 * The argument is parenthesised and widened to unsigned long before the
 * shift: values with bit 7 set (e.g. P(0x83)) would otherwise overflow a
 * signed int and sign-extend into the upper half of a 64-bit `info' word.
 */
#define P(x)		((unsigned long)(x) << 24)
#define PRIMASK		0x7F000000
#define PRIMASK2	0x7E000000

/* Operation classes */

#define SHIFT_TIL_THIS	0x0600
#define RECUR_FROM_THIS	0x1000
241
/* Operation classes (bits selected by OPCLSMASK in node.info) */
enum {
	OC_DELETE	= 0x0100,
	OC_EXEC		= 0x0200,
	OC_NEWSOURCE	= 0x0300,
	OC_PRINT	= 0x0400,
	OC_PRINTF	= 0x0500,
	OC_WALKINIT	= 0x0600,

	OC_BR		= 0x0700,
	OC_BREAK	= 0x0800,
	OC_CONTINUE	= 0x0900,
	OC_EXIT		= 0x0a00,
	OC_NEXT		= 0x0b00,
	OC_NEXTFILE	= 0x0c00,
	OC_TEST		= 0x0d00,
	OC_WALKNEXT	= 0x0e00,

	OC_BINARY	= 0x1000,
	OC_BUILTIN	= 0x1100,
	OC_COLON	= 0x1200,
	OC_COMMA	= 0x1300,
	OC_COMPARE	= 0x1400,
	OC_CONCAT	= 0x1500,
	OC_FBLTIN	= 0x1600,
	OC_FIELD	= 0x1700,
	OC_FNARG	= 0x1800,
	OC_FUNC		= 0x1900,
	OC_GETLINE	= 0x1a00,
	OC_IN		= 0x1b00,
	OC_LAND		= 0x1c00,
	OC_LOR		= 0x1d00,
	OC_MATCH	= 0x1e00,
	OC_MOVE		= 0x1f00,
	OC_PGETLINE	= 0x2000,
	OC_REGEXP	= 0x2100,
	OC_REPLACE	= 0x2200,
	OC_RETURN	= 0x2300,
	OC_SPRINTF	= 0x2400,
	OC_TERNARY	= 0x2500,
	OC_UNARY	= 0x2600,
	OC_VAR		= 0x2700,
	OC_DONE		= 0x2800,

	/* statement markers, only seen by the parser */
	ST_IF		= 0x3000,
	ST_DO		= 0x3100,
	ST_FOR		= 0x3200,
	ST_WHILE	= 0x3300
};
263
/* simple builtins (OC_FBLTIN); the keyword each one is bound to in
 * tokeninfo[] is given alongside */
enum {
	F_in = 0,	/* int */
	F_rn,		/* rand */
	F_co,		/* cos */
	F_ex,		/* exp */
	F_lg,		/* log */
	F_si,		/* sin */
	F_sq,		/* sqrt */
	F_sr,		/* srand */
	F_ti,		/* systime */
	F_le,		/* length */
	F_sy,		/* system */
	F_ff,		/* fflush */
	F_cl		/* close */
};

/* builtins (OC_BUILTIN) */
enum {
	B_a2 = 0,	/* atan2 */
	B_ix,		/* index */
	B_ma,		/* match */
	B_sp,		/* split */
	B_ss,		/* substr */
	B_ti,		/* strftime */
	B_lo,		/* tolower */
	B_up,		/* toupper */
	B_ge,		/* gensub */
	B_gs,		/* gsub */
	B_su		/* sub */
};
275
/* tokens and their corresponding info values */

#define NTC	"\377"	/* switch to next token class (tc<<1) */
#define NTCC	'\377'

#define OC_B	OC_BUILTIN

/* Each entry is a length byte followed by the token text; NTC separates
 * token classes, in the same order as the TC_* bits. Entry order must
 * match tokeninfo[]. */
static char * const tokenlist =
	"\1("					NTC	/* SEQSTART */
	"\1)"					NTC	/* SEQTERM */
	"\1/"					NTC	/* REGEXP */
	"\2>>"	"\1>"	"\1|"			NTC	/* OUTRDR */
	"\2++"	"\2--"				NTC	/* UOPPOST */
	"\2++"	"\2--"	"\1$"			NTC	/* UOPPRE1 */
	"\2=="	"\1="	"\2+="	"\2-="			/* BINOPX */
	"\2*="	"\2/="	"\2%="	"\2^="
	"\1+"	"\1-"	"\3**="	"\2**"
	"\1/"	"\1%"	"\1^"	"\1*"
	"\2!="	"\2>="	"\2<="	"\1>"
	"\1<"	"\2!~"	"\1~"	"\2&&"
	"\2||"	"\1?"	"\1:"			NTC
	"\2in"					NTC	/* IN */
	"\1,"					NTC	/* COMMA */
	"\1|"					NTC	/* PIPE */
	"\1+"	"\1-"	"\1!"			NTC	/* UOPPRE2 */
	"\1]"					NTC	/* ARRTERM */
	"\1{"					NTC	/* GRPSTART */
	"\1}"					NTC	/* GRPTERM */
	"\1;"					NTC	/* SEMICOL */
	"\1\n"					NTC	/* NEWLINE */
	"\2if"	"\2do"	"\3for"	"\5break"		/* STATX */
	"\10continue"	"\6delete"	"\5print"
	"\6printf"	"\4next"	"\10nextfile"
	"\6return"	"\4exit"		NTC
	"\5while"				NTC	/* WHILE */
	"\4else"				NTC	/* ELSE */

	"\5close"	"\6system"	"\6fflush"	"\5atan2"	/* BUILTIN */
	"\3cos"		"\3exp"		"\3int"		"\3log"
	"\4rand"	"\3sin"		"\4sqrt"	"\5srand"
	"\6gensub"	"\4gsub"	"\5index"	"\6length"
	"\5match"	"\5split"	"\7sprintf"	"\3sub"
	"\6substr"	"\7systime"	"\10strftime"
	"\7tolower"	"\7toupper"		NTC
	"\7getline"				NTC	/* GETLINE */
	"\4func"	"\10function"		NTC	/* FUNCDECL */
	"\5BEGIN"				NTC	/* BEGIN */
	"\3END"					"\0"	/* END */
	;
325
326static unsigned long tokeninfo[] = {
327
328 0,
329 0,
330 OC_REGEXP,
331 xS|'a', xS|'w', xS|'|',
332 OC_UNARY|xV|P(9)|'p', OC_UNARY|xV|P(9)|'m',
333 OC_UNARY|xV|P(9)|'P', OC_UNARY|xV|P(9)|'M',
334 OC_FIELD|xV|P(5),
335 OC_COMPARE|VV|P(39)|5, OC_MOVE|VV|P(74),
336 OC_REPLACE|NV|P(74)|'+', OC_REPLACE|NV|P(74)|'-',
337 OC_REPLACE|NV|P(74)|'*', OC_REPLACE|NV|P(74)|'/',
338 OC_REPLACE|NV|P(74)|'%', OC_REPLACE|NV|P(74)|'&',
339 OC_BINARY|NV|P(29)|'+', OC_BINARY|NV|P(29)|'-',
340 OC_REPLACE|NV|P(74)|'&', OC_BINARY|NV|P(15)|'&',
341 OC_BINARY|NV|P(25)|'/', OC_BINARY|NV|P(25)|'%',
342 OC_BINARY|NV|P(15)|'&', OC_BINARY|NV|P(25)|'*',
343 OC_COMPARE|VV|P(39)|4, OC_COMPARE|VV|P(39)|3,
344 OC_COMPARE|VV|P(39)|0, OC_COMPARE|VV|P(39)|1,
345 OC_COMPARE|VV|P(39)|2, OC_MATCH|Sx|P(45)|'!',
346 OC_MATCH|Sx|P(45)|'~', OC_LAND|Vx|P(55),
347 OC_LOR|Vx|P(59), OC_TERNARY|Vx|P(64)|'?',
348 OC_COLON|xx|P(67)|':',
349 OC_IN|SV|P(49),
350 OC_COMMA|SS|P(80),
351 OC_PGETLINE|SV|P(37),
352 OC_UNARY|xV|P(19)|'+', OC_UNARY|xV|P(19)|'-',
353 OC_UNARY|xV|P(19)|'!',
354 0,
355 0,
356 0,
357 0,
358 0,
359 ST_IF, ST_DO, ST_FOR, OC_BREAK,
360 OC_CONTINUE, OC_DELETE|Vx, OC_PRINT,
361 OC_PRINTF, OC_NEXT, OC_NEXTFILE,
362 OC_RETURN|Vx, OC_EXIT|Nx,
363 ST_WHILE,
364 0,
365
366 OC_FBLTIN|Sx|F_cl, OC_FBLTIN|Sx|F_sy, OC_FBLTIN|Sx|F_ff, OC_B|B_a2|P(0x83),
367 OC_FBLTIN|Nx|F_co, OC_FBLTIN|Nx|F_ex, OC_FBLTIN|Nx|F_in, OC_FBLTIN|Nx|F_lg,
368 OC_FBLTIN|F_rn, OC_FBLTIN|Nx|F_si, OC_FBLTIN|Nx|F_sq, OC_FBLTIN|Nx|F_sr,
369 OC_B|B_ge|P(0xd6), OC_B|B_gs|P(0xb6), OC_B|B_ix|P(0x9b), OC_FBLTIN|Sx|F_le,
370 OC_B|B_ma|P(0x89), OC_B|B_sp|P(0x8b), OC_SPRINTF, OC_B|B_su|P(0xb6),
371 OC_B|B_ss|P(0x8f), OC_FBLTIN|F_ti, OC_B|B_ti|P(0x0b),
372 OC_B|B_lo|P(0x49), OC_B|B_up|P(0x49),
373 OC_GETLINE|SV|P(0),
374 0, 0,
375 0,
376 0
377};
378
/* internal variable names and their initial values */
/* asterisk marks SPECIAL vars; $ is just no-named Field0 */
enum {
	CONVFMT = 0,
	OFMT,
	FS,
	OFS,
	ORS,
	RS,
	RT,
	FILENAME,
	SUBSEP,
	ARGIND,
	ARGC,
	ARGV,
	ERRNO,
	FNR,
	NR,
	NF,
	IGNORECASE,
	ENVIRON,
	F0,
	_intvarcount_
};

/* NUL-separated names in enum order; the '*' marker sits right after the
 * terminator of the name it belongs to, an empty name ends the list. */
static char * vNames =
	"CONVFMT\0"	"OFMT\0"	"FS\0*"		"OFS\0"
	"ORS\0"		"RS\0*"		"RT\0"		"FILENAME\0"
	"SUBSEP\0"	"ARGIND\0"	"ARGC\0"	"ARGV\0"
	"ERRNO\0"	"FNR\0"
	"NR\0"		"NF\0*"		"IGNORECASE\0*"
	"ENVIRON\0"	"$\0*"		"\0";

/* NUL-separated initial string values for the leading variables;
 * the list is closed by a \377 byte. */
static char * vValues =
	"%.6g\0"	"%.6g\0"	" \0"		" \0"
	"\n\0"		"\n\0"		"\0"		"\0"
	"\034\0"
	"\377";
403
/* initial hash size; a table later grows through the sizes in PRIMES[].
 * (No trailing semicolon here, so the macro is usable inside expressions.) */
#define FIRST_PRIME 61
static const unsigned int PRIMES[] = { 251, 1021, 4093, 16381, 65521 };
static const unsigned int NPRIMES = sizeof(PRIMES) / sizeof(unsigned int);
408
409/* globals */
410
411extern char **environ;
412
413static var * V[_intvarcount_];
414static chain beginseq, mainseq, endseq, *seq;
415static int nextrec, nextfile;
416static node *break_ptr, *continue_ptr;
417static rstream *iF;
418static xhash *vhash, *ahash, *fdhash, *fnhash;
419static char *programname;
420static short lineno;
421static int is_f0_split;
422static int nfields = 0;
423static var *Fields = NULL;
424static tsplitter fsplitter, rsplitter;
425static nvblock *cb = NULL;
426static char *pos;
427static char *buf;
428static int icase = FALSE;
429
430static struct {
431 unsigned long tclass;
432 unsigned long info;
433 char *string;
434 double number;
435 short lineno;
436 int rollback;
437} t;
438
439/* function prototypes */
440extern void xregcomp(regex_t *preg, const char *regex, int cflags);
441static void handle_special(var *);
442static node *parse_expr(unsigned long);
443static void chain_group(void);
444static var *evaluate(node *, var *);
445static rstream *next_input_file(void);
446static int fmt_num(char *, int, char *, double, int);
447static int awk_exit(int);
448
/* ---- error handling ---- */

/* texts passed to syntax_error() / runtime_error() */
static const char EMSG_INTERNAL_ERROR[]	= "Internal error";
static const char EMSG_UNEXP_EOS[]	= "Unexpected end of string";
static const char EMSG_UNEXP_TOKEN[]	= "Unexpected token";
static const char EMSG_DIV_BY_ZERO[]	= "Division by zero";
static const char EMSG_INV_FMT[]	= "Invalid format specifier";
static const char EMSG_TOO_FEW_ARGS[]	= "Too few arguments for builtin";
static const char EMSG_NOT_ARRAY[]	= "Not an array";
static const char EMSG_POSSIBLE_ERROR[]	= "Possible syntax error";
static const char EMSG_UNDEF_FUNC[]	= "Call to undefined function";
#ifndef CONFIG_FEATURE_AWK_MATH
static const char EMSG_NO_MATH[]	= "Math support is not compiled in";
#endif
463
Glenn L McGrathd4036f82002-11-28 09:30:40 +0000464static void syntax_error(const char * const message)
465{
Manuel Novoa III cad53642003-03-19 09:13:01 +0000466 bb_error_msg("%s:%i: %s", programname, lineno, message);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000467 awk_exit(1);
468}
469
470#define runtime_error(x) syntax_error(x)
471
472
473/* ---- hash stuff ---- */
474
/* Hash a NUL-terminated name: every character is added to 63 times the
 * running value (written as shift-and-subtract). The caller reduces the
 * result modulo the table size. */
static unsigned int hashidx(char *name) {

	unsigned int h = 0;
	char *c;

	for (c = name; *c != '\0'; c++)
		h = *c + (h << 6) - h;

	return h;
}
482
483/* create new hash */
484static xhash *hash_init(void) {
485
486 xhash *newhash;
487
488 newhash = (xhash *)xcalloc(1, sizeof(xhash));
489 newhash->csize = FIRST_PRIME;
490 newhash->items = (hash_item **)xcalloc(newhash->csize, sizeof(hash_item *));
491
492 return newhash;
493}
494
495/* find item in hash, return ptr to data, NULL if not found */
496static void *hash_search(xhash *hash, char *name) {
497
498 hash_item *hi;
499
500 hi = hash->items [ hashidx(name) % hash->csize ];
501 while (hi) {
502 if (strcmp(hi->name, name) == 0)
503 return &(hi->data);
504 hi = hi->next;
505 }
506 return NULL;
507}
508
509/* grow hash if it becomes too big */
510static void hash_rebuild(xhash *hash) {
511
512 unsigned int newsize, i, idx;
513 hash_item **newitems, *hi, *thi;
514
515 if (hash->nprime == NPRIMES)
516 return;
517
518 newsize = PRIMES[hash->nprime++];
519 newitems = (hash_item **)xcalloc(newsize, sizeof(hash_item *));
520
521 for (i=0; i<hash->csize; i++) {
522 hi = hash->items[i];
523 while (hi) {
524 thi = hi;
525 hi = thi->next;
526 idx = hashidx(thi->name) % newsize;
527 thi->next = newitems[idx];
528 newitems[idx] = thi;
529 }
530 }
531
532 free(hash->items);
533 hash->csize = newsize;
534 hash->items = newitems;
535}
536
537/* find item in hash, add it if necessary. Return ptr to data */
538static void *hash_find(xhash *hash, char *name) {
539
540 hash_item *hi;
541 unsigned int idx;
542 int l;
543
544 hi = hash_search(hash, name);
545 if (! hi) {
546 if (++hash->nel / hash->csize > 10)
547 hash_rebuild(hash);
548
Manuel Novoa III cad53642003-03-19 09:13:01 +0000549 l = bb_strlen(name) + 1;
Glenn L McGrath545106f2002-11-11 06:21:00 +0000550 hi = xcalloc(sizeof(hash_item) + l, 1);
551 memcpy(hi->name, name, l);
552
553 idx = hashidx(name) % hash->csize;
554 hi->next = hash->items[idx];
555 hash->items[idx] = hi;
556 hash->glen += l;
557 }
558 return &(hi->data);
559}
560
/* Typed wrappers around hash_find(). Each expansion is fully parenthesised
 * so that the cast binds before a following `->', `[' or `.'. */
#define findvar(hash, name)	((var *) hash_find ( (hash) , (name) ))
#define newvar(name)		((var *) hash_find ( vhash , (name) ))
#define newfile(name)		((rstream *) hash_find ( fdhash , (name) ))
#define newfunc(name)		((func *) hash_find ( fnhash , (name) ))
565
566static void hash_remove(xhash *hash, char *name) {
567
568 hash_item *hi, **phi;
569
570 phi = &(hash->items[ hashidx(name) % hash->csize ]);
571 while (*phi) {
572 hi = *phi;
573 if (strcmp(hi->name, name) == 0) {
Manuel Novoa III cad53642003-03-19 09:13:01 +0000574 hash->glen -= (bb_strlen(name) + 1);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000575 hash->nel--;
576 *phi = hi->next;
577 free(hi);
578 break;
579 }
580 phi = &(hi->next);
581 }
582}
583
584/* ------ some useful functions ------ */
585
586static void skip_spaces(char **s) {
587
588 register char *p = *s;
589
590 while(*p == ' ' || *p == '\t' ||
591 (*p == '\\' && *(p+1) == '\n' && (++p, ++t.lineno))) {
592 p++;
593 }
594 *s = p;
595}
596
/* Return the word *s currently points at and move *s just past that
 * word's terminating NUL (i.e. to the start of the next word). */
static char *nextword(char **s) {

	char *word = *s;
	char *end = word;

	while (*end != '\0')
		end++;
	*s = end + 1;

	return word;
}
605
/* Fetch the next character from *s, translating a backslash escape via
 * bb_process_escape_sequence(), and advance *s past what was consumed.
 * If the escape is not recognised (a backslash comes back and the cursor
 * did not move), the character after the backslash is returned literally,
 * e.g. \" yields ".
 * A backslash directly before the terminating NUL returns '\0' and leaves
 * *s on the NUL, so callers never walk past the end of the string.
 */
static char nextchar(char **s) {

	char c, *pps;

	c = *((*s)++);
	pps = *s;
	if (c == '\\') c = bb_process_escape_sequence((const char**)s);
	if (c == '\\' && *s == pps) {
		/* unrecognised escape: take the following character as is */
		c = **s;
		if (c != '\0')
			(*s)++;
	}
	return c;
}
616
/* Return non-zero if `c' may appear in an identifier (alphanumeric or '_').
 * Callers pass plain `char' values, which may be negative; the value is
 * narrowed to unsigned char because isalnum() is only defined for that
 * range (and EOF). */
static inline int isalnum_(int c) {

	return (isalnum((unsigned char)c) || c == '_');
}
/* Open `path' with `mode' through bb_xfopen(); the path "-" yields stdin
 * instead (regardless of `mode'). */
static FILE *afopen(const char *path, const char *mode) {

	if (path[0] == '-' && path[1] == '\0')
		return stdin;

	return bb_xfopen(path, mode);
}
626
627/* -------- working with variables (set/get/copy/etc) -------- */
628
629static xhash *iamarray(var *v) {
630
631 var *a = v;
632
633 while (a->type & VF_CHILD)
634 a = a->x.parent;
635
636 if (! (a->type & VF_ARRAY)) {
637 a->type |= VF_ARRAY;
638 a->x.array = hash_init();
639 }
640 return a->x.array;
641}
642
643static void clear_array(xhash *array) {
644
645 unsigned int i;
646 hash_item *hi, *thi;
647
648 for (i=0; i<array->csize; i++) {
649 hi = array->items[i];
650 while (hi) {
651 thi = hi;
652 hi = hi->next;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000653 free(thi->data.v.string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000654 free(thi);
655 }
656 array->items[i] = NULL;
657 }
658 array->glen = array->nel = 0;
659}
660
661/* clear a variable */
662static var *clrvar(var *v) {
663
Aaron Lehmanna170e1c2002-11-28 11:27:31 +0000664 if (!(v->type & VF_FSTR))
Glenn L McGrath545106f2002-11-11 06:21:00 +0000665 free(v->string);
666
667 v->type &= VF_DONTTOUCH;
668 v->type |= VF_DIRTY;
669 v->string = NULL;
670 return v;
671}
672
673/* assign string value to variable */
674static var *setvar_p(var *v, char *value) {
675
676 clrvar(v);
677 v->string = value;
678 handle_special(v);
679
680 return v;
681}
682
683/* same as setvar_p but make a copy of string */
684static var *setvar_s(var *v, char *value) {
685
Manuel Novoa III cad53642003-03-19 09:13:01 +0000686 return setvar_p(v, (value && *value) ? bb_xstrdup(value) : NULL);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000687}
688
689/* same as setvar_s but set USER flag */
690static var *setvar_u(var *v, char *value) {
691
692 setvar_s(v, value);
693 v->type |= VF_USER;
694 return v;
695}
696
697/* set array element to user string */
698static void setari_u(var *a, int idx, char *s) {
699
700 register var *v;
701 static char sidx[12];
702
703 sprintf(sidx, "%d", idx);
704 v = findvar(iamarray(a), sidx);
705 setvar_u(v, s);
706}
707
708/* assign numeric value to variable */
709static var *setvar_i(var *v, double value) {
710
711 clrvar(v);
712 v->type |= VF_NUMBER;
713 v->number = value;
714 handle_special(v);
715 return v;
716}
717
718static char *getvar_s(var *v) {
719
720 /* if v is numeric and has no cached string, convert it to string */
721 if ((v->type & (VF_NUMBER | VF_CACHED)) == VF_NUMBER) {
722 fmt_num(buf, MAXVARFMT, getvar_s(V[CONVFMT]), v->number, TRUE);
Manuel Novoa III cad53642003-03-19 09:13:01 +0000723 v->string = bb_xstrdup(buf);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000724 v->type |= VF_CACHED;
725 }
726 return (v->string == NULL) ? "" : v->string;
727}
728
729static double getvar_i(var *v) {
730
731 char *s;
732
733 if ((v->type & (VF_NUMBER | VF_CACHED)) == 0) {
734 v->number = 0;
735 s = v->string;
736 if (s && *s) {
737 v->number = strtod(s, &s);
738 if (v->type & VF_USER) {
739 skip_spaces(&s);
740 if (*s != '\0')
741 v->type &= ~VF_USER;
742 }
743 } else {
744 v->type &= ~VF_USER;
745 }
746 v->type |= VF_CACHED;
747 }
748 return v->number;
749}
750
751static var *copyvar(var *dest, var *src) {
752
753 if (dest != src) {
754 clrvar(dest);
755 dest->type |= (src->type & ~VF_DONTTOUCH);
756 dest->number = src->number;
757 if (src->string)
Manuel Novoa III cad53642003-03-19 09:13:01 +0000758 dest->string = bb_xstrdup(src->string);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000759 }
760 handle_special(dest);
761 return dest;
762}
763
764static var *incvar(var *v) {
765
766 return setvar_i(v, getvar_i(v)+1.);
767}
768
769/* return true if v is number or numeric string */
770static int is_numeric(var *v) {
771
772 getvar_i(v);
773 return ((v->type ^ VF_DIRTY) & (VF_NUMBER | VF_USER | VF_DIRTY));
774}
775
776/* return 1 when value of v corresponds to true, 0 otherwise */
777static int istrue(var *v) {
778
779 if (is_numeric(v))
780 return (v->number == 0) ? 0 : 1;
781 else
782 return (v->string && *(v->string)) ? 1 : 0;
783}
784
785/* temporary varables allocator. Last allocated should be first freed */
786static var *nvalloc(int n) {
787
788 nvblock *pb = NULL;
789 var *v, *r;
790 int size;
791
792 while (cb) {
793 pb = cb;
794 if ((cb->pos - cb->nv) + n <= cb->size) break;
795 cb = cb->next;
796 }
797
798 if (! cb) {
799 size = (n <= MINNVBLOCK) ? MINNVBLOCK : n;
800 cb = (nvblock *)xmalloc(sizeof(nvblock) + size * sizeof(var));
801 cb->size = size;
802 cb->pos = cb->nv;
803 cb->prev = pb;
804 cb->next = NULL;
805 if (pb) pb->next = cb;
806 }
807
808 v = r = cb->pos;
809 cb->pos += n;
810
811 while (v < cb->pos) {
812 v->type = 0;
813 v->string = NULL;
814 v++;
815 }
816
817 return r;
818}
819
820static void nvfree(var *v) {
821
822 var *p;
823
824 if (v < cb->nv || v >= cb->pos)
825 runtime_error(EMSG_INTERNAL_ERROR);
826
827 for (p=v; p<cb->pos; p++) {
828 if ((p->type & (VF_ARRAY|VF_CHILD)) == VF_ARRAY) {
829 clear_array(iamarray(p));
830 free(p->x.array->items);
831 free(p->x.array);
832 }
833 if (p->type & VF_WALK)
834 free(p->x.walker);
835
836 clrvar(p);
837 }
838
839 cb->pos = v;
840 while (cb->prev && cb->pos == cb->nv) {
841 cb = cb->prev;
842 }
843}
844
845/* ------- awk program text parsing ------- */
846
847/* Parse next token pointed by global pos, place results into global t.
848 * If token isn't expected, give away. Return token class
849 */
850static unsigned long next_token(unsigned long expected) {
851
852 char *p, *pp, *s;
853 char *tl;
854 unsigned long tc, *ti;
855 int l;
856 static int concat_inserted = FALSE;
857 static unsigned long save_tclass, save_info;
858 static unsigned long ltclass = TC_OPTERM;
859
860 if (t.rollback) {
861
862 t.rollback = FALSE;
863
864 } else if (concat_inserted) {
865
866 concat_inserted = FALSE;
867 t.tclass = save_tclass;
868 t.info = save_info;
869
870 } else {
871
872 p = pos;
873
874 readnext:
875 skip_spaces(&p);
876 lineno = t.lineno;
877 if (*p == '#')
878 while (*p != '\n' && *p != '\0') p++;
879
880 if (*p == '\n')
881 t.lineno++;
882
883 if (*p == '\0') {
884 tc = TC_EOF;
885
886 } else if (*p == '\"') {
887 /* it's a string */
888 t.string = s = ++p;
889 while (*p != '\"') {
890 if (*p == '\0' || *p == '\n')
891 syntax_error(EMSG_UNEXP_EOS);
892 *(s++) = nextchar(&p);
893 }
894 p++;
895 *s = '\0';
896 tc = TC_STRING;
897
898 } else if ((expected & TC_REGEXP) && *p == '/') {
899 /* it's regexp */
900 t.string = s = ++p;
901 while (*p != '/') {
902 if (*p == '\0' || *p == '\n')
903 syntax_error(EMSG_UNEXP_EOS);
904 if ((*s++ = *p++) == '\\') {
905 pp = p;
Manuel Novoa III cad53642003-03-19 09:13:01 +0000906 *(s-1) = bb_process_escape_sequence((const char **)&p);
Glenn L McGrath545106f2002-11-11 06:21:00 +0000907 if (*pp == '\\') *s++ = '\\';
908 if (p == pp) *s++ = *p++;
909 }
910 }
911 p++;
912 *s = '\0';
913 tc = TC_REGEXP;
914
915 } else if (*p == '.' || isdigit(*p)) {
916 /* it's a number */
917 t.number = strtod(p, &p);
918 if (*p == '.')
919 syntax_error(EMSG_UNEXP_TOKEN);
920 tc = TC_NUMBER;
921
922 } else {
923 /* search for something known */
924 tl = tokenlist;
925 tc = 0x00000001;
926 ti = tokeninfo;
927 while (*tl) {
928 l = *(tl++);
929 if (l == NTCC) {
930 tc <<= 1;
931 continue;
932 }
933 /* if token class is expected, token
934 * matches and it's not a longer word,
935 * then this is what we are looking for
936 */
937 if ((tc & (expected | TC_WORD | TC_NEWLINE)) &&
938 *tl == *p && strncmp(p, tl, l) == 0 &&
939 !((tc & TC_WORD) && isalnum_(*(p + l)))) {
940 t.info = *ti;
941 p += l;
942 break;
943 }
944 ti++;
945 tl += l;
946 }
947
948 if (! *tl) {
949 /* it's a name (var/array/function),
950 * otherwise it's something wrong
951 */
952 if (! isalnum_(*p))
953 syntax_error(EMSG_UNEXP_TOKEN);
954
955 t.string = --p;
956 while(isalnum_(*(++p))) {
957 *(p-1) = *p;
958 }
959 *(p-1) = '\0';
960 tc = TC_VARIABLE;
961 if (*p == '(') {
962 tc = TC_FUNCTION;
963 } else {
964 skip_spaces(&p);
965 if (*p == '[') {
966 p++;
967 tc = TC_ARRAY;
968 }
969 }
970 }
971 }
972 pos = p;
973
974 /* skipping newlines in some cases */
975 if ((ltclass & TC_NOTERM) && (tc & TC_NEWLINE))
976 goto readnext;
977
978 /* insert concatenation operator when needed */
979 if ((ltclass&TC_CONCAT1) && (tc&TC_CONCAT2) && (expected&TC_BINOP)) {
980 concat_inserted = TRUE;
981 save_tclass = tc;
982 save_info = t.info;
983 tc = TC_BINOP;
984 t.info = OC_CONCAT | SS | P(35);
985 }
986
987 t.tclass = tc;
988 }
989 ltclass = t.tclass;
990
991 /* Are we ready for this? */
992 if (! (ltclass & expected))
993 syntax_error((ltclass & (TC_NEWLINE | TC_EOF)) ?
994 EMSG_UNEXP_EOS : EMSG_UNEXP_TOKEN);
995
996 return ltclass;
997}
998
999static void rollback_token(void) { t.rollback = TRUE; }
1000
1001static node *new_node(unsigned long info) {
1002
1003 register node *n;
1004
1005 n = (node *)xcalloc(sizeof(node), 1);
1006 n->info = info;
1007 n->lineno = lineno;
1008 return n;
1009}
1010
1011static node *mk_re_node(char *s, node *n, regex_t *re) {
1012
1013 n->info = OC_REGEXP;
1014 n->l.re = re;
1015 n->r.ire = re + 1;
1016 xregcomp(re, s, REG_EXTENDED);
1017 xregcomp(re+1, s, REG_EXTENDED | REG_ICASE);
1018
1019 return n;
1020}
1021
1022static node *condition(void) {
1023
1024 next_token(TC_SEQSTART);
1025 return parse_expr(TC_SEQTERM);
1026}
1027
1028/* parse expression terminated by given argument, return ptr
1029 * to built subtree. Terminator is eaten by parse_expr */
1030static node *parse_expr(unsigned long iexp) {
1031
1032 node sn;
1033 node *cn = &sn;
1034 node *vn, *glptr;
1035 unsigned long tc, xtc;
1036 var *v;
1037
1038 sn.info = PRIMASK;
1039 sn.r.n = glptr = NULL;
1040 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP | iexp;
1041
1042 while (! ((tc = next_token(xtc)) & iexp)) {
1043 if (glptr && (t.info == (OC_COMPARE|VV|P(39)|2))) {
1044 /* input redirection (<) attached to glptr node */
1045 cn = glptr->l.n = new_node(OC_CONCAT|SS|P(37));
1046 xtc = TC_OPERAND | TC_UOPPRE;
1047 glptr = NULL;
1048
1049 } else if (tc & (TC_BINOP | TC_UOPPOST)) {
1050 /* for binary and postfix-unary operators, jump back over
1051 * previous operators with higher priority */
1052 vn = cn;
1053 while ( ((t.info & PRIMASK) > (vn->a.n->info & PRIMASK2)) ||
1054 ((t.info == vn->info) && ((t.info & OPCLSMASK) == OC_COLON)) )
1055 vn = vn->a.n;
1056 if ((t.info & OPCLSMASK) == OC_TERNARY)
1057 t.info += P(6);
1058 cn = vn->a.n->r.n = new_node(t.info);
1059 cn->a.n = vn->a.n;
1060 if (tc & TC_BINOP) {
1061 cn->l.n = vn;
1062 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1063 if ((t.info & OPCLSMASK) == OC_PGETLINE) {
1064 /* it's a pipe */
1065 next_token(TC_GETLINE);
1066 /* give maximum priority to this pipe */
1067 cn->info &= ~PRIMASK;
1068 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1069 }
1070 } else {
1071 cn->r.n = vn;
1072 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1073 }
1074 vn->a.n = cn;
1075
1076 } else {
1077 /* for operands and prefix-unary operators, attach them
1078 * to last node */
1079 vn = cn;
1080 cn = vn->r.n = new_node(t.info);
1081 cn->a.n = vn;
1082 xtc = TC_OPERAND | TC_UOPPRE | TC_REGEXP;
1083 if (tc & (TC_OPERAND | TC_REGEXP)) {
1084 xtc = TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1085 /* one should be very careful with switch on tclass -
1086 * only simple tclasses should be used! */
1087 switch (tc) {
1088 case TC_VARIABLE:
1089 case TC_ARRAY:
1090 cn->info = OC_VAR;
1091 if ((v = hash_search(ahash, t.string)) != NULL) {
1092 cn->info = OC_FNARG;
1093 cn->l.i = v->x.aidx;
1094 } else {
1095 cn->l.v = newvar(t.string);
1096 }
1097 if (tc & TC_ARRAY) {
1098 cn->info |= xS;
1099 cn->r.n = parse_expr(TC_ARRTERM);
1100 }
1101 xtc = TC_UOPPOST | TC_UOPPRE | TC_BINOP | TC_OPERAND | iexp;
1102 break;
1103
1104 case TC_NUMBER:
1105 case TC_STRING:
1106 cn->info = OC_VAR;
1107 v = cn->l.v = xcalloc(sizeof(var), 1);
1108 if (tc & TC_NUMBER)
1109 setvar_i(v, t.number);
1110 else
1111 setvar_s(v, t.string);
1112 break;
1113
1114 case TC_REGEXP:
1115 mk_re_node(t.string, cn,
1116 (regex_t *)xcalloc(sizeof(regex_t),2));
1117 break;
1118
1119 case TC_FUNCTION:
1120 cn->info = OC_FUNC;
1121 cn->r.f = newfunc(t.string);
1122 cn->l.n = condition();
1123 break;
1124
1125 case TC_SEQSTART:
1126 cn = vn->r.n = parse_expr(TC_SEQTERM);
1127 cn->a.n = vn;
1128 break;
1129
1130 case TC_GETLINE:
1131 glptr = cn;
1132 xtc = TC_OPERAND | TC_UOPPRE | TC_BINOP | iexp;
1133 break;
1134
1135 case TC_BUILTIN:
1136 cn->l.n = condition();
1137 break;
1138 }
1139 }
1140 }
1141 }
1142 return sn.r.n;
1143}
1144
1145/* add node to chain. Return ptr to alloc'd node */
1146static node *chain_node(unsigned long info) {
1147
1148 register node *n;
1149
1150 if (! seq->first)
1151 seq->first = seq->last = new_node(0);
1152
1153 if (seq->programname != programname) {
1154 seq->programname = programname;
1155 n = chain_node(OC_NEWSOURCE);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001156 n->l.s = bb_xstrdup(programname);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001157 }
1158
1159 n = seq->last;
1160 n->info = info;
1161 seq->last = n->a.n = new_node(OC_DONE);
1162
1163 return n;
1164}
1165
1166static void chain_expr(unsigned long info) {
1167
1168 node *n;
1169
1170 n = chain_node(info);
1171 n->l.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1172 if (t.tclass & TC_GRPTERM)
1173 rollback_token();
1174}
1175
1176static node *chain_loop(node *nn) {
1177
1178 node *n, *n2, *save_brk, *save_cont;
1179
1180 save_brk = break_ptr;
1181 save_cont = continue_ptr;
1182
1183 n = chain_node(OC_BR | Vx);
1184 continue_ptr = new_node(OC_EXEC);
1185 break_ptr = new_node(OC_EXEC);
1186 chain_group();
1187 n2 = chain_node(OC_EXEC | Vx);
1188 n2->l.n = nn;
1189 n2->a.n = n;
1190 continue_ptr->a.n = n2;
1191 break_ptr->a.n = n->r.n = seq->last;
1192
1193 continue_ptr = save_cont;
1194 break_ptr = save_brk;
1195
1196 return n;
1197}
1198
1199/* parse group and attach it to chain */
1200static void chain_group(void) {
1201
1202 unsigned long c;
1203 node *n, *n2, *n3;
1204
1205 do {
1206 c = next_token(TC_GRPSEQ);
1207 } while (c & TC_NEWLINE);
1208
1209 if (c & TC_GRPSTART) {
1210 while(next_token(TC_GRPSEQ | TC_GRPTERM) != TC_GRPTERM) {
1211 rollback_token();
1212 chain_group();
1213 }
1214 } else if (c & (TC_OPSEQ | TC_OPTERM)) {
1215 rollback_token();
1216 chain_expr(OC_EXEC | Vx);
1217 } else { /* TC_STATEMNT */
1218 switch (t.info & OPCLSMASK) {
1219 case ST_IF:
1220 n = chain_node(OC_BR | Vx);
1221 n->l.n = condition();
1222 chain_group();
1223 n2 = chain_node(OC_EXEC);
1224 n->r.n = seq->last;
1225 if (next_token(TC_GRPSEQ | TC_GRPTERM | TC_ELSE)==TC_ELSE) {
1226 chain_group();
1227 n2->a.n = seq->last;
1228 } else {
1229 rollback_token();
1230 }
1231 break;
1232
1233 case ST_WHILE:
1234 n2 = condition();
1235 n = chain_loop(NULL);
1236 n->l.n = n2;
1237 break;
1238
1239 case ST_DO:
1240 n2 = chain_node(OC_EXEC);
1241 n = chain_loop(NULL);
1242 n2->a.n = n->a.n;
1243 next_token(TC_WHILE);
1244 n->l.n = condition();
1245 break;
1246
1247 case ST_FOR:
1248 next_token(TC_SEQSTART);
1249 n2 = parse_expr(TC_SEMICOL | TC_SEQTERM);
1250 if (t.tclass & TC_SEQTERM) { /* for-in */
1251 if ((n2->info & OPCLSMASK) != OC_IN)
1252 syntax_error(EMSG_UNEXP_TOKEN);
1253 n = chain_node(OC_WALKINIT | VV);
1254 n->l.n = n2->l.n;
1255 n->r.n = n2->r.n;
1256 n = chain_loop(NULL);
1257 n->info = OC_WALKNEXT | Vx;
1258 n->l.n = n2->l.n;
1259 } else { /* for(;;) */
1260 n = chain_node(OC_EXEC | Vx);
1261 n->l.n = n2;
1262 n2 = parse_expr(TC_SEMICOL);
1263 n3 = parse_expr(TC_SEQTERM);
1264 n = chain_loop(n3);
1265 n->l.n = n2;
1266 if (! n2)
1267 n->info = OC_EXEC;
1268 }
1269 break;
1270
1271 case OC_PRINT:
1272 case OC_PRINTF:
1273 n = chain_node(t.info);
1274 n->l.n = parse_expr(TC_OPTERM | TC_OUTRDR | TC_GRPTERM);
1275 if (t.tclass & TC_OUTRDR) {
1276 n->info |= t.info;
1277 n->r.n = parse_expr(TC_OPTERM | TC_GRPTERM);
1278 }
1279 if (t.tclass & TC_GRPTERM)
1280 rollback_token();
1281 break;
1282
1283 case OC_BREAK:
1284 n = chain_node(OC_EXEC);
1285 n->a.n = break_ptr;
1286 break;
1287
1288 case OC_CONTINUE:
1289 n = chain_node(OC_EXEC);
1290 n->a.n = continue_ptr;
1291 break;
1292
1293 /* delete, next, nextfile, return, exit */
1294 default:
1295 chain_expr(t.info);
1296
1297 }
1298 }
1299}
1300
1301static void parse_program(char *p) {
1302
1303 unsigned long tclass;
1304 node *cn;
1305 func *f;
1306 var *v;
1307
1308 pos = p;
1309 t.lineno = 1;
1310 while((tclass = next_token(TC_EOF | TC_OPSEQ | TC_GRPSTART |
1311 TC_OPTERM | TC_BEGIN | TC_END | TC_FUNCDECL)) != TC_EOF) {
1312
1313 if (tclass & TC_OPTERM)
1314 continue;
1315
1316 seq = &mainseq;
1317 if (tclass & TC_BEGIN) {
1318 seq = &beginseq;
1319 chain_group();
1320
1321 } else if (tclass & TC_END) {
1322 seq = &endseq;
1323 chain_group();
1324
1325 } else if (tclass & TC_FUNCDECL) {
1326 next_token(TC_FUNCTION);
1327 pos++;
1328 f = newfunc(t.string);
1329 f->body.first = NULL;
1330 f->nargs = 0;
1331 while(next_token(TC_VARIABLE | TC_SEQTERM) & TC_VARIABLE) {
1332 v = findvar(ahash, t.string);
1333 v->x.aidx = (f->nargs)++;
1334
1335 if (next_token(TC_COMMA | TC_SEQTERM) & TC_SEQTERM)
1336 break;
1337 }
1338 seq = &(f->body);
1339 chain_group();
1340 clear_array(ahash);
1341
1342 } else if (tclass & TC_OPSEQ) {
1343 rollback_token();
1344 cn = chain_node(OC_TEST);
1345 cn->l.n = parse_expr(TC_OPTERM | TC_EOF | TC_GRPSTART);
1346 if (t.tclass & TC_GRPSTART) {
1347 rollback_token();
1348 chain_group();
1349 } else {
1350 chain_node(OC_PRINT);
1351 }
1352 cn->r.n = mainseq.last;
1353
1354 } else /* if (tclass & TC_GRPSTART) */ {
1355 rollback_token();
1356 chain_group();
1357 }
1358 }
1359}
1360
1361
1362/* -------- program execution part -------- */
1363
1364static node *mk_splitter(char *s, tsplitter *spl) {
1365
1366 register regex_t *re, *ire;
1367 node *n;
1368
1369 re = &spl->re[0];
1370 ire = &spl->re[1];
1371 n = &spl->n;
1372 if ((n->info && OPCLSMASK) == OC_REGEXP) {
1373 regfree(re);
1374 regfree(ire);
1375 }
Manuel Novoa III cad53642003-03-19 09:13:01 +00001376 if (bb_strlen(s) > 1) {
Glenn L McGrath545106f2002-11-11 06:21:00 +00001377 mk_re_node(s, n, re);
1378 } else {
1379 n->info = (unsigned long) *s;
1380 }
1381
1382 return n;
1383}
1384
1385/* use node as a regular expression. Supplied with node ptr and regex_t
1386 * storage space. Return ptr to regex (if result points to preg, it shuold
1387 * be later regfree'd manually
1388 */
1389static regex_t *as_regex(node *op, regex_t *preg) {
1390
1391 var *v;
1392 char *s;
1393
1394 if ((op->info & OPCLSMASK) == OC_REGEXP) {
1395 return icase ? op->r.ire : op->l.re;
1396 } else {
1397 v = nvalloc(1);
1398 s = getvar_s(evaluate(op, v));
1399 xregcomp(preg, s, icase ? REG_EXTENDED | REG_ICASE : REG_EXTENDED);
1400 nvfree(v);
1401 return preg;
1402 }
1403}
1404
/*
 * Gradually growing buffer: make sure *b can hold more than n bytes.
 * When *b is NULL or n has reached *size, the buffer is reallocated to
 * n + n/2 + 80 bytes and *size is updated; otherwise nothing happens.
 */
static void qrealloc(char **b, int n, int *size)
{
	if (*b != NULL && n < *size)
		return;

	*size = n + (n >> 1) + 80;
	*b = xrealloc(*b, *size);
}
1411
1412/* resize field storage space */
1413static void fsrealloc(int size) {
1414
1415 static int maxfields = 0;
1416 int i;
1417
1418 if (size >= maxfields) {
1419 i = maxfields;
1420 maxfields = size + 16;
1421 Fields = (var *)xrealloc(Fields, maxfields * sizeof(var));
1422 for (; i<maxfields; i++) {
1423 Fields[i].type = VF_SPECIAL;
1424 Fields[i].string = NULL;
1425 }
1426 }
1427
1428 if (size < nfields) {
1429 for (i=size; i<nfields; i++) {
1430 clrvar(Fields+i);
1431 }
1432 }
1433 nfields = size;
1434}
1435
1436static int awk_split(char *s, node *spl, char **slist) {
1437
1438 int l, n=0;
1439 char c[4];
1440 char *s1;
1441 regmatch_t pmatch[2];
1442
1443 /* in worst case, each char would be a separate field */
Manuel Novoa III cad53642003-03-19 09:13:01 +00001444 *slist = s1 = bb_xstrndup(s, bb_strlen(s) * 2 + 3);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001445
1446 c[0] = c[1] = (char)spl->info;
1447 c[2] = c[3] = '\0';
1448 if (*getvar_s(V[RS]) == '\0') c[2] = '\n';
1449
1450 if ((spl->info & OPCLSMASK) == OC_REGEXP) { /* regex split */
1451 while (*s) {
1452 l = strcspn(s, c+2);
1453 if (regexec(icase ? spl->r.ire : spl->l.re, s, 1, pmatch, 0) == 0 &&
1454 pmatch[0].rm_so <= l) {
1455 l = pmatch[0].rm_so;
1456 if (pmatch[0].rm_eo == 0) { l++; pmatch[0].rm_eo++; }
1457 } else {
1458 pmatch[0].rm_eo = l;
1459 if (*(s+l)) pmatch[0].rm_eo++;
1460 }
1461
1462 memcpy(s1, s, l);
1463 *(s1+l) = '\0';
1464 nextword(&s1);
1465 s += pmatch[0].rm_eo;
1466 n++;
1467 }
1468 } else if (c[0] == '\0') { /* null split */
1469 while(*s) {
1470 *(s1++) = *(s++);
1471 *(s1++) = '\0';
1472 n++;
1473 }
1474 } else if (c[0] != ' ') { /* single-character split */
1475 if (icase) {
1476 c[0] = toupper(c[0]);
1477 c[1] = tolower(c[1]);
1478 }
1479 if (*s1) n++;
1480 while ((s1 = strpbrk(s1, c))) {
1481 *(s1++) = '\0';
1482 n++;
1483 }
1484 } else { /* space split */
1485 while (*s) {
1486 while (isspace(*s)) s++;
1487 if (! *s) break;
1488 n++;
1489 while (*s && !isspace(*s))
1490 *(s1++) = *(s++);
1491 *(s1++) = '\0';
1492 }
1493 }
1494 return n;
1495}
1496
1497static void split_f0(void) {
1498
1499 static char *fstrings = NULL;
1500 int i, n;
1501 char *s;
1502
1503 if (is_f0_split)
1504 return;
1505
1506 is_f0_split = TRUE;
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00001507 free(fstrings);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001508 fsrealloc(0);
1509 n = awk_split(getvar_s(V[F0]), &fsplitter.n, &fstrings);
1510 fsrealloc(n);
1511 s = fstrings;
1512 for (i=0; i<n; i++) {
1513 Fields[i].string = nextword(&s);
1514 Fields[i].type |= (VF_FSTR | VF_USER | VF_DIRTY);
1515 }
1516
1517 /* set NF manually to avoid side effects */
1518 clrvar(V[NF]);
1519 V[NF]->type = VF_NUMBER | VF_SPECIAL;
1520 V[NF]->number = nfields;
1521}
1522
1523/* perform additional actions when some internal variables changed */
1524static void handle_special(var *v) {
1525
1526 int n;
1527 char *b, *sep, *s;
1528 int sl, l, len, i, bsize;
1529
1530 if (! (v->type & VF_SPECIAL))
1531 return;
1532
1533 if (v == V[NF]) {
1534 n = (int)getvar_i(v);
1535 fsrealloc(n);
1536
1537 /* recalculate $0 */
1538 sep = getvar_s(V[OFS]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001539 sl = bb_strlen(sep);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001540 b = NULL;
1541 len = 0;
1542 for (i=0; i<n; i++) {
1543 s = getvar_s(&Fields[i]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001544 l = bb_strlen(s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001545 if (b) {
1546 memcpy(b+len, sep, sl);
1547 len += sl;
1548 }
1549 qrealloc(&b, len+l+sl, &bsize);
1550 memcpy(b+len, s, l);
1551 len += l;
1552 }
1553 b[len] = '\0';
1554 setvar_p(V[F0], b);
1555 is_f0_split = TRUE;
1556
1557 } else if (v == V[F0]) {
1558 is_f0_split = FALSE;
1559
1560 } else if (v == V[FS]) {
1561 mk_splitter(getvar_s(v), &fsplitter);
1562
1563 } else if (v == V[RS]) {
1564 mk_splitter(getvar_s(v), &rsplitter);
1565
1566 } else if (v == V[IGNORECASE]) {
1567 icase = istrue(v);
1568
1569 } else { /* $n */
1570 n = getvar_i(V[NF]);
1571 setvar_i(V[NF], n > v-Fields ? n : v-Fields+1);
1572 /* right here v is invalid. Just to note... */
1573 }
1574}
1575
1576/* step through func/builtin/etc arguments */
1577static node *nextarg(node **pn) {
1578
1579 node *n;
1580
1581 n = *pn;
1582 if (n && (n->info & OPCLSMASK) == OC_COMMA) {
1583 *pn = n->r.n;
1584 n = n->l.n;
1585 } else {
1586 *pn = NULL;
1587 }
1588 return n;
1589}
1590
1591static void hashwalk_init(var *v, xhash *array) {
1592
1593 char **w;
1594 hash_item *hi;
1595 int i;
1596
1597 if (v->type & VF_WALK)
1598 free(v->x.walker);
1599
1600 v->type |= VF_WALK;
1601 w = v->x.walker = (char **)xcalloc(2 + 2*sizeof(char *) + array->glen, 1);
1602 *w = *(w+1) = (char *)(w + 2);
1603 for (i=0; i<array->csize; i++) {
1604 hi = array->items[i];
1605 while(hi) {
1606 strcpy(*w, hi->name);
1607 nextword(w);
1608 hi = hi->next;
1609 }
1610 }
1611}
1612
1613static int hashwalk_next(var *v) {
1614
1615 char **w;
1616
1617 w = v->x.walker;
1618 if (*(w+1) == *w)
1619 return FALSE;
1620
1621 setvar_s(v, nextword(w+1));
1622 return TRUE;
1623}
1624
1625/* evaluate node, return 1 when result is true, 0 otherwise */
1626static int ptest(node *pattern) {
1627 static var v;
1628
1629 return istrue(evaluate(pattern, &v));
1630}
1631
1632/* read next record from stream rsm into a variable v */
1633static int awk_getline(rstream *rsm, var *v) {
1634
1635 char *b;
1636 regmatch_t pmatch[2];
1637 int p, pp=0, size;
1638 int fd, so, eo, r, rp;
1639 char c, *s;
1640
1641 /* we're using our own buffer since we need access to accumulating
1642 * characters
1643 */
1644 fd = fileno(rsm->F);
1645 b = rsm->buffer;
1646 p = rsm->pos;
1647 size = rsm->size;
1648 c = (char) rsplitter.n.info;
1649 rp = 0;
1650 do {
1651 qrealloc(&b, p+128, &size);
1652 so = eo = p;
1653 r = 1;
1654 if (p > 0) {
1655 if ((rsplitter.n.info & OPCLSMASK) == OC_REGEXP) {
1656 if (regexec(icase ? rsplitter.n.r.ire : rsplitter.n.l.re,
1657 b, 1, pmatch, 0) == 0) {
1658 so = pmatch[0].rm_so;
1659 eo = pmatch[0].rm_eo;
1660 if (b[eo] != '\0')
1661 break;
1662 }
1663 } else if (c != '\0') {
1664 s = strchr(b+pp, c);
1665 if (s) {
1666 so = eo = s-b;
1667 eo++;
1668 break;
1669 }
1670 } else {
1671 while (b[rp] == '\n')
1672 rp++;
1673 s = strstr(b+rp, "\n\n");
1674 if (s) {
1675 so = eo = s-b;
1676 while (b[eo] == '\n') eo++;
1677 if (b[eo] != '\0')
1678 break;
1679 }
1680 }
1681 }
1682
1683 pp = p;
1684 p += safe_read(fd, b+p, size-p-1);
1685 if (p < pp) {
1686 p = 0;
1687 r = 0;
1688 setvar_i(V[ERRNO], errno);
1689 }
1690 b[p] = '\0';
1691
1692 } while (p > pp);
1693
1694 if (p == 0) {
1695 r--;
1696 } else {
1697 c = b[so]; b[so] = '\0';
1698 setvar_s(v, b+rp);
1699 v->type |= VF_USER;
1700 b[so] = c;
1701 c = b[eo]; b[eo] = '\0';
1702 setvar_s(V[RT], b+so);
1703 b[eo] = c;
1704 }
1705
1706 p -= eo;
1707 if (p) memmove(b, (const void *)(b+eo), p+1);
1708
1709 rsm->buffer = b;
1710 rsm->pos = p;
1711 rsm->size = size;
1712
1713 return r;
1714}
1715
1716static int fmt_num(char *b, int size, char *format, double n, int int_as_int) {
1717
1718 int r=0;
1719 char c, *s=format;
1720
1721 if (int_as_int && n == (int)n) {
1722 r = snprintf(b, size, "%d", (int)n);
1723 } else {
1724 do { c = *s; } while (*s && *++s);
1725 if (strchr("diouxX", c)) {
1726 r = snprintf(b, size, format, (int)n);
1727 } else if (strchr("eEfgG", c)) {
1728 r = snprintf(b, size, format, n);
1729 } else {
1730 runtime_error(EMSG_INV_FMT);
1731 }
1732 }
1733 return r;
1734}
1735
1736
1737/* formatted output into an allocated buffer, return ptr to buffer */
1738static char *awk_printf(node *n) {
1739
1740 char *b = NULL;
1741 char *fmt, *s, *s1, *f;
1742 int i, j, incr, bsize;
1743 char c, c1;
1744 var *v, *arg;
1745
1746 v = nvalloc(1);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001747 fmt = f = bb_xstrdup(getvar_s(evaluate(nextarg(&n), v)));
Glenn L McGrath545106f2002-11-11 06:21:00 +00001748
1749 i = 0;
1750 while (*f) {
1751 s = f;
1752 while (*f && (*f != '%' || *(++f) == '%'))
1753 f++;
1754 while (*f && !isalpha(*f))
1755 f++;
1756
1757 incr = (f - s) + MAXVARFMT;
1758 qrealloc(&b, incr+i, &bsize);
1759 c = *f; if (c != '\0') f++;
1760 c1 = *f ; *f = '\0';
1761 arg = evaluate(nextarg(&n), v);
1762
1763 j = i;
1764 if (c == 'c' || !c) {
1765 i += sprintf(b+i, s,
1766 is_numeric(arg) ? (char)getvar_i(arg) : *getvar_s(arg));
1767
1768 } else if (c == 's') {
1769 s1 = getvar_s(arg);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001770 qrealloc(&b, incr+i+bb_strlen(s1), &bsize);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001771 i += sprintf(b+i, s, s1);
1772
1773 } else {
1774 i += fmt_num(b+i, incr, s, getvar_i(arg), FALSE);
1775 }
1776 *f = c1;
1777
1778 /* if there was an error while sprintf, return value is negative */
1779 if (i < j) i = j;
1780
1781 }
1782
1783 b = xrealloc(b, i+1);
1784 free(fmt);
1785 nvfree(v);
1786 b[i] = '\0';
1787 return b;
1788}
1789
1790/* common substitution routine
1791 * replace (nm) substring of (src) that match (n) with (repl), store
1792 * result into (dest), return number of substitutions. If nm=0, replace
1793 * all matches. If src or dst is NULL, use $0. If ex=TRUE, enable
1794 * subexpression matching (\1-\9)
1795 */
1796static int awk_sub(node *rn, char *repl, int nm, var *src, var *dest, int ex) {
1797
1798 char *ds = NULL;
1799 char *sp, *s;
1800 int c, i, j, di, rl, so, eo, nbs, n, dssize;
1801 regmatch_t pmatch[10];
1802 regex_t sreg, *re;
1803
1804 re = as_regex(rn, &sreg);
1805 if (! src) src = V[F0];
1806 if (! dest) dest = V[F0];
1807
1808 i = di = 0;
1809 sp = getvar_s(src);
Manuel Novoa III cad53642003-03-19 09:13:01 +00001810 rl = bb_strlen(repl);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001811 while (regexec(re, sp, 10, pmatch, sp==getvar_s(src) ? 0:REG_NOTBOL) == 0) {
1812 so = pmatch[0].rm_so;
1813 eo = pmatch[0].rm_eo;
1814
1815 qrealloc(&ds, di + eo + rl, &dssize);
1816 memcpy(ds + di, sp, eo);
1817 di += eo;
1818 if (++i >= nm) {
1819 /* replace */
1820 di -= (eo - so);
1821 nbs = 0;
1822 for (s = repl; *s; s++) {
1823 ds[di++] = c = *s;
1824 if (c == '\\') {
1825 nbs++;
1826 continue;
1827 }
1828 if (c == '&' || (ex && c >= '0' && c <= '9')) {
1829 di -= ((nbs + 3) >> 1);
1830 j = 0;
1831 if (c != '&') {
1832 j = c - '0';
1833 nbs++;
1834 }
1835 if (nbs % 2) {
1836 ds[di++] = c;
1837 } else {
1838 n = pmatch[j].rm_eo - pmatch[j].rm_so;
1839 qrealloc(&ds, di + rl + n, &dssize);
1840 memcpy(ds + di, sp + pmatch[j].rm_so, n);
1841 di += n;
1842 }
1843 }
1844 nbs = 0;
1845 }
1846 }
1847
1848 sp += eo;
1849 if (i == nm) break;
1850 if (eo == so) {
1851 if (! (ds[di++] = *sp++)) break;
1852 }
1853 }
1854
1855 qrealloc(&ds, di + strlen(sp), &dssize);
1856 strcpy(ds + di, sp);
1857 setvar_p(dest, ds);
1858 if (re == &sreg) regfree(re);
1859 return i;
1860}
1861
1862static var *exec_builtin(node *op, var *res) {
1863
1864 int (*to_xxx)(int);
1865 var *tv;
1866 node *an[4];
1867 var *av[4];
1868 char *as[4];
1869 regmatch_t pmatch[2];
1870 regex_t sreg, *re;
1871 static tsplitter tspl;
1872 node *spl;
1873 unsigned long isr, info;
1874 int nargs;
1875 time_t tt;
1876 char *s, *s1;
1877 int i, l, ll, n;
1878
1879 tv = nvalloc(4);
1880 isr = info = op->info;
1881 op = op->l.n;
1882
1883 av[2] = av[3] = NULL;
1884 for (i=0 ; i<4 && op ; i++) {
1885 an[i] = nextarg(&op);
1886 if (isr & 0x09000000) av[i] = evaluate(an[i], &tv[i]);
1887 if (isr & 0x08000000) as[i] = getvar_s(av[i]);
1888 isr >>= 1;
1889 }
1890
1891 nargs = i;
1892 if (nargs < (info >> 30))
1893 runtime_error(EMSG_TOO_FEW_ARGS);
1894
1895 switch (info & OPNMASK) {
1896
1897 case B_a2:
1898#ifdef CONFIG_FEATURE_AWK_MATH
1899 setvar_i(res, atan2(getvar_i(av[i]), getvar_i(av[1])));
1900#else
1901 runtime_error(EMSG_NO_MATH);
1902#endif
1903 break;
1904
1905 case B_sp:
1906 if (nargs > 2) {
1907 spl = (an[2]->info & OPCLSMASK) == OC_REGEXP ?
1908 an[2] : mk_splitter(getvar_s(evaluate(an[2], &tv[2])), &tspl);
1909 } else {
1910 spl = &fsplitter.n;
1911 }
1912
1913 n = awk_split(as[0], spl, &s);
1914 s1 = s;
1915 clear_array(iamarray(av[1]));
1916 for (i=1; i<=n; i++)
1917 setari_u(av[1], i, nextword(&s1));
1918 free(s);
1919 setvar_i(res, n);
1920 break;
1921
1922 case B_ss:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001923 l = bb_strlen(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001924 i = getvar_i(av[1]) - 1;
1925 if (i>l) i=l; if (i<0) i=0;
1926 n = (nargs > 2) ? getvar_i(av[2]) : l-i;
1927 if (n<0) n=0;
1928 s = xmalloc(n+1);
1929 strncpy(s, as[0]+i, n);
1930 s[n] = '\0';
1931 setvar_p(res, s);
1932 break;
1933
1934 case B_lo:
1935 to_xxx = tolower;
1936 goto lo_cont;
1937
1938 case B_up:
1939 to_xxx = toupper;
1940lo_cont:
Manuel Novoa III cad53642003-03-19 09:13:01 +00001941 s1 = s = bb_xstrdup(as[0]);
Glenn L McGrath545106f2002-11-11 06:21:00 +00001942 while (*s1) {
1943 *s1 = (*to_xxx)(*s1);
1944 s1++;
1945 }
1946 setvar_p(res, s);
1947 break;
1948
1949 case B_ix:
1950 n = 0;
Manuel Novoa III cad53642003-03-19 09:13:01 +00001951 ll = bb_strlen(as[1]);
1952 l = bb_strlen(as[0]) - ll;
Glenn L McGrath545106f2002-11-11 06:21:00 +00001953 if (ll > 0 && l >= 0) {
1954 if (! icase) {
1955 s = strstr(as[0], as[1]);
1956 if (s) n = (s - as[0]) + 1;
1957 } else {
1958 /* this piece of code is terribly slow and
1959 * really should be rewritten
1960 */
1961 for (i=0; i<=l; i++) {
1962 if (strncasecmp(as[0]+i, as[1], ll) == 0) {
1963 n = i+1;
1964 break;
1965 }
1966 }
1967 }
1968 }
1969 setvar_i(res, n);
1970 break;
1971
1972 case B_ti:
1973 if (nargs > 1)
1974 tt = getvar_i(av[1]);
1975 else
1976 time(&tt);
1977 s = (nargs > 0) ? as[0] : "%a %b %d %H:%M:%S %Z %Y";
1978 i = strftime(buf, MAXVARFMT, s, localtime(&tt));
1979 buf[i] = '\0';
1980 setvar_s(res, buf);
1981 break;
1982
1983 case B_ma:
1984 re = as_regex(an[1], &sreg);
1985 n = regexec(re, as[0], 1, pmatch, 0);
1986 if (n == 0) {
1987 pmatch[0].rm_so++;
1988 pmatch[0].rm_eo++;
1989 } else {
1990 pmatch[0].rm_so = 0;
1991 pmatch[0].rm_eo = -1;
1992 }
1993 setvar_i(newvar("RSTART"), pmatch[0].rm_so);
1994 setvar_i(newvar("RLENGTH"), pmatch[0].rm_eo - pmatch[0].rm_so);
1995 setvar_i(res, pmatch[0].rm_so);
1996 if (re == &sreg) regfree(re);
1997 break;
1998
1999 case B_ge:
2000 awk_sub(an[0], as[1], getvar_i(av[2]), av[3], res, TRUE);
2001 break;
2002
2003 case B_gs:
2004 setvar_i(res, awk_sub(an[0], as[1], 0, av[2], av[2], FALSE));
2005 break;
2006
2007 case B_su:
2008 setvar_i(res, awk_sub(an[0], as[1], 1, av[2], av[2], FALSE));
2009 break;
2010 }
2011
2012 nvfree(tv);
2013 return res;
2014}
2015
2016/*
2017 * Evaluate node - the heart of the program. Supplied with subtree
2018 * and place where to store result. returns ptr to result.
2019 */
2020#define XC(n) ((n) >> 8)
2021
2022static var *evaluate(node *op, var *res) {
2023
2024 /* This procedure is recursive so we should count every byte */
2025 static var *fnargs = NULL;
2026 static unsigned int seed = 1;
2027 static regex_t sreg;
2028 node *op1;
2029 var *v1;
2030 union {
2031 var *v;
2032 char *s;
2033 double d;
2034 int i;
2035 } L, R;
2036 unsigned long opinfo;
2037 short opn;
2038 union {
2039 char *s;
2040 rstream *rsm;
2041 FILE *F;
2042 var *v;
2043 regex_t *re;
2044 unsigned long info;
2045 } X;
2046
2047 if (! op)
2048 return setvar_s(res, NULL);
2049
2050 v1 = nvalloc(2);
2051
2052 while (op) {
2053
2054 opinfo = op->info;
2055 opn = (short)(opinfo & OPNMASK);
2056 lineno = op->lineno;
2057
2058 /* execute inevitable things */
2059 op1 = op->l.n;
2060 if (opinfo & OF_RES1) X.v = L.v = evaluate(op1, v1);
2061 if (opinfo & OF_RES2) R.v = evaluate(op->r.n, v1+1);
2062 if (opinfo & OF_STR1) L.s = getvar_s(L.v);
2063 if (opinfo & OF_STR2) R.s = getvar_s(R.v);
2064 if (opinfo & OF_NUM1) L.d = getvar_i(L.v);
2065
2066 switch (XC(opinfo & OPCLSMASK)) {
2067
2068 /* -- iterative node type -- */
2069
2070 /* test pattern */
2071 case XC( OC_TEST ):
2072 if ((op1->info & OPCLSMASK) == OC_COMMA) {
2073 /* it's range pattern */
2074 if ((opinfo & OF_CHECKED) || ptest(op1->l.n)) {
2075 op->info |= OF_CHECKED;
2076 if (ptest(op1->r.n))
2077 op->info &= ~OF_CHECKED;
2078
2079 op = op->a.n;
2080 } else {
2081 op = op->r.n;
2082 }
2083 } else {
2084 op = (ptest(op1)) ? op->a.n : op->r.n;
2085 }
2086 break;
2087
2088 /* just evaluate an expression, also used as unconditional jump */
2089 case XC( OC_EXEC ):
2090 break;
2091
2092 /* branch, used in if-else and various loops */
2093 case XC( OC_BR ):
2094 op = istrue(L.v) ? op->a.n : op->r.n;
2095 break;
2096
2097 /* initialize for-in loop */
2098 case XC( OC_WALKINIT ):
2099 hashwalk_init(L.v, iamarray(R.v));
2100 break;
2101
2102 /* get next array item */
2103 case XC( OC_WALKNEXT ):
2104 op = hashwalk_next(L.v) ? op->a.n : op->r.n;
2105 break;
2106
2107 case XC( OC_PRINT ):
2108 case XC( OC_PRINTF ):
2109 X.F = stdout;
2110 if (op->r.n) {
2111 X.rsm = newfile(R.s);
2112 if (! X.rsm->F) {
2113 if (opn == '|') {
2114 if((X.rsm->F = popen(R.s, "w")) == NULL)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002115 bb_perror_msg_and_die("popen");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002116 X.rsm->is_pipe = 1;
2117 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002118 X.rsm->F = bb_xfopen(R.s, opn=='w' ? "w" : "a");
Glenn L McGrath545106f2002-11-11 06:21:00 +00002119 }
2120 }
2121 X.F = X.rsm->F;
2122 }
2123
2124 if ((opinfo & OPCLSMASK) == OC_PRINT) {
2125 if (! op1) {
2126 fputs(getvar_s(V[F0]), X.F);
2127 } else {
2128 while (op1) {
2129 L.v = evaluate(nextarg(&op1), v1);
2130 if (L.v->type & VF_NUMBER) {
2131 fmt_num(buf, MAXVARFMT, getvar_s(V[OFMT]),
2132 getvar_i(L.v), TRUE);
2133 fputs(buf, X.F);
2134 } else {
2135 fputs(getvar_s(L.v), X.F);
2136 }
2137
2138 if (op1) fputs(getvar_s(V[OFS]), X.F);
2139 }
2140 }
2141 fputs(getvar_s(V[ORS]), X.F);
2142
2143 } else { /* OC_PRINTF */
2144 L.s = awk_printf(op1);
2145 fputs(L.s, X.F);
2146 free(L.s);
2147 }
2148 fflush(X.F);
2149 break;
2150
2151 case XC( OC_DELETE ):
2152 X.info = op1->info & OPCLSMASK;
2153 if (X.info == OC_VAR) {
2154 R.v = op1->l.v;
2155 } else if (X.info == OC_FNARG) {
2156 R.v = &fnargs[op1->l.i];
2157 } else {
2158 runtime_error(EMSG_NOT_ARRAY);
2159 }
2160
2161 if (op1->r.n) {
2162 clrvar(L.v);
2163 L.s = getvar_s(evaluate(op1->r.n, v1));
2164 hash_remove(iamarray(R.v), L.s);
2165 } else {
2166 clear_array(iamarray(R.v));
2167 }
2168 break;
2169
2170 case XC( OC_NEWSOURCE ):
2171 programname = op->l.s;
2172 break;
2173
2174 case XC( OC_RETURN ):
2175 copyvar(res, L.v);
2176 break;
2177
2178 case XC( OC_NEXTFILE ):
2179 nextfile = TRUE;
2180 case XC( OC_NEXT ):
2181 nextrec = TRUE;
2182 case XC( OC_DONE ):
2183 clrvar(res);
2184 break;
2185
2186 case XC( OC_EXIT ):
2187 awk_exit(L.d);
2188
2189 /* -- recursive node type -- */
2190
2191 case XC( OC_VAR ):
2192 L.v = op->l.v;
2193 if (L.v == V[NF])
2194 split_f0();
2195 goto v_cont;
2196
2197 case XC( OC_FNARG ):
2198 L.v = &fnargs[op->l.i];
2199
2200v_cont:
2201 res = (op->r.n) ? findvar(iamarray(L.v), R.s) : L.v;
2202 break;
2203
2204 case XC( OC_IN ):
2205 setvar_i(res, hash_search(iamarray(R.v), L.s) ? 1 : 0);
2206 break;
2207
2208 case XC( OC_REGEXP ):
2209 op1 = op;
2210 L.s = getvar_s(V[F0]);
2211 goto re_cont;
2212
2213 case XC( OC_MATCH ):
2214 op1 = op->r.n;
2215re_cont:
2216 X.re = as_regex(op1, &sreg);
2217 R.i = regexec(X.re, L.s, 0, NULL, 0);
2218 if (X.re == &sreg) regfree(X.re);
2219 setvar_i(res, (R.i == 0 ? 1 : 0) ^ (opn == '!' ? 1 : 0));
2220 break;
2221
2222 case XC( OC_MOVE ):
2223 /* if source is a temporary string, jusk relink it to dest */
2224 if (R.v == v1+1 && R.v->string) {
2225 res = setvar_p(L.v, R.v->string);
2226 R.v->string = NULL;
2227 } else {
2228 res = copyvar(L.v, R.v);
2229 }
2230 break;
2231
2232 case XC( OC_TERNARY ):
2233 if ((op->r.n->info & OPCLSMASK) != OC_COLON)
2234 runtime_error(EMSG_POSSIBLE_ERROR);
2235 res = evaluate(istrue(L.v) ? op->r.n->l.n : op->r.n->r.n, res);
2236 break;
2237
2238 case XC( OC_FUNC ):
2239 if (! op->r.f->body.first)
2240 runtime_error(EMSG_UNDEF_FUNC);
2241
2242 X.v = R.v = nvalloc(op->r.f->nargs+1);
2243 while (op1) {
2244 L.v = evaluate(nextarg(&op1), v1);
2245 copyvar(R.v, L.v);
2246 R.v->type |= VF_CHILD;
2247 R.v->x.parent = L.v;
2248 if (++R.v - X.v >= op->r.f->nargs)
2249 break;
2250 }
2251
2252 R.v = fnargs;
2253 fnargs = X.v;
2254
2255 L.s = programname;
2256 res = evaluate(op->r.f->body.first, res);
2257 programname = L.s;
2258
2259 nvfree(fnargs);
2260 fnargs = R.v;
2261 break;
2262
2263 case XC( OC_GETLINE ):
2264 case XC( OC_PGETLINE ):
2265 if (op1) {
2266 X.rsm = newfile(L.s);
2267 if (! X.rsm->F) {
2268 if ((opinfo & OPCLSMASK) == OC_PGETLINE) {
2269 X.rsm->F = popen(L.s, "r");
2270 X.rsm->is_pipe = TRUE;
2271 } else {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002272 X.rsm->F = fopen(L.s, "r"); /* not bb_xfopen! */
Glenn L McGrath545106f2002-11-11 06:21:00 +00002273 }
2274 }
2275 } else {
2276 if (! iF) iF = next_input_file();
2277 X.rsm = iF;
2278 }
2279
2280 if (! X.rsm->F) {
2281 setvar_i(V[ERRNO], errno);
2282 setvar_i(res, -1);
2283 break;
2284 }
2285
2286 if (! op->r.n)
2287 R.v = V[F0];
2288
2289 L.i = awk_getline(X.rsm, R.v);
2290 if (L.i > 0) {
2291 if (! op1) {
2292 incvar(V[FNR]);
2293 incvar(V[NR]);
2294 }
2295 }
2296 setvar_i(res, L.i);
2297 break;
2298
2299 /* simple builtins */
2300 case XC( OC_FBLTIN ):
2301 switch (opn) {
2302
2303 case F_in:
2304 R.d = (int)L.d;
2305 break;
2306
2307 case F_rn:
2308 R.d = (double)rand() / (double)RAND_MAX;
2309 break;
2310
2311#ifdef CONFIG_FEATURE_AWK_MATH
2312 case F_co:
2313 R.d = cos(L.d);
2314 break;
2315
2316 case F_ex:
2317 R.d = exp(L.d);
2318 break;
2319
2320 case F_lg:
2321 R.d = log(L.d);
2322 break;
2323
2324 case F_si:
2325 R.d = sin(L.d);
2326 break;
2327
2328 case F_sq:
2329 R.d = sqrt(L.d);
2330 break;
2331#else
2332 case F_co:
2333 case F_ex:
2334 case F_lg:
2335 case F_si:
2336 case F_sq:
2337 runtime_error(EMSG_NO_MATH);
2338 break;
2339#endif
2340
2341 case F_sr:
2342 R.d = (double)seed;
2343 seed = op1 ? (unsigned int)L.d : (unsigned int)time(NULL);
2344 srand(seed);
2345 break;
2346
2347 case F_ti:
2348 R.d = time(NULL);
2349 break;
2350
2351 case F_le:
2352 if (! op1)
2353 L.s = getvar_s(V[F0]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002354 R.d = bb_strlen(L.s);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002355 break;
2356
2357 case F_sy:
2358 fflush(NULL);
2359 R.d = (L.s && *L.s) ? system(L.s) : 0;
2360 break;
2361
2362 case F_ff:
2363 if (! op1)
2364 fflush(stdout);
2365 else {
2366 if (L.s && *L.s) {
2367 X.rsm = newfile(L.s);
2368 fflush(X.rsm->F);
2369 } else {
2370 fflush(NULL);
2371 }
2372 }
2373 break;
2374
2375 case F_cl:
2376 X.rsm = (rstream *)hash_search(fdhash, L.s);
2377 if (X.rsm) {
2378 R.i = X.rsm->is_pipe ? pclose(X.rsm->F) : fclose(X.rsm->F);
Aaron Lehmanna170e1c2002-11-28 11:27:31 +00002379 free(X.rsm->buffer);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002380 hash_remove(fdhash, L.s);
2381 }
2382 if (R.i != 0)
2383 setvar_i(V[ERRNO], errno);
2384 R.d = (double)R.i;
2385 break;
2386 }
2387 setvar_i(res, R.d);
2388 break;
2389
2390 case XC( OC_BUILTIN ):
2391 res = exec_builtin(op, res);
2392 break;
2393
2394 case XC( OC_SPRINTF ):
2395 setvar_p(res, awk_printf(op1));
2396 break;
2397
2398 case XC( OC_UNARY ):
2399 X.v = R.v;
2400 L.d = R.d = getvar_i(R.v);
2401 switch (opn) {
2402 case 'P':
2403 L.d = ++R.d;
2404 goto r_op_change;
2405 case 'p':
2406 R.d++;
2407 goto r_op_change;
2408 case 'M':
2409 L.d = --R.d;
2410 goto r_op_change;
2411 case 'm':
2412 R.d--;
2413 goto r_op_change;
2414 case '!':
2415 L.d = istrue(X.v) ? 0 : 1;
2416 break;
2417 case '-':
2418 L.d = -R.d;
2419 break;
2420 r_op_change:
2421 setvar_i(X.v, R.d);
2422 }
2423 setvar_i(res, L.d);
2424 break;
2425
2426 case XC( OC_FIELD ):
2427 R.i = (int)getvar_i(R.v);
2428 if (R.i == 0) {
2429 res = V[F0];
2430 } else {
2431 split_f0();
2432 if (R.i > nfields)
2433 fsrealloc(R.i);
2434
2435 res = &Fields[R.i-1];
2436 }
2437 break;
2438
2439 /* concatenation (" ") and index joining (",") */
2440 case XC( OC_CONCAT ):
2441 case XC( OC_COMMA ):
Manuel Novoa III cad53642003-03-19 09:13:01 +00002442 opn = bb_strlen(L.s) + bb_strlen(R.s) + 2;
Glenn L McGrath545106f2002-11-11 06:21:00 +00002443 X.s = (char *)xmalloc(opn);
2444 strcpy(X.s, L.s);
2445 if ((opinfo & OPCLSMASK) == OC_COMMA) {
2446 L.s = getvar_s(V[SUBSEP]);
Manuel Novoa III cad53642003-03-19 09:13:01 +00002447 X.s = (char *)xrealloc(X.s, opn + bb_strlen(L.s));
Glenn L McGrath545106f2002-11-11 06:21:00 +00002448 strcat(X.s, L.s);
2449 }
2450 strcat(X.s, R.s);
2451 setvar_p(res, X.s);
2452 break;
2453
2454 case XC( OC_LAND ):
2455 setvar_i(res, istrue(L.v) ? ptest(op->r.n) : 0);
2456 break;
2457
2458 case XC( OC_LOR ):
2459 setvar_i(res, istrue(L.v) ? 1 : ptest(op->r.n));
2460 break;
2461
2462 case XC( OC_BINARY ):
2463 case XC( OC_REPLACE ):
2464 R.d = getvar_i(R.v);
2465 switch (opn) {
2466 case '+':
2467 L.d += R.d;
2468 break;
2469 case '-':
2470 L.d -= R.d;
2471 break;
2472 case '*':
2473 L.d *= R.d;
2474 break;
2475 case '/':
2476 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2477 L.d /= R.d;
2478 break;
2479 case '&':
2480#ifdef CONFIG_FEATURE_AWK_MATH
2481 L.d = pow(L.d, R.d);
2482#else
2483 runtime_error(EMSG_NO_MATH);
2484#endif
2485 break;
2486 case '%':
2487 if (R.d == 0) runtime_error(EMSG_DIV_BY_ZERO);
2488 L.d -= (int)(L.d / R.d) * R.d;
2489 break;
2490 }
2491 res = setvar_i(((opinfo&OPCLSMASK) == OC_BINARY) ? res : X.v, L.d);
2492 break;
2493
2494 case XC( OC_COMPARE ):
2495 if (is_numeric(L.v) && is_numeric(R.v)) {
2496 L.d = getvar_i(L.v) - getvar_i(R.v);
2497 } else {
2498 L.s = getvar_s(L.v);
2499 R.s = getvar_s(R.v);
2500 L.d = icase ? strcasecmp(L.s, R.s) : strcmp(L.s, R.s);
2501 }
2502 switch (opn & 0xfe) {
2503 case 0:
2504 R.i = (L.d > 0);
2505 break;
2506 case 2:
2507 R.i = (L.d >= 0);
2508 break;
2509 case 4:
2510 R.i = (L.d == 0);
2511 break;
2512 }
2513 setvar_i(res, (opn & 0x1 ? R.i : !R.i) ? 1 : 0);
2514 break;
2515
2516 default:
2517 runtime_error(EMSG_POSSIBLE_ERROR);
2518 }
2519 if ((opinfo & OPCLSMASK) <= SHIFT_TIL_THIS)
2520 op = op->a.n;
2521 if ((opinfo & OPCLSMASK) >= RECUR_FROM_THIS)
2522 break;
2523 if (nextrec)
2524 break;
2525 }
2526 nvfree(v1);
2527 return res;
2528}
2529
2530
2531/* -------- main & co. -------- */
2532
2533static int awk_exit(int r) {
2534
2535 unsigned int i;
2536 hash_item *hi;
2537
2538 /* waiting for children */
2539 for (i=0; i<fdhash->csize; i++) {
2540 hi = fdhash->items[i];
2541 while(hi) {
2542 if (hi->data.rs.F && hi->data.rs.is_pipe)
2543 pclose(hi->data.rs.F);
2544 hi = hi->next;
2545 }
2546 }
2547
2548 exit(r);
2549}
2550
2551/* if expr looks like "var=value", perform assignment and return 1,
2552 * otherwise return 0 */
2553static int is_assignment(char *expr) {
2554
2555 char *exprc, *s, *s0, *s1;
2556
Manuel Novoa III cad53642003-03-19 09:13:01 +00002557 exprc = bb_xstrdup(expr);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002558 if (!isalnum_(*exprc) || (s = strchr(exprc, '=')) == NULL) {
2559 free(exprc);
2560 return FALSE;
2561 }
2562
2563 *(s++) = '\0';
2564 s0 = s1 = s;
2565 while (*s)
2566 *(s1++) = nextchar(&s);
2567
2568 *s1 = '\0';
2569 setvar_u(newvar(exprc), s0);
2570 free(exprc);
2571 return TRUE;
2572}
2573
2574/* switch to next input file */
2575static rstream *next_input_file(void) {
2576
2577 static rstream rsm;
2578 FILE *F = NULL;
2579 char *fname, *ind;
2580 static int files_happen = FALSE;
2581
2582 if (rsm.F) fclose(rsm.F);
2583 rsm.F = NULL;
2584 rsm.pos = 0;
2585
2586 do {
2587 if (getvar_i(V[ARGIND])+1 >= getvar_i(V[ARGC])) {
2588 if (files_happen)
2589 return NULL;
2590 fname = "-";
2591 F = stdin;
2592 } else {
2593 ind = getvar_s(incvar(V[ARGIND]));
2594 fname = getvar_s(findvar(iamarray(V[ARGV]), ind));
2595 if (fname && *fname && !is_assignment(fname))
2596 F = afopen(fname, "r");
2597 }
2598 } while (!F);
2599
2600 files_happen = TRUE;
2601 setvar_s(V[FILENAME], fname);
2602 rsm.F = F;
2603 return &rsm;
2604}
2605
2606extern int awk_main(int argc, char **argv) {
2607
2608 char *s, *s1;
2609 int i, j, c;
2610 var *v;
2611 static var tv;
2612 char **envp;
2613 static int from_file = FALSE;
2614 rstream *rsm;
2615 FILE *F, *stdfiles[3];
2616 static char * stdnames = "/dev/stdin\0/dev/stdout\0/dev/stderr";
2617
2618 /* allocate global buffer */
2619 buf = xmalloc(MAXVARFMT+1);
2620
2621 vhash = hash_init();
2622 ahash = hash_init();
2623 fdhash = hash_init();
2624 fnhash = hash_init();
2625
2626 /* initialize variables */
2627 for (i=0; *vNames; i++) {
2628 V[i] = v = newvar(nextword(&vNames));
2629 if (*vValues != '\377')
2630 setvar_s(v, nextword(&vValues));
2631 else
2632 setvar_i(v, 0);
2633
2634 if (*vNames == '*') {
2635 v->type |= VF_SPECIAL;
2636 vNames++;
2637 }
2638 }
2639
2640 handle_special(V[FS]);
2641 handle_special(V[RS]);
2642
2643 stdfiles[0] = stdin;
2644 stdfiles[1] = stdout;
2645 stdfiles[2] = stderr;
2646 for (i=0; i<3; i++) {
2647 rsm = newfile(nextword(&stdnames));
2648 rsm->F = stdfiles[i];
2649 }
2650
2651 for (envp=environ; *envp; envp++) {
Manuel Novoa III cad53642003-03-19 09:13:01 +00002652 s = bb_xstrdup(*envp);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002653 s1 = strchr(s, '=');
2654 *(s1++) = '\0';
2655 setvar_u(findvar(iamarray(V[ENVIRON]), s), s1);
2656 free(s);
2657 }
2658
2659 while((c = getopt(argc, argv, "F:v:f:W:")) != EOF) {
2660 switch (c) {
2661 case 'F':
2662 setvar_s(V[FS], optarg);
2663 break;
2664 case 'v':
2665 if (! is_assignment(optarg))
Manuel Novoa III cad53642003-03-19 09:13:01 +00002666 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002667 break;
2668 case 'f':
2669 from_file = TRUE;
2670 F = afopen(programname = optarg, "r");
2671 s = NULL;
2672 /* one byte is reserved for some trick in next_token */
2673 for (i=j=1; j>0; i+=j) {
2674 s = (char *)xrealloc(s, i+4096);
2675 j = fread(s+i, 1, 4094, F);
2676 }
2677 s[i] = '\0';
2678 fclose(F);
2679 parse_program(s+1);
2680 free(s);
2681 break;
2682 case 'W':
Manuel Novoa III cad53642003-03-19 09:13:01 +00002683 bb_error_msg("Warning: unrecognized option '-W %s' ignored\n", optarg);
Glenn L McGrath545106f2002-11-11 06:21:00 +00002684 break;
2685
2686 default:
Manuel Novoa III cad53642003-03-19 09:13:01 +00002687 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002688 }
2689 }
2690
2691 if (!from_file) {
2692 if (argc == optind)
Manuel Novoa III cad53642003-03-19 09:13:01 +00002693 bb_show_usage();
Glenn L McGrath545106f2002-11-11 06:21:00 +00002694 programname="cmd. line";
2695 parse_program(argv[optind++]);
2696
2697 }
2698
2699 /* fill in ARGV array */
2700 setvar_i(V[ARGC], argc - optind + 1);
2701 setari_u(V[ARGV], 0, "awk");
2702 for(i=optind; i < argc; i++)
2703 setari_u(V[ARGV], i+1-optind, argv[i]);
2704
2705 evaluate(beginseq.first, &tv);
2706 if (! mainseq.first && ! endseq.first)
2707 awk_exit(EXIT_SUCCESS);
2708
2709 /* input file could already be opened in BEGIN block */
2710 if (! iF) iF = next_input_file();
2711
2712 /* passing through input files */
2713 while (iF) {
2714
2715 nextfile = FALSE;
2716 setvar_i(V[FNR], 0);
2717
2718 while ((c = awk_getline(iF, V[F0])) > 0) {
2719
2720 nextrec = FALSE;
2721 incvar(V[NR]);
2722 incvar(V[FNR]);
2723 evaluate(mainseq.first, &tv);
2724
2725 if (nextfile)
2726 break;
2727 }
2728
2729 if (c < 0)
2730 runtime_error(strerror(errno));
2731
2732 iF = next_input_file();
2733
2734 }
2735
2736 evaluate(endseq.first, &tv);
2737 awk_exit(EXIT_SUCCESS);
2738
2739 return 0;
2740}
2741