Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 1 | /**************************************************************** |
| 2 | Copyright (C) Lucent Technologies 1997 |
| 3 | All Rights Reserved |
| 4 | |
| 5 | Permission to use, copy, modify, and distribute this software and |
| 6 | its documentation for any purpose and without fee is hereby |
| 7 | granted, provided that the above copyright notice appear in all |
| 8 | copies and that both that the copyright notice and this |
| 9 | permission notice and warranty disclaimer appear in supporting |
| 10 | documentation, and that the name Lucent Technologies or any of |
| 11 | its entities not be used in advertising or publicity pertaining |
| 12 | to distribution of the software without specific, written prior |
| 13 | permission. |
| 14 | |
| 15 | LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, |
| 16 | INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. |
| 17 | IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY |
| 18 | SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 19 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER |
| 20 | IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, |
| 21 | ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF |
| 22 | THIS SOFTWARE. |
| 23 | ****************************************************************/ |
| 24 | |
| 25 | #include <assert.h> |
Arnold D. Robbins | 961eec1 | 2019-10-24 09:42:51 -0400 | [diff] [blame] | 26 | #include <stdint.h> |
Arnold D. Robbins | 108224b | 2019-11-10 21:19:18 +0200 | [diff] [blame] | 27 | #include <stdbool.h> |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 28 | |
/* Floating-point type used for numeric values (e.g. Cell.fval, NR, NF). */
typedef double Awkfloat;

/*
 * Short spelling of unsigned char.
 * (Historical note kept from the original header:
 * "unsigned char is more trouble than it's worth".)
 */
typedef unsigned char uschar;
| 34 | |
/*
 * xfree(a): release the storage a points to, then reset a to NULL.
 * a must be a modifiable pointer lvalue; it is evaluated more than once,
 * so do not pass an expression with side effects.
 * free(NULL) is defined to do nothing, so no explicit null test is needed.
 * The cast through intptr_t lets const-qualified pointers be passed.
 * NOTE(review): deliberately left as a bare brace block rather than
 * do { } while (0); the latter would break any call site written without
 * a trailing semicolon -- confirm against callers before tightening.
 */
#define	xfree(a)	{ free((void *)(intptr_t)(a)); (a) = NULL; }
/*
 * We sometimes cheat by writing through read-only pointers to
 * NUL-terminate a string in place, and then put back the original value.
 *
 * setptr(ptr, a) stores a into the char that ptr points at; the cast
 * through intptr_t drops any const qualifier on ptr.  The whole expansion
 * is parenthesized so that it stays a single expression when used inside
 * a larger one (e.g. setptr(p, c) + 1 assigns c, not c + 1).
 */
#define	setptr(ptr, a)	(*(char *)(intptr_t)(ptr) = (a))
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 41 | |
/*
 * NN(p): p itself, or the literal "(null)" when p is a null pointer, so
 * the result is guaranteed non-null when passed to dprintf.
 */
#define	NN(p)	((p) ? (p) : "(null)")

#define	DEBUG
#ifdef	DEBUG
/*
 * dprintf((fmt, ...)): call printf with the given argument list, but only
 * when dbg is nonzero.  Uses have to be doubly parenthesized, because the
 * whole printf argument list is passed as the single macro argument x.
 *
 * The body is wrapped in do { } while (0) so that the macro is exactly one
 * statement: a bare "if (dbg) printf x" would capture the else of an
 * enclosing if/else written without braces.
 */
#	define	dprintf(x)	do { if (dbg) printf x; } while (0)
#else
	/* same statement shape as the DEBUG variant, but does nothing */
#	define	dprintf(x)	do { } while (0)
#endif
| 51 | |
/* Possible values of compile_time. */
enum compile_states {
	RUNNING,
	COMPILING,
	ERROR_PRINTING
};

/* defined elsewhere; holds one of the compile_states values */
extern enum compile_states	compile_time;

/* false => unsafe, true => safe */
extern bool	safe;
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 59 | |
| 60 | #define RECSIZE (8 * 1024) /* sets limit on records, fields, etc., etc. */ |
| 61 | extern int recsize; /* size of current record, orig RECSIZE */ |
| 62 | |
Arnold D. Robbins | 4d9b129 | 2020-01-24 11:15:30 +0200 | [diff] [blame] | 63 | extern char EMPTY[]; /* this avoid -Wwritable-strings issues */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 64 | extern char **FS; |
| 65 | extern char **RS; |
| 66 | extern char **ORS; |
| 67 | extern char **OFS; |
| 68 | extern char **OFMT; |
| 69 | extern Awkfloat *NR; |
| 70 | extern Awkfloat *FNR; |
| 71 | extern Awkfloat *NF; |
| 72 | extern char **FILENAME; |
| 73 | extern char **SUBSEP; |
| 74 | extern Awkfloat *RSTART; |
| 75 | extern Awkfloat *RLENGTH; |
| 76 | |
| 77 | extern char *record; /* points to $0 */ |
| 78 | extern int lineno; /* line number in awk program */ |
| 79 | extern int errorflag; /* 1 if error has occurred */ |
Arnold D. Robbins | 108224b | 2019-11-10 21:19:18 +0200 | [diff] [blame] | 80 | extern bool donefld; /* true if record broken into fields */ |
| 81 | extern bool donerec; /* true if record is valid (no fld has changed */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 82 | extern int dbg; |
| 83 | |
zoulasc | 6589208 | 2019-10-24 09:40:15 -0400 | [diff] [blame] | 84 | extern const char *patbeg; /* beginning of pattern matched */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 85 | extern int patlen; /* length of pattern matched. set in b.c */ |
| 86 | |
| 87 | /* Cell: all information about a variable or constant */ |
| 88 | |
| 89 | typedef struct Cell { |
| 90 | uschar ctype; /* OCELL, OBOOL, OJUMP, etc. */ |
| 91 | uschar csub; /* CCON, CTEMP, CFLD, etc. */ |
| 92 | char *nval; /* name, for variables only */ |
| 93 | char *sval; /* string value */ |
| 94 | Awkfloat fval; /* value as number */ |
Arnold D. Robbins | 32093f5 | 2018-08-22 20:40:26 +0300 | [diff] [blame] | 95 | int tval; /* type info: STR|NUM|ARR|FCN|FLD|CON|DONTFREE|CONVC|CONVO */ |
| 96 | char *fmt; /* CONVFMT/OFMT value used to convert from number */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 97 | struct Cell *cnext; /* ptr to next if chained */ |
| 98 | } Cell; |
| 99 | |
| 100 | typedef struct Array { /* symbol table array */ |
| 101 | int nelem; /* elements in table right now */ |
| 102 | int size; /* size of tab */ |
| 103 | Cell **tab; /* hash table pointers */ |
| 104 | } Array; |
| 105 | |
| 106 | #define NSYMTAB 50 /* initial size of a symbol table */ |
| 107 | extern Array *symtab; |
| 108 | |
| 109 | extern Cell *nrloc; /* NR */ |
| 110 | extern Cell *fnrloc; /* FNR */ |
Cody Peter Mello | 52566c0 | 2018-09-18 15:45:55 -0700 | [diff] [blame] | 111 | extern Cell *fsloc; /* FS */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 112 | extern Cell *nfloc; /* NF */ |
Cody Peter Mello | 52566c0 | 2018-09-18 15:45:55 -0700 | [diff] [blame] | 113 | extern Cell *ofsloc; /* OFS */ |
| 114 | extern Cell *orsloc; /* ORS */ |
| 115 | extern Cell *rsloc; /* RS */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 116 | extern Cell *rstartloc; /* RSTART */ |
| 117 | extern Cell *rlengthloc; /* RLENGTH */ |
Cody Peter Mello | 97a4b7e | 2018-09-17 11:59:04 -0700 | [diff] [blame] | 118 | extern Cell *subseploc; /* SUBSEP */ |
Cody Mello | ae99b75 | 2019-06-17 10:08:54 -0900 | [diff] [blame] | 119 | extern Cell *symtabloc; /* SYMTAB */ |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 120 | |
/*
 * Cell.tval values: single-bit flags, OR-ed together in tval and tested
 * with the isrec()/isfld()/isstr()/... macros.
 */
#define	NUM		0x001	/* number value is valid */
#define	STR		0x002	/* string value is valid */
#define	DONTFREE	0x004	/* string space is not freeable */
#define	CON		0x008	/* this is a constant */
#define	ARR		0x010	/* this is an array */
#define	FCN		0x020	/* this is a function name */
#define	FLD		0x040	/* this is a field $1, $2, ... */
#define	REC		0x080	/* this is $0 */
#define	CONVC		0x100	/* string was converted from number via CONVFMT */
#define	CONVO		0x200	/* string was converted from number via OFMT */
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 132 | |
| 133 | |
/*
 * function types: small integer codes, numbered consecutively from 1.
 * NOTE(review): presumably one per built-in function, as the names
 * suggest -- confirm against the code that uses them.
 */
#define	FLENGTH		1
#define	FSQRT		2
#define	FEXP		3
#define	FLOG		4
#define	FINT		5
#define	FSYSTEM		6
#define	FRAND		7
#define	FSRAND		8
#define	FSIN		9
#define	FCOS		10
#define	FATAN		11
#define	FTOUPPER	12
#define	FTOLOWER	13
#define	FFLUSH		14
| 149 | |
/* Node:  parse tree is made of nodes, with Cell's at bottom */
typedef struct Node {
	/* compared against NVALUE / NEXPR by isvalue() / isexpr() */
	int	ntype;
	struct Node *nnext;
	int	lineno;
	/* compared against ARG by isargument() */
	int	nobj;
	/* variable: actual size set by calling malloc */
	struct Node *narg[1];
} Node;

/* null Node pointer */
#define	NIL	((Node *) 0)

extern Node	*winner;
extern Node	*nullstat;
extern Node	*nullnode;
| 165 | |
/* ctypes (values of Cell.ctype) */
#define	OCELL	1
#define	OBOOL	2
#define	OJUMP	3

/* Cell subtypes: csub */
#define	CUNK	0
#define	CFLD	1
#define	CVAR	2
#define	CNAME	3
#define	CTEMP	4
#define	CCON	5
#define	CCOPY	6
#define	CFREE	7

/* bool subtypes */
#define	BTRUE	11
#define	BFALSE	12

/* jump subtypes */
#define	JEXIT	21
#define	JNEXT	22
#define	JBREAK	23
#define	JCONT	24
#define	JRET	25
#define	JNEXTFILE	26

/* node types (values of Node.ntype) */
#define	NVALUE	1
#define	NSTAT	2
#define	NEXPR	3


extern int	pairstack[];
extern int	paircnt;
| 200 | |
/*
 * notlegal(n): true when n lies outside the open range
 * (FIRSTTOKEN, LASTTOKEN), or when its proctab slot is nullproc.
 * Every use of n is parenthesized so that an argument containing a
 * low-precedence operator (e.g. c ? a : b) is treated as one value.
 * n is evaluated more than once.
 */
#define notlegal(n)	((n) <= FIRSTTOKEN || (n) >= LASTTOKEN || \
			 proctab[(n)-FIRSTTOKEN] == nullproc)

/* Node predicates: test Node.ntype / Node.nobj */
#define isvalue(n)	((n)->ntype == NVALUE)
#define isexpr(n)	((n)->ntype == NEXPR)
#define isargument(n)	((n)->nobj == ARG)

/* Cell predicates on ctype / csub */
#define isjump(n)	((n)->ctype == OJUMP)
#define isexit(n)	((n)->csub == JEXIT)
#define isbreak(n)	((n)->csub == JBREAK)
#define iscont(n)	((n)->csub == JCONT)
#define isnext(n)	((n)->csub == JNEXT || (n)->csub == JNEXTFILE)
#define isret(n)	((n)->csub == JRET)
#define istrue(n)	((n)->csub == BTRUE)
#define istemp(n)	((n)->csub == CTEMP)

/*
 * Cell predicates on tval flag bits.  These yield the masked bit
 * (zero or nonzero), not necessarily 1.
 */
#define isrec(n)	((n)->tval & REC)
#define isfld(n)	((n)->tval & FLD)
#define isstr(n)	((n)->tval & STR)
#define isnum(n)	((n)->tval & NUM)
#define isarr(n)	((n)->tval & ARR)
#define isfcn(n)	((n)->tval & FCN)

/*
 * freeable(p): true only when STR is set and DONTFREE is clear.
 * (An earlier definition, (!((p)->tval & DONTFREE)), did not require STR.)
 */
#define freeable(p)	( ((p)->tval & (STR|DONTFREE)) == STR )
| 221 | |
/* structures used by regular expression matching machinery, mostly b.c: */

/*
 * NCHARS: 256 handles 8-bit chars; 128 does 7-bit.
 * Watch out in match(), etc.
 */
#define	NCHARS	(256+3)
/* matches ^ in regular expr */
#define	HAT	(NCHARS+2)
#define	NSTATES	32
| 228 | |
| 229 | typedef struct rrow { |
| 230 | long ltype; /* long avoids pointer warnings on 64-bit */ |
| 231 | union { |
| 232 | int i; |
| 233 | Node *np; |
| 234 | uschar *up; |
| 235 | } lval; /* because Al stores a pointer in it! */ |
| 236 | int *lfollow; |
| 237 | } rrow; |
| 238 | |
| 239 | typedef struct fa { |
zoulasc | c16e869 | 2019-10-17 13:04:46 -0400 | [diff] [blame] | 240 | unsigned int **gototab; |
| 241 | uschar *out; |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 242 | uschar *restr; |
zoulasc | c16e869 | 2019-10-17 13:04:46 -0400 | [diff] [blame] | 243 | int **posns; |
| 244 | int state_count; |
Arnold D. Robbins | 108224b | 2019-11-10 21:19:18 +0200 | [diff] [blame] | 245 | bool anchor; |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 246 | int use; |
| 247 | int initstat; |
| 248 | int curstat; |
| 249 | int accept; |
Brian Kernighan | 87b9493 | 2012-12-22 10:35:39 -0500 | [diff] [blame] | 250 | struct rrow re[1]; /* variable: actual size set by calling malloc */ |
| 251 | } fa; |
| 252 | |
| 253 | |
| 254 | #include "proto.h" |