blob: b6bfcdb68d1a423ea68e4c4c2aff5178263d8f5a [file] [log] [blame]
/*
 * sed.c - very minimalist version of sed
 *
 * Copyright (C) 1999,2000 by Lineo, inc.
 * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 */
22
/*
	Supported features and commands in this version of sed:

	 - comments ('#')
	 - Address matching: num|/matchstr/[,num|/matchstr/|$]command
	 - Commands: p, d, s/match/replace/[gI]

	 (Note: Specifying an address (range) to match is *optional*; commands
	 default to the whole pattern space if no specific address match was
	 requested.)

	Unsupported features:

	 - transliteration (y/source-chars/dest-chars/) (use 'tr')
	 - no support for characters other than the '/' character for regex matches
	 - no pattern space hold space storing / swapping (x, etc.)
	 - no labels / branching (: label, b, t, and friends)
	 - and lots, lots more.

*/
43
Eric Andersen6b6b3f61999-10-28 16:06:25 +000044#include <stdio.h>
Mark Whitley6315ce62000-07-10 22:55:51 +000045#include <stdlib.h> /* for realloc() */
46#include <unistd.h> /* for getopt() */
47#include <regex.h>
48#include <string.h> /* for strdup() */
Eric Andersen6b6b3f61999-10-28 16:06:25 +000049#include <errno.h>
Mark Whitley6315ce62000-07-10 22:55:51 +000050#include <ctype.h> /* for isspace() */
51#include "internal.h"
52
53
/* getopt() state that sed_main() reads while parsing the command line */
extern char *optarg;	/* argument attached to the option just parsed */
extern int optind;	/* index of the next argv[] element to look at */

/* command-line options */
static int be_quiet = 0;	/* bumped once per -n; non-zero turns off the automatic print of each line */
60
/*
 * One parsed sed command: an optional address (or address range), the
 * command letter, and the extra state the 's' command needs.
 */
struct sed_cmd {

	/* where the command applies */
	int beg_line;		/* 'sed 1p': first line number; 0 means no beginning line, so the command applies to all lines */
	int end_line;		/* 'sed 1,3p': last line number; 0 means no end line (use only the beginning), -1 means '$' */
	regex_t *beg_match;	/* sed -e '/match/cmd' */
	regex_t *end_match;	/* sed -e '/match/,/end_match/cmd' */

	/* what to do */
	char cmd;		/* one of p, d, s (add more at your leisure :-) */

	/* only meaningful when cmd == 's' */
	regex_t *sub_match;	/* the 'sub_match' part of sed -e 's/sub_match/replace/' */
	char *replace;		/* the 'replace' part of the same expression. XXX: who will hold the \1 \2 \3s? */
	unsigned int sub_g:1;	/* set for sed -e 's/foo/bar/g' (global) */
};
77
/* file-wide state */
static struct sed_cmd *sed_cmds = NULL;	/* growable array holding the sequence of sed commands */
static int ncmds = 0;			/* how many sed commands have been added */

/*static char *cur_file = NULL;*/ /* file currently being processed XXX: do I need this? */
Eric Andersen6b6b3f61999-10-28 16:06:25 +000083
/*
 * Help text passed to usage().  Only the synopsis line is kept when
 * BB_FEATURE_TRIVIAL_HELP is defined.
 */
static const char sed_usage[] =
	"sed [-Vhnef] pattern [files...]\n"
#ifndef BB_FEATURE_TRIVIAL_HELP
	"\n"
	"-n\tsuppress automatic printing of pattern space\n"
	"-e script\tadd the script to the commands to be executed\n"
	"-f scriptfile\tadd the contents of script-file to the commands to be executed\n"
	"-h\tdisplay this help message\n"
	"-V\toutput version information and exit\n"
	"\n"
	"If no -e or -f is given, the first non-option argument is taken as the\n"
	"sed script to interpret. All remaining arguments are names of input\n"
	"files; if no input files are specified, then the standard input is read.\n"
#endif
	;
Eric Andersen6b6b3f61999-10-28 16:06:25 +000099
Mark Whitley6315ce62000-07-10 22:55:51 +0000100static void destroy_cmd_strs()
101{
102 if (sed_cmds == NULL)
103 return;
104
105 /* destroy all the elements in the array */
106 while (--ncmds >= 0) {
107
108 if (sed_cmds[ncmds].beg_match) {
109 regfree(sed_cmds[ncmds].beg_match);
110 free(sed_cmds[ncmds].beg_match);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000111 }
Mark Whitley6315ce62000-07-10 22:55:51 +0000112 if (sed_cmds[ncmds].end_match) {
113 regfree(sed_cmds[ncmds].end_match);
114 free(sed_cmds[ncmds].end_match);
115 }
116 if (sed_cmds[ncmds].sub_match) {
117 regfree(sed_cmds[ncmds].sub_match);
118 free(sed_cmds[ncmds].sub_match);
119 }
120 if (sed_cmds[ncmds].replace)
121 free(sed_cmds[ncmds].replace);
122 }
Erik Andersene49d5ec2000-02-08 19:58:47 +0000123
Mark Whitley6315ce62000-07-10 22:55:51 +0000124 /* destroy the array */
125 free(sed_cmds);
126 sed_cmds = NULL;
127}
128
/*
 * trim_str - trims leading and trailing space from a string
 *
 * Leading characters from the set " \n\t\v" and trailing characters for
 * which isspace() is true are dropped.  An empty or all-blank input
 * yields an empty string.
 *
 * Note: This returns a malloc'ed string so you must store and free it.
 * Returns NULL if the copy cannot be allocated.
 * XXX: This should be in the utility.c file.
 */
static char *trim_str(const char *str)
{
	/* skip the leading blanks without copying them first */
	const char *start = str + strspn(str, " \n\t\v");
	size_t len = strlen(start);
	char *retstr;

	/* back up over trailing whitespace; the len > 0 test keeps us from
	 * walking off the front of an empty or all-blank string */
	while (len > 0 && isspace((unsigned char)start[len - 1]))
		len--;

	retstr = malloc(len + 1);
	if (retstr == NULL)
		return NULL;

	memcpy(retstr, start, len);
	retstr[len] = '\0';

	return retstr;
}
158
/*
 * index_of_next_unescaped_slash - walks left to right through a string,
 * beginning just after a specified index, and returns the index of the
 * next slash that is not escaped by a backslash.
 *
 * idx is expected to be the index of the delimiter that opens the section
 * being scanned; the character at idx itself is never returned.
 *
 * Returns -1 if the end of the string is reached first.
 */
static int index_of_next_unescaped_slash(int idx, const char *str)
{
	/* nothing to scan if we were handed the terminator */
	if (str[idx] == '\0')
		return -1;

	for (idx++; str[idx] != '\0'; idx++) {
		if (str[idx] == '/')
			return idx;

		/* a backslash escapes the character after it: step over that
		 * character so "\/" is not taken for a delimiter, while the
		 * slash in "\\/" (escaped backslash, then slash) still is */
		if (str[idx] == '\\' && str[idx + 1] != '\0')
			idx++;
	}

	return -1;
}
174
/*
 * get_address - parse one address at the start of str
 *
 * Three forms are recognised:
 *   - a decimal line number: stored in *line, *regex set to NULL
 *   - '$':                   *line set to -1, *regex set to NULL
 *   - /regex/:               compiled into a newly allocated regex_t that
 *                            is stored in *regex (*line is left untouched)
 *
 * Returns the index in the string just past where the address ends, or -1
 * (after complaining on stderr) if str does not start with an address.
 * An unterminated or uncompilable regex ends the program.
 */
static int get_address(const char *str, int *line, regex_t **regex)
{
	/* private copy, so the address can be NUL-terminated in place */
	char *my_str = strdup(str);
	int idx = 0;

	if (my_str == NULL) {
		perror("sed");
		exit(1);
	}

	if (isdigit((unsigned char)my_str[idx])) {
		do {
			idx++;
		} while (isdigit((unsigned char)my_str[idx]));
		my_str[idx] = 0;
		*line = atoi(my_str);
		*regex = NULL;
	}
	else if (my_str[idx] == '$') {
		*line = -1;
		*regex = NULL;
		idx++;
	}
	else if (my_str[idx] == '/') {
		idx = index_of_next_unescaped_slash(idx, my_str);
		if (idx == -1)
			fatalError("sed: unterminated match expression\n"); /* presumably does not return */
		my_str[idx] = '\0';
		*regex = (regex_t *)xmalloc(sizeof(regex_t));
		if (bb_regcomp(*regex, my_str+1, REG_NEWLINE) != 0) {
			/* don't leave a regex_t that never compiled where the
			 * atexit cleanup would hand it to regfree() */
			free(*regex);
			*regex = NULL;
			free(my_str);
			exit(1);
		}
		/* idx is on the closing slash; the address ends one past it */
		idx++;
	}
	else {
		fprintf(stderr, "sed.c:get_address: no address found in string\n");
		fprintf(stderr, "\t(you probably didn't check the string you passed me)\n");
		idx = -1;
	}

	free(my_str);
	return idx;
}
Erik Andersene49d5ec2000-02-08 19:58:47 +0000216
/*
 * strdup_substr - copy the characters str[start] .. str[end - 1] into a
 * freshly xmalloc'ed, NUL-terminated string and return it.
 */
static char *strdup_substr(const char *str, int start, int end)
{
	int len = end - start;
	char *copy = xmalloc(len + 1);

	memcpy(copy, str + start, len);
	copy[len] = '\0';

	return copy;
}
225
Mark Whitley6315ce62000-07-10 22:55:51 +0000226static void parse_cmd_str(struct sed_cmd *sed_cmd, const char *cmdstr)
227{
228 int idx = 0;
229
230 /* parse the command
231 * format is: [addr][,addr]cmd
232 * |----||-----||-|
233 * part1 part2 part3
234 */
235
236 /* first part (if present) is an address: either a number or a /regex/ */
237 if (isdigit(cmdstr[idx]) || cmdstr[idx] == '/')
238 idx = get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
239
240 /* second part (if present) will begin with a comma */
241 if (cmdstr[idx] == ',')
242 idx += get_address(&cmdstr[++idx], &sed_cmd->end_line, &sed_cmd->end_match);
243
244 /* last part (mandatory) will be a command */
245 if (cmdstr[idx] == '\0')
Mark Whitley858c1ad2000-07-11 21:38:47 +0000246 fatalError("sed: missing command\n");
Mark Whitley6315ce62000-07-10 22:55:51 +0000247 if (!strchr("pds", cmdstr[idx])) /* <-- XXX add new commands here */
Mark Whitley858c1ad2000-07-11 21:38:47 +0000248 fatalError("sed: invalid command\n");
Mark Whitley6315ce62000-07-10 22:55:51 +0000249 sed_cmd->cmd = cmdstr[idx];
250 /* special-case handling for 's' */
251 if (sed_cmd->cmd == 's') {
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000252 int oldidx, cflags = REG_NEWLINE;
253 char *match;
Mark Whitley6315ce62000-07-10 22:55:51 +0000254 /* format for substitution is:
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000255 * s/match/replace/gI
256 * | ||
Mark Whitley6315ce62000-07-10 22:55:51 +0000257 * mandatory optional
258 */
259
260 /* verify that we have an 's' followed by a 'slash' */
261 if (cmdstr[++idx] != '/')
Mark Whitley858c1ad2000-07-11 21:38:47 +0000262 fatalError("sed: bad format in substitution expression\n");
Mark Whitley6315ce62000-07-10 22:55:51 +0000263
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000264 /* save the match string */
265 oldidx = idx+1;
Mark Whitley6315ce62000-07-10 22:55:51 +0000266 idx = index_of_next_unescaped_slash(idx, cmdstr);
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000267 if (idx == -1)
Mark Whitley858c1ad2000-07-11 21:38:47 +0000268 fatalError("sed: bad format in substitution expression\n");
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000269 match = strdup_substr(cmdstr, oldidx, idx);
Mark Whitley6315ce62000-07-10 22:55:51 +0000270
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000271 /* save the replacement string */
272 oldidx = idx+1;
273 idx = index_of_next_unescaped_slash(idx, cmdstr);
274 if (idx == -1)
Mark Whitley858c1ad2000-07-11 21:38:47 +0000275 fatalError("sed: bad format in substitution expression\n");
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000276 sed_cmd->replace = strdup_substr(cmdstr, oldidx, idx);
277
278 /* process the flags */
279 while (cmdstr[++idx]) {
280 switch (cmdstr[idx]) {
281 case 'g':
282 sed_cmd->sub_g = 1;
283 break;
284 case 'I':
285 cflags |= REG_ICASE;
286 break;
287 default:
Mark Whitley858c1ad2000-07-11 21:38:47 +0000288 fatalError("sed: bad option in substitution expression\n");
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000289 }
290 }
291
292 /* compile the regex */
293 sed_cmd->sub_match = (regex_t *)xmalloc(sizeof(regex_t));
294 if (bb_regcomp(sed_cmd->sub_match, match, cflags) != 0) {
295 free(match);
Mark Whitley858c1ad2000-07-11 21:38:47 +0000296 exit(1);
Mark Whitleydf5f6ba2000-07-11 16:53:56 +0000297 }
298 free(match);
Erik Andersen1266a131999-12-29 22:19:46 +0000299 }
Eric Andersen50d63601999-11-09 01:47:36 +0000300}
Eric Andersen6b6b3f61999-10-28 16:06:25 +0000301
Mark Whitley6315ce62000-07-10 22:55:51 +0000302static void add_cmd_str(const char *cmdstr)
Erik Andersen1266a131999-12-29 22:19:46 +0000303{
Mark Whitley6315ce62000-07-10 22:55:51 +0000304 char *my_cmdstr = trim_str(cmdstr);
Erik Andersen1266a131999-12-29 22:19:46 +0000305
Mark Whitley6315ce62000-07-10 22:55:51 +0000306 /* if this is a comment, don't even bother */
307 if (my_cmdstr[0] == '#') {
308 free(my_cmdstr);
309 return;
310 }
311
312 /* grow the array */
313 sed_cmds = realloc(sed_cmds, sizeof(struct sed_cmd) * (++ncmds));
314 /* zero new element */
315 memset(&sed_cmds[ncmds-1], 0, sizeof(struct sed_cmd));
316 /* load command string into new array element */
317 parse_cmd_str(&sed_cmds[ncmds-1], my_cmdstr);
318}
319
320
/*
 * load_cmd_file - add every line of a script file as a sed command
 *
 * Each line read from filename is passed to add_cmd_str() without its
 * trailing newline.  Failure to open the file is reported through
 * fatalError().
 */
static void load_cmd_file(char *filename)
{
	FILE *cmdfile;
	char *line;

	cmdfile = fopen(filename, "r");
	if (cmdfile == NULL)
		fatalError(strerror(errno)); /* presumably does not return */

	while ((line = get_line_from_file(cmdfile)) != NULL) {
		size_t len = strlen(line);

		/* eat the newline, but only if there is one: the last line of
		 * the file may end without it */
		if (len > 0 && line[len - 1] == '\n')
			line[len - 1] = 0;
		add_cmd_str(line);
		free(line);
	}

	fclose(cmdfile);
}
336
337
338static int do_sed_command(const struct sed_cmd *sed_cmd, const char *line)
339{
340 int altered = 0;
341
342 switch (sed_cmd->cmd) {
343
344 case 'p':
345 fputs(line, stdout);
346 break;
347
348 case 'd':
349 altered++;
350 break;
351
352 case 's': /* oo, a fun one :-) */
353
354 /* we only substitute if the substitution 'search' expression matches */
355 if (regexec(sed_cmd->sub_match, line, 0, NULL, 0) == 0) {
356 regmatch_t regmatch;
357 int i;
358 char *ptr = (char *)line;
359
360 while (*ptr) {
361 /* if we can match the search string... */
362 if (regexec(sed_cmd->sub_match, ptr, 1, &regmatch, 0) == 0) {
363 /* print everything before the match, */
364 for (i = 0; i < regmatch.rm_so; i++)
365 fputc(ptr[i], stdout);
366 /* then print the substitution in its place */
367 fputs(sed_cmd->replace, stdout);
368 /* then advance past the match */
369 ptr += regmatch.rm_eo;
370 /* and let the calling function know that something
371 * has been changed */
372 altered++;
373
374 /* if we're not doing this globally... */
375 if (!sed_cmd->sub_g)
376 break;
377 }
378 /* if we COULD NOT match the search string (meaning we've gone past
379 * all previous instances), get out */
380 else
381 break;
382 }
383
384 /* is there anything left to print? */
385 if (*ptr)
386 fputs(ptr, stdout);
387 }
388
389 break;
390 }
391
392 return altered;
393}
394
395static void process_file(FILE *file)
396{
397 char *line = NULL;
398 static int linenum = 0; /* GNU sed does not restart counting lines at EOF */
399 unsigned int still_in_range = 0;
400 int line_altered;
401 int i;
402
403 /* go through every line in the file */
404 while ((line = get_line_from_file(file)) != NULL) {
405
406 linenum++;
407 line_altered = 0;
408
409 /* for every line, go through all the commands */
410 for (i = 0; i < ncmds; i++) {
411
412 /* are we acting on a range of matched lines? */
413 if (sed_cmds[i].beg_match && sed_cmds[i].end_match) {
414 if (still_in_range || regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0) {
415 line_altered += do_sed_command(&sed_cmds[i], line);
416 still_in_range = 1;
417 if (regexec(sed_cmds[i].end_match, line, 0, NULL, 0) == 0)
418 still_in_range = 0;
419 }
420 }
421
422 /* are we trying to match a single line? */
423 else if (sed_cmds[i].beg_match) {
424 if (regexec(sed_cmds[i].beg_match, line, 0, NULL, 0) == 0)
425 line_altered += do_sed_command(&sed_cmds[i], line);
426 }
427
428 /* are we acting on a range of line numbers? */
429 else if (sed_cmds[i].beg_line > 0 && sed_cmds[i].end_line > 0) {
430 if (linenum >= sed_cmds[i].beg_line && linenum <= sed_cmds[i].end_line)
431 line_altered += do_sed_command(&sed_cmds[i], line);
432 }
433
434 /* are we acting on a specified line number */
435 else if (sed_cmds[i].beg_line > 0) {
436 if (linenum == sed_cmds[i].beg_line)
437 line_altered += do_sed_command(&sed_cmds[i], line);
438 }
439
440 /* not acting on matches or line numbers. act on every line */
441 else
442 line_altered += do_sed_command(&sed_cmds[i], line);
443
Erik Andersene49d5ec2000-02-08 19:58:47 +0000444 }
Erik Andersen1266a131999-12-29 22:19:46 +0000445
Mark Whitley6315ce62000-07-10 22:55:51 +0000446 /* we will print the line unless we were told to be quiet or if the
447 * line was altered (via a 'd'elete or 's'ubstitution) */
448 if (!be_quiet && !line_altered)
449 fputs(line, stdout);
450
451 free(line);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000452 }
Erik Andersen1266a131999-12-29 22:19:46 +0000453}
454
455extern int sed_main(int argc, char **argv)
Eric Andersen6b6b3f61999-10-28 16:06:25 +0000456{
Mark Whitley6315ce62000-07-10 22:55:51 +0000457 int opt;
Eric Andersen6b6b3f61999-10-28 16:06:25 +0000458
Mark Whitley858c1ad2000-07-11 21:38:47 +0000459 /* do special-case option parsing */
Mark Whitley6315ce62000-07-10 22:55:51 +0000460 if (argv[1] && (strcmp(argv[1], "--help") == 0))
Eric Andersenc1525e81999-10-29 00:07:31 +0000461 usage(sed_usage);
Eric Andersen6b6b3f61999-10-28 16:06:25 +0000462
Mark Whitley858c1ad2000-07-11 21:38:47 +0000463 /* destroy command strings on exit */
464 if (atexit(destroy_cmd_strs) == -1) {
465 perror("sed");
466 exit(1);
467 }
468
Mark Whitley6315ce62000-07-10 22:55:51 +0000469 /* do normal option parsing */
470 while ((opt = getopt(argc, argv, "Vhne:f:")) > 0) {
471 switch (opt) {
472 case 'V':
473 printf("Print Busybox version here\n");
474 exit(0);
475 break;
476 case 'h':
477 usage(sed_usage);
478 break;
Erik Andersene916d242000-03-06 19:20:35 +0000479 case 'n':
Mark Whitley6315ce62000-07-10 22:55:51 +0000480 be_quiet++;
Erik Andersene916d242000-03-06 19:20:35 +0000481 break;
482 case 'e':
Mark Whitley6315ce62000-07-10 22:55:51 +0000483 add_cmd_str(optarg);
Erik Andersene916d242000-03-06 19:20:35 +0000484 break;
Mark Whitley6315ce62000-07-10 22:55:51 +0000485 case 'f':
486 load_cmd_file(optarg);
Erik Andersene49d5ec2000-02-08 19:58:47 +0000487 break;
Erik Andersene49d5ec2000-02-08 19:58:47 +0000488 }
Eric Andersen6b6b3f61999-10-28 16:06:25 +0000489 }
Mark Whitley6315ce62000-07-10 22:55:51 +0000490
491 /* if we didn't get a pattern from a -e and no command file was specified,
492 * argv[optind] should be the pattern. no pattern, no worky */
493 if (ncmds == 0) {
494 if (argv[optind] == NULL)
495 usage(sed_usage);
496 else {
497 add_cmd_str(argv[optind]);
498 optind++;
499 }
500 }
501
502
503 /* argv[(optind)..(argc-1)] should be names of file to process. If no
504 * files were specified or '-' was specified, take input from stdin.
505 * Otherwise, we process all the files specified. */
506 if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
507 process_file(stdin);
508 }
509 else {
510 int i;
511 FILE *file;
512 for (i = optind; i < argc; i++) {
513 file = fopen(argv[i], "r");
514 if (file == NULL) {
515 fprintf(stderr, "sed: %s: %s\n", argv[i], strerror(errno));
516 } else {
517 process_file(file);
518 fclose(file);
519 }
520 }
521 }
522
Mark Whitley6315ce62000-07-10 22:55:51 +0000523 return 0;
Eric Andersen6b6b3f61999-10-28 16:06:25 +0000524}