blob: d0af63a362dd639ff3c05195b4aa26745a995056 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * Mini tr implementation for busybox
 *
 ** Copyright (c) 1987,1997, Prentice Hall All rights reserved.
 *
 * The name of Prentice Hall may not be used to endorse or promote
 * products derived from this software without specific prior
 * written permission.
 *
 * Copyright (c) Michiel Huisjes
 *
 * This version of tr is adapted from Minix tr and was modified
 * by Erik Andersen <andersen@codepoet.org> to be used in busybox.
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */
/* http://www.opengroup.org/onlinepubs/009695399/utilities/tr.html
 * TODO: xdigit, graph, print
 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000021#include "libbb.h"
Erik Andersen330fd2b2000-05-19 05:35:19 +000022
/* Largest byte value covered by the translation tables (octal 0377 == 255). */
#define ASCII 0377
Erik Andersenfb002d02000-03-05 08:07:00 +000024
/*
 * Install the STRING1 -> STRING2 translation into pvector.
 *
 * pvector:     translation table indexed by byte value; entry
 *              pvector[string1[i]] is overwritten for every i.
 * string1:     expanded source set, string1_len bytes (not NUL-terminated).
 * string2:     expanded target set, string2_len bytes (not NUL-terminated).
 *
 * The i-th byte of string1 is mapped to the i-th byte of string2.  When
 * string2 is shorter than string1, the remaining string1 bytes are mapped
 * to the last byte of string2.  When string2 is empty there is nothing to
 * map to, so the table is left unchanged.
 */
static void map(char *pvector,
		const unsigned char *string1, unsigned int string1_len,
		const unsigned char *string2, unsigned int string2_len)
{
	char last;
	unsigned int i, j;

	/* No target bytes at all: keep the existing (identity) mapping
	 * instead of translating everything to an arbitrary placeholder. */
	if (string2_len == 0)
		return;

	last = string2[0];
	for (j = 0, i = 0; i < string1_len; i++) {
		/* consume the next target byte while any remain;
		 * afterwards keep reusing the final one */
		if (j < string2_len)
			last = string2[j++];
		pvector[string1[i]] = last;
	}
}
39
Mark Whitley8b7a0d82001-05-24 21:31:09 +000040/* supported constructs:
Denis Vlasenko3d461672007-11-13 22:22:29 +000041 * Ranges, e.g., 0-9 ==> 0123456789
42 * Ranges, e.g., [0-9] ==> 0123456789
43 * Escapes, e.g., \a ==> Control-G
44 * Character classes, e.g. [:upper:] ==> A...Z
45 * Equiv classess, e.g. [=A=] ==> A (hmmmmmmm?)
Mark Whitley8b7a0d82001-05-24 21:31:09 +000046 */
Rob Landleyab58d5c2006-06-30 19:04:09 +000047static unsigned int expand(const char *arg, char *buffer)
Erik Andersen8f8d6d52000-05-01 22:30:37 +000048{
Rob Landleyab58d5c2006-06-30 19:04:09 +000049 char *buffer_start = buffer;
Denis Vlasenko3d461672007-11-13 22:22:29 +000050 unsigned i; /* can't be unsigned char: must be able to hold 256 */
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +000051 unsigned char ac;
Denis Vlasenko3d461672007-11-13 22:22:29 +000052
53 while (*arg) {
54 if (*arg == '\\') {
55 arg++;
56 *buffer++ = bb_process_escape_sequence(&arg);
57 continue;
58 }
59 if (arg[1] == '-') { /* "0-9..." */
60 ac = arg[2];
61 if (ac == '\0') { /* "0-": copy verbatim */
62 *buffer++ = *arg++; /* copy '0' */
63 continue; /* next iter will copy '-' and stop */
64 }
65 i = *arg;
66 while (i <= ac) /* ok: i is unsigned _int_ */
67 *buffer++ = i++;
68 arg += 3; /* skip 0-9 */
69 continue;
70 }
71 if (*arg == '[') { /* "[xyz..." */
72 arg++;
73 i = *arg++;
74 /* "[xyz...", i=x, arg points to y */
75 if (ENABLE_FEATURE_TR_CLASSES && i == ':') {
Denis Vlasenko990d0f62007-07-24 15:54:42 +000076#define CLO ":]\0"
Denis Vlasenko3d461672007-11-13 22:22:29 +000077 static const char classes[] ALIGN1 =
78 "alpha"CLO "alnum"CLO "digit"CLO
79 "lower"CLO "upper"CLO "space"CLO
80 "blank"CLO "punct"CLO "cntrl"CLO;
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +000081#define CLASS_invalid 0 /* we increment the retval */
82#define CLASS_alpha 1
83#define CLASS_alnum 2
84#define CLASS_digit 3
85#define CLASS_lower 4
86#define CLASS_upper 5
87#define CLASS_space 6
88#define CLASS_blank 7
89#define CLASS_punct 8
90#define CLASS_cntrl 9
91//#define CLASS_xdigit 10
92//#define CLASS_graph 11
93//#define CLASS_print 12
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +000094 smalluint j;
95 { /* not really pretty.. */
Denis Vlasenko3d461672007-11-13 22:22:29 +000096 char *tmp = xstrndup(arg, 7); // warning: xdigit would need 8, not 7
Denis Vlasenko990d0f62007-07-24 15:54:42 +000097 j = index_in_strings(classes, tmp) + 1;
Denis Vlasenko74324c82007-06-04 10:16:52 +000098 free(tmp);
Rob Landleyf1048142005-10-08 21:21:08 +000099 }
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000100 if (j == CLASS_alnum || j == CLASS_digit) {
Rob Landley998dbee2006-04-19 22:22:06 +0000101 for (i = '0'; i <= '9'; i++)
102 *buffer++ = i;
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000103 }
104 if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_upper) {
Rob Landleyf1048142005-10-08 21:21:08 +0000105 for (i = 'A'; i <= 'Z'; i++)
106 *buffer++ = i;
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000107 }
108 if (j == CLASS_alpha || j == CLASS_alnum || j == CLASS_lower) {
Rob Landleyf1048142005-10-08 21:21:08 +0000109 for (i = 'a'; i <= 'z'; i++)
110 *buffer++ = i;
Rob Landleyf1048142005-10-08 21:21:08 +0000111 }
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000112 if (j == CLASS_space || j == CLASS_blank) {
Rob Landley998dbee2006-04-19 22:22:06 +0000113 *buffer++ = '\t';
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000114 if (j == CLASS_space) {
115 *buffer++ = '\n';
116 *buffer++ = '\v';
117 *buffer++ = '\f';
118 *buffer++ = '\r';
119 }
Rob Landley998dbee2006-04-19 22:22:06 +0000120 *buffer++ = ' ';
121 }
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000122 if (j == CLASS_punct || j == CLASS_cntrl) {
Denis Vlasenko3d461672007-11-13 22:22:29 +0000123 for (i = '\0'; i <= ASCII; i++)
124 if ((j == CLASS_punct && isprint(i) && !isalnum(i) && !isspace(i))
125 || (j == CLASS_cntrl && iscntrl(i)))
Rob Landleyf1048142005-10-08 21:21:08 +0000126 *buffer++ = i;
127 }
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000128 if (j == CLASS_invalid) {
Rob Landley998dbee2006-04-19 22:22:06 +0000129 *buffer++ = '[';
130 *buffer++ = ':';
Rob Landleyf1048142005-10-08 21:21:08 +0000131 continue;
132 }
133 break;
134 }
Denis Vlasenko3d461672007-11-13 22:22:29 +0000135 /* "[xyz...", i=x, arg points to y */
136 if (ENABLE_FEATURE_TR_EQUIV && i == '=') { /* [=CHAR=] */
137 *buffer++ = *arg; /* copy CHAR */
138 arg += 3; /* skip CHAR=] */
Rob Landleyf1048142005-10-08 21:21:08 +0000139 continue;
140 }
Denis Vlasenko3d461672007-11-13 22:22:29 +0000141 if (*arg != '-') { /* not [x-...] - copy verbatim */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000142 *buffer++ = '[';
Denis Vlasenko3d461672007-11-13 22:22:29 +0000143 arg--; /* points to x */
144 continue; /* copy all, including eventual ']' */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000145 }
Denis Vlasenko3d461672007-11-13 22:22:29 +0000146 /* [x-y...] */
147 arg++;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000148 ac = *arg++;
149 while (i <= ac)
150 *buffer++ = i++;
Denis Vlasenko3d461672007-11-13 22:22:29 +0000151 arg++; /* skip the assumed ']' */
152 continue;
153 }
154 *buffer++ = *arg++;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000155 }
Eric Andersen00143ba2000-07-13 16:40:41 +0000156 return (buffer - buffer_start);
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000157}
158
/*
 * Replace the byte set stored in buffer with its complement.
 *
 * buffer:     on entry holds buffer_len bytes (duplicates allowed, not
 *             NUL-terminated); on return holds every byte value from 0 to
 *             UCHAR_MAX that was NOT present, in ascending order.
 *             It must have room for UCHAR_MAX + 1 bytes.
 * buffer_len: number of valid bytes in buffer on entry.
 * Returns the number of bytes now stored in buffer.
 */
static int complement(char *buffer, int buffer_len)
{
	char present[UCHAR_MAX + 1] = { 0 };
	int i, ix;

	/* Index through unsigned char: plain char may be signed, and a
	 * negative value would never match a byte value in 0..UCHAR_MAX. */
	for (i = 0; i < buffer_len; i++)
		present[(unsigned char)buffer[i]] = 1;

	/* The input has been fully recorded above, so the result can be
	 * written straight back into buffer. */
	ix = 0;
	for (i = 0; i <= UCHAR_MAX; i++) {
		if (!present[i])
			buffer[ix++] = (char)i;
	}
	return ix;
}
175
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000176int tr_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenko68404f12008-03-17 09:00:54 +0000177int tr_main(int argc ATTRIBUTE_UNUSED, char **argv)
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000178{
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000179 int output_length = 0, input_length;
Eric Andersenfad04fd2000-07-14 06:49:52 +0000180 int i;
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000181 smalluint flags;
Denis Vlasenkoa89d50f2007-11-13 17:51:40 +0000182 ssize_t read_chars = 0;
Denis Vlasenko3d461672007-11-13 22:22:29 +0000183 size_t in_index = 0, out_index = 0;
184 unsigned last = UCHAR_MAX + 1; /* not equal to any char */
Denis Vlasenkof7be20e2007-12-24 14:09:19 +0000185 unsigned char coded, c;
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000186 unsigned char *output = xmalloc(BUFSIZ);
187 char *vector = xzalloc((ASCII+1) * 3);
188 char *invec = vector + (ASCII+1);
189 char *outvec = vector + (ASCII+1) * 2;
Mark Whitley59ab0252001-01-23 22:30:04 +0000190
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000191#define TR_OPT_complement (1 << 0)
192#define TR_OPT_delete (1 << 1)
193#define TR_OPT_squeeze_reps (1 << 2)
194
195 flags = getopt32(argv, "+cds"); /* '+': stop at first non-option */
196 argv += optind;
197
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000198 for (i = 0; i <= ASCII; i++) {
199 vector[i] = i;
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000200 /*invec[i] = outvec[i] = FALSE; - done by xzalloc */
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000201 }
202
Denis Vlasenko74324c82007-06-04 10:16:52 +0000203#define tr_buf bb_common_bufsiz1
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000204 if (*argv != NULL) {
205 input_length = expand(*argv++, tr_buf);
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000206 if (flags & TR_OPT_complement)
Denis Vlasenko74324c82007-06-04 10:16:52 +0000207 input_length = complement(tr_buf, input_length);
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000208 if (*argv) {
209 if (argv[0][0] == '\0')
Manuel Novoa III cad53642003-03-19 09:13:01 +0000210 bb_error_msg_and_die("STRING2 cannot be empty");
Denis Vlasenko8d0a7342007-11-13 22:23:57 +0000211 output_length = expand(*argv, output);
Denis Vlasenko74324c82007-06-04 10:16:52 +0000212 map(vector, tr_buf, input_length, output, output_length);
Eric Andersena03d86c2000-07-10 16:38:50 +0000213 }
Eric Andersen00143ba2000-07-13 16:40:41 +0000214 for (i = 0; i < input_length; i++)
Denis Vlasenko74324c82007-06-04 10:16:52 +0000215 invec[(unsigned char)tr_buf[i]] = TRUE;
Eric Andersen00143ba2000-07-13 16:40:41 +0000216 for (i = 0; i < output_length; i++)
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000217 outvec[output[i]] = TRUE;
Erik Andersen8f8d6d52000-05-01 22:30:37 +0000218 }
Denis Vlasenko74324c82007-06-04 10:16:52 +0000219
220 for (;;) {
221 /* If we're out of input, flush output and read more input. */
222 if (in_index == read_chars) {
223 if (out_index) {
224 xwrite(STDOUT_FILENO, (char *)output, out_index);
225 out_index = 0;
226 }
Denis Vlasenko3d461672007-11-13 22:22:29 +0000227 read_chars = safe_read(STDIN_FILENO, tr_buf, BUFSIZ);
Denis Vlasenko74324c82007-06-04 10:16:52 +0000228 if (read_chars <= 0) {
Denis Vlasenkoa89d50f2007-11-13 17:51:40 +0000229 if (read_chars < 0)
230 bb_perror_msg_and_die(bb_msg_read_error);
Denis Vlasenko74324c82007-06-04 10:16:52 +0000231 exit(EXIT_SUCCESS);
232 }
233 in_index = 0;
234 }
235 c = tr_buf[in_index++];
236 coded = vector[c];
237 if ((flags & TR_OPT_delete) && invec[c])
238 continue;
Denis Vlasenko3d461672007-11-13 22:22:29 +0000239 if ((flags & TR_OPT_squeeze_reps) && last == coded
240 && (invec[c] || outvec[coded]))
Denis Vlasenko74324c82007-06-04 10:16:52 +0000241 continue;
242 output[out_index++] = last = coded;
243 }
244 /* NOTREACHED */
Bernhard Reutner-Fischereceecea2007-03-30 14:43:27 +0000245 return EXIT_SUCCESS;
Erik Andersenfb002d02000-03-05 08:07:00 +0000246}