blob: 62f9e8731604c561e349c22884da6139c7ae6f08 [file] [log] [blame]
Erik Andersen7ab9c7e2000-05-12 19:41:47 +00001/*
Mark Whitley807f0fd2000-08-02 18:30:11 +00002 * cut.c - minimalist version of cut
Erik Andersen7ab9c7e2000-05-12 19:41:47 +00003 *
Mark Whitley807f0fd2000-08-02 18:30:11 +00004 * Copyright (C) 1999,2000 by Lineo, inc.
5 * Written by Mark Whitley <markw@lineo.com>, <markw@enol.com>
Erik Andersen7ab9c7e2000-05-12 19:41:47 +00006 *
7 * This program is free software; you can redistribute it and/or modify
8 * it under the terms of the GNU General Public License as published by
9 * the Free Software Foundation; either version 2 of the License, or
10 * (at your option) any later version.
11 *
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
16 *
17 * You should have received a copy of the GNU General Public License
18 * along with this program; if not, write to the Free Software
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
Mark Whitley807f0fd2000-08-02 18:30:11 +000020 *
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000021 */
22
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h> /* getopt */
#include "busybox.h"
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000030
31
/* getopt() bookkeeping, declared here for the option loop in cut_main() */
extern int optind;
extern char *optarg;


/*
 * Settings private to this file.  They are filled in once by cut_main()
 * and decompose_list(), then read by cut_file() for every input line.
 */

/* selection mode: 'b' (bytes), 'c' (chars) or 'f' (fields); 0 = not chosen */
static char part = 0;

/* first position to output, counted from 1 */
static int startpos = 1;

/* last position to output; values <= 0 mean "up to the end of the line" */
static int endpos = -1;

/* field delimiter, a tab unless overridden with -d */
static char delim = '\t';

/* non-zero once -s was given: drop lines that contain no delimiter */
static unsigned int supress_non_delimited_lines = 0;
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000043
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000044
45/*
Mark Whitley807f0fd2000-08-02 18:30:11 +000046 * decompose_list() - parses a list and puts values into startpos and endpos.
47 * valid list formats: N, N-, N-M, -M
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000048 */
Mark Whitley807f0fd2000-08-02 18:30:11 +000049static void decompose_list(const char *list)
50{
51 unsigned int nminus = 0;
52 char *ptr;
53
54 /* the list must contain only digits and no more than one minus sign */
55 for (ptr = (char *)list; *ptr; ptr++) {
56 if (!isdigit(*ptr) && *ptr != '-') {
Mark Whitleyf57c9442000-12-07 19:56:48 +000057 error_msg_and_die("invalid byte or field list\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +000058 }
59 if (*ptr == '-') {
60 nminus++;
61 if (nminus > 1) {
Mark Whitleyf57c9442000-12-07 19:56:48 +000062 error_msg_and_die("invalid byte or field list\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +000063 }
64 }
65 }
66
67 /* handle single value 'N' case */
68 if (nminus == 0) {
69 startpos = strtol(list, &ptr, 10);
70 if (startpos == 0) {
Mark Whitleyf57c9442000-12-07 19:56:48 +000071 error_msg_and_die("missing list of fields\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +000072 }
73 endpos = startpos;
74 }
75 /* handle multi-value cases */
76 else if (nminus == 1) {
77 /* handle 'N-' case */
78 if (list[strlen(list) - 1] == '-') {
79 startpos = strtol(list, &ptr, 10);
80 }
81 /* handle '-M' case */
82 else if (list[0] == '-') {
83 endpos = strtol(&list[1], NULL, 10);
84 }
85 /* handle 'N-M' case */
86 else {
87 startpos = strtol(list, &ptr, 10);
88 endpos = strtol(ptr+1, &ptr, 10);
89 }
90
91 /* a sanity check */
92 if (startpos == 0) {
93 startpos = 1;
94 }
95 }
96}
Erik Andersen7ab9c7e2000-05-12 19:41:47 +000097
98
Mark Whitley807f0fd2000-08-02 18:30:11 +000099/*
100 * snippy-snip
101 */
102static void cut_file(FILE *file)
103{
104 char *line;
Mark Whitley00530872000-11-17 22:02:45 +0000105 unsigned int cr_hits = 0;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000106
107 /* go through every line in the file */
108 for (line = NULL; (line = get_line_from_file(file)) != NULL; free(line)) {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000109 /* cut based on chars/bytes */
110 if (part == 'c' || part == 'b') {
111 int i;
112 /* a valid end position has been specified */
113 if (endpos > 0) {
114 for (i = startpos-1; i < endpos; i++) {
115 fputc(line[i], stdout);
116 }
117 fputc('\n', stdout);
118 }
119 /* otherwise, just go to the end of the line */
120 else {
121 for (i = startpos-1; line[i]; i++) {
122 fputc(line[i], stdout);
123 }
124 }
125 }
126 /* cut based on fields */
127 else if (part == 'f') {
128 char *ptr;
129 char *start = line;
130 unsigned int delims_hit = 0;
131
Mark Whitley00530872000-11-17 22:02:45 +0000132 if (delim == '\n') {
133 cr_hits++;
134 if (cr_hits >= startpos && cr_hits <= endpos) {
135 while (*start && *start != '\n') {
Mark Whitley807f0fd2000-08-02 18:30:11 +0000136 fputc(*start, stdout);
137 start++;
138 }
139 fputc('\n', stdout);
140 }
Mark Whitley00530872000-11-17 22:02:45 +0000141 }
142 else {
143 for (ptr = line; (ptr = strchr(ptr, delim)) != NULL; ptr++) {
144 delims_hit++;
145 if (delims_hit == (startpos - 1)) {
146 start = ptr+1;
147 }
148 if (delims_hit == endpos) {
149 break;
150 }
151 }
152
153 /* we didn't hit any delimeters */
154 if (delims_hit == 0 && !supress_non_delimited_lines) {
155 fputs(line, stdout);
156 }
157 /* we =did= hit some delimiters */
158 else if (delims_hit > 0) {
159 /* we have a fixed end point */
160 if (ptr) {
161 while (start < ptr) {
162 fputc(*start, stdout);
163 start++;
164 }
165 fputc('\n', stdout);
166 }
167 /* or we're just going til the end of the line */
168 else {
169 while (*start) {
170 fputc(*start, stdout);
171 start++;
172 }
Mark Whitley807f0fd2000-08-02 18:30:11 +0000173 }
174 }
175 }
176 }
177 }
178}
179
180extern int cut_main(int argc, char **argv)
181{
182 int opt;
183
184 while ((opt = getopt(argc, argv, "b:c:d:f:ns")) > 0) {
185 switch (opt) {
186 case 'b':
187 case 'c':
188 case 'f':
189 /* make sure they didn't ask for two types of lists */
190 if (part != 0) {
Matt Kraai0dab8292000-12-18 03:08:29 +0000191 error_msg_and_die("only one type of list may be specified\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000192 }
193 part = (char)opt;
194 decompose_list(optarg);
195 break;
196 case 'd':
197 if (strlen(optarg) > 1) {
Mark Whitleyf57c9442000-12-07 19:56:48 +0000198 error_msg_and_die("the delimiter must be a single character\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000199 }
200 delim = optarg[0];
201 break;
202 case 'n':
203 /* no-op */
204 break;
205 case 's':
206 supress_non_delimited_lines++;
207 break;
208 }
209 }
210
211 if (part == 0) {
Mark Whitleyf57c9442000-12-07 19:56:48 +0000212 error_msg_and_die("you must specify a list of bytes, characters, or fields\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000213 }
214
215 if (supress_non_delimited_lines && part != 'f') {
Matt Kraai0dab8292000-12-18 03:08:29 +0000216 error_msg_and_die("suppressing non-delimited lines makes sense"
217 " only when operating on fields\n");
218
Mark Whitley807f0fd2000-08-02 18:30:11 +0000219 }
220
221 if (delim != '\t' && part != 'f') {
Mark Whitleyf57c9442000-12-07 19:56:48 +0000222 error_msg_and_die("a delimiter may be specified only when operating on fields\n");
Mark Whitley807f0fd2000-08-02 18:30:11 +0000223 }
224
225 /* argv[(optind)..(argc-1)] should be names of file to process. If no
226 * files were specified or '-' was specified, take input from stdin.
227 * Otherwise, we process all the files specified. */
228 if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
229 cut_file(stdin);
230 }
231 else {
232 int i;
233 FILE *file;
234 for (i = optind; i < argc; i++) {
235 file = fopen(argv[i], "r");
236 if (file == NULL) {
Matt Kraai1fa1ade2000-12-18 03:57:16 +0000237 perror_msg("%s", argv[i]);
Mark Whitley807f0fd2000-08-02 18:30:11 +0000238 } else {
239 cut_file(file);
240 fclose(file);
241 }
242 }
243 }
244
Matt Kraai3e856ce2000-12-01 02:55:13 +0000245 return EXIT_SUCCESS;
Mark Whitley807f0fd2000-08-02 18:30:11 +0000246}