blob: 87fc71a19e7ad0f4e9fef6d1a504a645e90266b8 [file] [log] [blame]
nstrazed8fe6b2001-01-22 17:50:31 +00001/*
2 * Copyright (c) 2000 Silicon Graphics, Inc. All Rights Reserved.
3 *
4 * This program is free software; you can redistribute it and/or modify it
5 * under the terms of version 2 of the GNU General Public License as
6 * published by the Free Software Foundation.
7 *
8 * This program is distributed in the hope that it would be useful, but
9 * WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11 *
12 * Further, this software is distributed without any warranty that it is
13 * free of the rightful claim of any third person regarding infringement
14 * or the like. Any license provided herein, whether implied or
15 * otherwise, applies only to this software file. Patent licenses, if
16 * any, provided herein do not apply to combinations of this program with
17 * other software, or any other product whatsoever.
18 *
19 * You should have received a copy of the GNU General Public License along
Wanlong Gaofed96412012-10-24 10:10:29 +080020 * with this program; if not, write the Free Software Foundation, Inc.,
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
nstrazed8fe6b2001-01-22 17:50:31 +000022 *
23 * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24 * Mountain View, CA 94043, or:
25 *
26 * http://www.sgi.com
27 *
28 * For further information regarding this notice, see:
29 *
30 * http://oss.sgi.com/projects/GenInfo/NoticeExplan/
31 *
32 */
nstraza700ef52002-09-16 15:02:57 +000033/* $Id: rand_lines.c,v 1.5 2002/09/16 15:02:57 nstraz Exp $ */
nstrazed8fe6b2001-01-22 17:50:31 +000034/**************************************************************
35 *
36 * OS Testing - Silicon Graphics, Inc.
37 *
38 * TOOL IDENTIFIER : rand_lines
39 *
40 * DESCRIPTION : prints lines from a file in random order
41 *
42 * SYNOPSIS:
 *	rand_lines [-hg][-S seed][-l numlines] [files...]
44 *
45 * AUTHOR : Richard Logan
46 *
Garrett Cooper1e6f5a62010-12-19 09:58:10 -080047 * CO-PILOT(s) :
nstrazed8fe6b2001-01-22 17:50:31 +000048 *
49 * DATE STARTED : 05/94
50 *
51 * INPUT SPECIFICATIONS
52 * This tool will print lines of a file in random order.
nstraz9d100512002-06-20 19:03:00 +000053 * The max line length is 4096.
nstrazed8fe6b2001-01-22 17:50:31 +000054 * The options supported are:
 *	-h	This option prints a help message then exits.
56 *
 *	-g	This option specifies that the number of lines in the file
 *		is counted before randomizing. This option overrides the
 *		-l option. Using this option gives the best randomization,
 *		but it requires processing the file an additional time.
Garrett Cooper1e6f5a62010-12-19 09:58:10 -080062 *
 *	-l numlines : This option specifies to randomize the file in
 *		numlines chunks. The default size is 4096.
nstrazed8fe6b2001-01-22 17:50:31 +000065 *
Garrett Cooper1e6f5a62010-12-19 09:58:10 -080066 * -S seed : sets randomization seed to seed.
nstrazed8fe6b2001-01-22 17:50:31 +000067 * The default is time(0). If seed is zero, time(0) is used.
68 *
69 * file A readable, seekable filename. The cmd allows the user
70 * to specify multiple files, but each file is dealt with
71 * separately.
72 *
73 * DESIGN DESCRIPTION
 *	This tool uses a simple algorithm where the file is read and
 *	the offset of each line is placed at a random position in an
 *	array. The array is then processed sequentially: the input line
 *	at the offset stored in each array element is reread and printed.
 *	The output is thus the input file's lines in random order.
79 *
80 * SPECIAL REQUIREMENTS
81 * None.
82 *
83 * UPDATE HISTORY
 *	This should contain the description, author, and date of any
 *	"interesting" modifications (i.e. info that should be helpful in
 *	maintaining/enhancing this tool).
87 * username description
88 * ----------------------------------------------------------------
 *	rrl		Creation of program
nstraz9d100512002-06-20 19:03:00 +000090 * rrl 06/02 Fixed bug and some cleanup. Changed default chunk
Garrett Cooper1e6f5a62010-12-19 09:58:10 -080091 * and line size to 4096 characters.
nstrazed8fe6b2001-01-22 17:50:31 +000092 *
93 * BUGS/LIMITATIONS
 *	This program cannot deal with non-seekable files like
 *	stdin or a pipe. If more than one file is specified,
 *	each file is randomized one at a time. The max line
 *	length is 4096 characters.
nstrazed8fe6b2001-01-22 17:50:31 +000098 *
99 **************************************************************/
100
Garrett Cooper83466312011-02-15 06:39:09 -0800101#include <err.h>
Garrett Cooper04f42072011-01-10 12:55:44 -0800102#include <errno.h>
nstrazed8fe6b2001-01-22 17:50:31 +0000103#include <stdio.h>
104#include <stdlib.h>
nstrazd5d51ca2001-02-28 17:41:59 +0000105#include <string.h>
nstrazed8fe6b2001-01-22 17:50:31 +0000106#include <time.h>
Garrett Cooper04f42072011-01-10 12:55:44 -0800107#include <unistd.h>
nstrazed8fe6b2001-01-22 17:50:31 +0000108
nstraz9d100512002-06-20 19:03:00 +0000109#include "random_range.h"
nstrazed8fe6b2001-01-22 17:50:31 +0000110
/*
 * One slot of the randomization table: records the file offset of a
 * single input line and whether the slot has been filled yet.
 */
struct offset_t {
	/* non-zero once a line has been stored in this slot */
	long used;
	/* position of the start of the stored line */
	long offset;
};
118
nstraz9d100512002-06-20 19:03:00 +0000119void usage(FILE *stream);
120void help();
nstrazd5d51ca2001-02-28 17:41:59 +0000121int rnd_file(FILE *infile, int numlines, long seed);
nstraz9d100512002-06-20 19:03:00 +0000122int get_numlines(FILE *infile);
123int rnd_insert(struct offset_t offsets[], long offset, int size);
124
/* Number of lines randomized together when -l/-g do not say otherwise. */
#define DEF_SIZE	4096

/* Size of the buffer used to read one line, terminator included. */
#define MAX_LN_SZ	4096

/* Fallback for very old <stdio.h> headers that lack SEEK_SET. */
#if !defined(SEEK_SET)
#define SEEK_SET 0
#endif

/* Basename of argv[0]; set by main() and used in diagnostics. */
char *Progname = NULL;
nstrazed8fe6b2001-01-22 17:50:31 +0000133
134/***********************************************************************
135 * MAIN
136 ***********************************************************************/
nstrazd5d51ca2001-02-28 17:41:59 +0000137int
nstrazed8fe6b2001-01-22 17:50:31 +0000138main(argc, argv)
139int argc;
140char **argv;
141{
142 FILE *infile;
143 int c;
144 long seed = -1; /* use time as seed */
145 int lsize = DEF_SIZE; /* num lines to randomize */
146 int getfilelines = 0; /* if set, count lines first */
nstrazed8fe6b2001-01-22 17:50:31 +0000147
nstraz9d100512002-06-20 19:03:00 +0000148 if ((Progname = strrchr(argv[0], '/')) == NULL)
149 Progname = argv[0];
150 else
151 Progname++;
nstrazed8fe6b2001-01-22 17:50:31 +0000152
Garrett Cooper43088e12010-12-13 23:30:59 -0800153 while ((c = getopt (argc, argv, "hgS:l:")) != EOF) {
nstrazed8fe6b2001-01-22 17:50:31 +0000154 switch(c) {
155 case 'h':
156 help();
157 exit(0);
158 break;
159 case 'S': /* seed */
Garrett Cooper43088e12010-12-13 23:30:59 -0800160 if (sscanf(optarg, "%li", &seed) != 1) {
nstrazed8fe6b2001-01-22 17:50:31 +0000161 fprintf(stderr, "%s: --S option argument is invalid\n", Progname);
162 exit(1);
163 }
164 break;
165
166 case 'l': /* number of lines */
Garrett Cooper43088e12010-12-13 23:30:59 -0800167 if (sscanf(optarg, "%i", &lsize) != 1) {
nstrazed8fe6b2001-01-22 17:50:31 +0000168 fprintf(stderr, "%s: --s option argument is invalid\n", Progname);
169 exit(1);
170 }
171 break;
172
173 case 'g':
174 getfilelines++;
175 break;
176
177 case '?':
178 usage(stderr);
179 exit(1);
180 break;
181 }
182 }
183
Garrett Cooper43088e12010-12-13 23:30:59 -0800184 if (optind + 1 != argc) {
nstraz9d100512002-06-20 19:03:00 +0000185 fprintf(stderr, "%s: Missing argument.\n", Progname);
nstrazed8fe6b2001-01-22 17:50:31 +0000186 usage(stderr);
187 exit(1);
188 }
189
Garrett Cooper43088e12010-12-13 23:30:59 -0800190 if (seed == -1) {
nstrazed8fe6b2001-01-22 17:50:31 +0000191 seed = time(0);
192 }
Garrett Cooper1e6f5a62010-12-19 09:58:10 -0800193
Garrett Cooper43088e12010-12-13 23:30:59 -0800194 if (strcmp(argv[argc-1],"-") == 0) {
nstrazed8fe6b2001-01-22 17:50:31 +0000195 infile = stdin;
nstraz9d100512002-06-20 19:03:00 +0000196 fprintf(stderr, "%s: Can not support stdin processing.\n",
197 Progname);
nstrazed8fe6b2001-01-22 17:50:31 +0000198 exit(2);
199 }
200 else {
201
202 if ((infile=fopen(argv[argc-1], "r")) == NULL) {
nstraz9d100512002-06-20 19:03:00 +0000203 fprintf(stderr, "%s: Unable to open file %s: %s\n",
204 Progname, argv[argc-1], strerror(errno));
nstrazed8fe6b2001-01-22 17:50:31 +0000205 exit(1);
206 }
207
Garrett Cooper43088e12010-12-13 23:30:59 -0800208 if (getfilelines) {
nstrazed8fe6b2001-01-22 17:50:31 +0000209 lsize=get_numlines(infile);
210 }
211
212 rnd_file(infile, lsize, seed);
213 }
214
215 exit(0);
216}
217
218/***********************************************************************
219 * Print usage message to stream.
220 ***********************************************************************/
221void
222usage(FILE *stream)
223{
224 fprintf(stream,
225 "Usage %s [-hg][-S seed][-l numlines] [files...]\n", Progname);
226
227}
228
229/***********************************************************************
230 * Print help message to stdout.
231 ***********************************************************************/
232void
233help()
234{
235 usage(stdout);
236 printf("This tool will print lines in random order (max line len %d).\n\
237 -h : print this help and exit\n\
238 -g : count the number of lines in the file before randomizing\n\
239 This option overrides -l option.\n\
240 -l numlines : randoms lines in numlines chuncks (def %d)\n\
241 -S seed : sets seed to seed (def time(0))\n",
242 MAX_LN_SZ, DEF_SIZE);
243
244}
245
246/***********************************************************************
247 * counts the number of lines in already open file.
248 * Note: File must be seekable (not stdin or a pipe).
249 ***********************************************************************/
nstrazd5d51ca2001-02-28 17:41:59 +0000250int
nstrazed8fe6b2001-01-22 17:50:31 +0000251get_numlines(infile)
252FILE *infile;
253{
254 char line[MAX_LN_SZ]; /* max size of a line */
255 int cnt=0;
256
Garrett Cooper43088e12010-12-13 23:30:59 -0800257 while (fgets(line, MAX_LN_SZ, infile) != NULL) {
nstrazed8fe6b2001-01-22 17:50:31 +0000258 cnt++;
259 }
260
261 /* rewind the file */
262 fseek(infile, 0, SEEK_SET);
263
264 return cnt;
265}
266
267/***********************************************************************
268 *
269 * infile must be a fseekable file. Thus, it can not be stdin.
270 * It will read each line in the file, randomly saving the offset
271 * of each line in a array of struct offset_t.
272 * It will then print each line in the array stored order.
273 *
274 ***********************************************************************/
nstrazd5d51ca2001-02-28 17:41:59 +0000275int
nstrazed8fe6b2001-01-22 17:50:31 +0000276rnd_file(infile, numlines, seed)
277FILE *infile;
278int numlines; /* can be more or less than num lines in file */
279 /* most opt randomized when num lines in files */
280 /* or just a bit bigger */
281long seed;
282{
283
nstrazed8fe6b2001-01-22 17:50:31 +0000284 char line[MAX_LN_SZ]; /* max size of a line */
285 int cnt;
nstraz9d100512002-06-20 19:03:00 +0000286 long coffset; /* current line offset */
nstrazed8fe6b2001-01-22 17:50:31 +0000287
288 struct offset_t *offsets;
nstrazed8fe6b2001-01-22 17:50:31 +0000289 int memsize;
290
Garrett Cooper43088e12010-12-13 23:30:59 -0800291 if (numlines <= 0) { /*use default */
nstrazed8fe6b2001-01-22 17:50:31 +0000292 numlines = DEF_SIZE;
293 }
294
295 /*
296 * Malloc space for numlines copies the offset_t structure.
297 * This is where the randomization takes place.
298 */
299 memsize = sizeof(struct offset_t)*numlines;
300
Garrett Cooper43088e12010-12-13 23:30:59 -0800301 if ((offsets=(struct offset_t *)malloc(memsize)) == NULL) {
nstrazed8fe6b2001-01-22 17:50:31 +0000302 fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize, errno);
303 return -1;
304 }
305
306 random_range_seed(seed);
307
308 coffset=0;
nstrazed8fe6b2001-01-22 17:50:31 +0000309
Garrett Cooper43088e12010-12-13 23:30:59 -0800310 while (! feof(infile)) {
nstrazed8fe6b2001-01-22 17:50:31 +0000311
312 fseek(infile, coffset, SEEK_SET);
nstraz9d100512002-06-20 19:03:00 +0000313 coffset=ftell(infile);
314 memset(offsets, 0, memsize);
nstrazed8fe6b2001-01-22 17:50:31 +0000315 cnt=0;
316
317 /*
318 * read the file in and place offset of each line randomly
319 * into offsets array. Only numlines line can be randomized
320 * at a time.
321 */
Garrett Cooper43088e12010-12-13 23:30:59 -0800322 while (cnt < numlines && fgets(line, MAX_LN_SZ, infile) != NULL) {
nstrazed8fe6b2001-01-22 17:50:31 +0000323
Garrett Cooper43088e12010-12-13 23:30:59 -0800324 if (rnd_insert(offsets, coffset, numlines) < 0) {
nstraz9d100512002-06-20 19:03:00 +0000325 fprintf(stderr, "%s:%d rnd_insert() returned -1 (fatal error)!\n",
326 __FILE__, __LINE__);
327 abort();
328 }
nstrazed8fe6b2001-01-22 17:50:31 +0000329 cnt++;
330
331 coffset=ftell(infile);
nstrazed8fe6b2001-01-22 17:50:31 +0000332 }
333
Garrett Cooper43088e12010-12-13 23:30:59 -0800334 if (cnt == 0) {
nstrazed8fe6b2001-01-22 17:50:31 +0000335 continue;
336 }
337
338 /*
339 * print out lines based on offset.
340 */
341 for (cnt=0; cnt<numlines; cnt++) {
342
Garrett Cooper43088e12010-12-13 23:30:59 -0800343 if (offsets[cnt].used) {
nstrazed8fe6b2001-01-22 17:50:31 +0000344 fseek(infile, offsets[cnt].offset, SEEK_SET);
Garrett Cooper04f42072011-01-10 12:55:44 -0800345 if (fgets(line, MAX_LN_SZ, infile) == NULL)
346 err(1, "fgets");
nstraz9d100512002-06-20 19:03:00 +0000347 fputs(line, stdout);
nstrazed8fe6b2001-01-22 17:50:31 +0000348 }
349 }
350
351 } /* end of file */
352
353 return 0;
354}
355
356/***********************************************************************
357 * This function randomly inserts offset information into
358 * the offsets array. The array has a size of size.
359 * It will attempt 75 random array indexes before finding the first
360 * open array element.
361 *
362 ***********************************************************************/
nstrazd5d51ca2001-02-28 17:41:59 +0000363int
nstrazed8fe6b2001-01-22 17:50:31 +0000364rnd_insert(offsets, offset, size)
365struct offset_t offsets[];
nstraz9d100512002-06-20 19:03:00 +0000366long offset;
nstrazed8fe6b2001-01-22 17:50:31 +0000367int size;
368{
369 int rand_num;
370 int quick = 0;
371 int ind;
nstrazed8fe6b2001-01-22 17:50:31 +0000372
373 /*
374 * Loop looking for random unused index.
375 * It will only be attempted 75 times.
376 */
Garrett Cooper43088e12010-12-13 23:30:59 -0800377 while (quick < 75) {
nstrazed8fe6b2001-01-22 17:50:31 +0000378
nstraz9d100512002-06-20 19:03:00 +0000379 rand_num=random_range(0, size-1, 1, NULL);
nstrazed8fe6b2001-01-22 17:50:31 +0000380
Garrett Cooper43088e12010-12-13 23:30:59 -0800381 if (! offsets[rand_num].used) {
nstrazed8fe6b2001-01-22 17:50:31 +0000382 offsets[rand_num].offset=offset;
383 offsets[rand_num].used++;
384 return rand_num;
385 }
386 quick++;
387 }
388
389 /*
390 * an randomly choosen index was not found, find
391 * first open index and use it.
392 */
Garrett Cooper1e6f5a62010-12-19 09:58:10 -0800393 for (ind=0; ind < size && offsets[ind].used != 0; ind++)
nstraz9d100512002-06-20 19:03:00 +0000394 ; /* do nothing */
395
Garrett Cooper43088e12010-12-13 23:30:59 -0800396 if (ind >= size) {
nstraz9d100512002-06-20 19:03:00 +0000397 /*
398 * If called with an array where all offsets are used,
399 * we won't be able to find an open array location.
400 * Thus, return -1 indicating the error.
401 * This should never happen if called correctly.
402 */
403 return -1;
404 }
nstrazed8fe6b2001-01-22 17:50:31 +0000405
406 offsets[ind].offset=offset;
407 offsets[ind].used++;
408 return ind;
409
410}
411
412
413
414/***********************************************************************
415 *
416 * CODE NOT TESTED AT ALL - it must be tested before it is used.
417 *
418 * This function was written to allow rand_lines to work on non-seekable
419 * file (i.e stdin).
420 *
421 ***********************************************************************/
422int
423rnd_stdin(infile, space, numlines, seed)
424FILE *infile;
425int space; /* amount of space to use to read file into memory, */
426 /* randomized and print. randomize in chunks */
427int numlines; /* can be more or less than num lines in file */
428 /* most opt randomized when num lines in files */
429 /* or just a bit bigger */
430long seed;
431{
432
nstrazed8fe6b2001-01-22 17:50:31 +0000433 char line[MAX_LN_SZ]; /* max size of a line */
434 int cnt; /* offset printer counter */
nstraza700ef52002-09-16 15:02:57 +0000435 long loffset; /* last line address */
nstrazed8fe6b2001-01-22 17:50:31 +0000436 char *buffer; /* malloc space for file reads */
437 char *rdbuff; /* where to start read */
438 long stopaddr; /* end of read space (address)*/
439 int rdsz; /* amount read */
440 int sztord;
441 char *chr; /* buffer processing pointer */
442 char *ptr; /* printing processing pointer */
443 char *lptr; /* printing processing pointer */
444 int loopcntl = 1; /* main loop control flag */
445 struct offset_t *offsets; /* pointer to offset space */
446 int memsize; /* amount of offset space to malloc */
447 int newbuffer = 1; /* need new buffer */
448
Garrett Cooper43088e12010-12-13 23:30:59 -0800449 if (numlines <= 0) { /*use default */
nstrazed8fe6b2001-01-22 17:50:31 +0000450 numlines = DEF_SIZE;
451 }
452
453 /*
454 * Malloc space for file contents
455 */
Garrett Cooper43088e12010-12-13 23:30:59 -0800456 if ((buffer=(char *)malloc(space)) == NULL) {
nstrazed8fe6b2001-01-22 17:50:31 +0000457 fprintf(stderr, "Unable to malloc(%d): errno:%d\n", space, errno);
458 return -1;
459 }
460
461 /*
462 * Malloc space for numlines copies the offset_t structure.
463 * This is where the randomization takes place.
464 */
465 memsize = sizeof(struct offset_t)*numlines;
466
Garrett Cooper43088e12010-12-13 23:30:59 -0800467 if ((offsets=(struct offset_t *)malloc(memsize)) == NULL) {
nstrazed8fe6b2001-01-22 17:50:31 +0000468 fprintf(stderr, "Unable to malloc(%d): errno:%d\n", memsize, errno);
469 return -1;
470 }
471
472 random_range_seed(seed);
473 rdbuff = buffer; /* read into start of buffer */
474 sztord = space; /* amount of space left in buffer */
475
476 /*
477 * Loop until read doesn't read anything
478 * If last line does not end in newline, it is not printed
479 */
Garrett Cooper43088e12010-12-13 23:30:59 -0800480 while (loopcntl) {
nstrazed8fe6b2001-01-22 17:50:31 +0000481 /*
482 * read in file up to space size
483 * only works if used as filter.
484 * The code will randomize one reads worth at a time.
485 * If typing in lines, read will read only one line - no randomizing.
486 */
487
488 chr = buffer;
Garrett Cooper43088e12010-12-13 23:30:59 -0800489 if ((rdsz=fread((void *)rdbuff, sztord, 1, infile)) == 0) {
nstrazed8fe6b2001-01-22 17:50:31 +0000490 fprintf(stderr, "input file is empty, done randomizing\n");
491 loopcntl=0;
492 return 0;
493 }
494
495 stopaddr = ((long)buffer + rdsz);
496
497 loffset= (long)buffer;
498
Garrett Cooper43088e12010-12-13 23:30:59 -0800499 while (! newbuffer) {
nstrazed8fe6b2001-01-22 17:50:31 +0000500
Garrett Cooper43088e12010-12-13 23:30:59 -0800501 while ((long)chr < stopaddr && *chr != '\n')
nstrazed8fe6b2001-01-22 17:50:31 +0000502 chr++;
503
504 chr++;
505
Garrett Cooper43088e12010-12-13 23:30:59 -0800506 if ((long)chr >= stopaddr) {
nstrazed8fe6b2001-01-22 17:50:31 +0000507
508 fprintf(stderr, "end of read in buffer\n");
509
510 /*
511 * print out lines based on offset.
512 */
513 for (cnt=0; cnt<numlines; cnt++) {
514
Garrett Cooper43088e12010-12-13 23:30:59 -0800515 if (offsets[cnt].used) {
nstrazed8fe6b2001-01-22 17:50:31 +0000516 ptr = (char *)offsets[cnt].offset;
517 /*
518 * copy buffer characters into line for printing
519 */
520 lptr = line;
Garrett Cooper1e6f5a62010-12-19 09:58:10 -0800521 while (*ptr != '\n')
nstrazed8fe6b2001-01-22 17:50:31 +0000522 *lptr++ = *ptr++;
Garrett Cooper1e6f5a62010-12-19 09:58:10 -0800523
nstrazed8fe6b2001-01-22 17:50:31 +0000524 printf("%s\n", line);
525 }
526 }
527
528 /*
529 * move start of partically read line to beginning of buffer
530 * and adjust rdbuff to end of partically read line
531 */
532 memcpy((void *)loffset, buffer, (stopaddr - loffset));
533 rdbuff = buffer + (stopaddr - loffset);
534 sztord = space - (stopaddr - loffset);
535
536 newbuffer++;
537 }
538
Garrett Cooper43088e12010-12-13 23:30:59 -0800539 if (rnd_insert(offsets, loffset, numlines) < 0) {
nstraz9d100512002-06-20 19:03:00 +0000540 fprintf(stderr, "%s:%d rnd_insert() returned -1 (fatal error)!\n",
541 __FILE__, __LINE__);
542 abort();
543 }
nstrazed8fe6b2001-01-22 17:50:31 +0000544
545 loffset = (long)chr;
546 }
547 }
548
549 return 0;
550
551}