blob: 442901e537dc8f9ef6ed845d63a049d1da5928de [file] [log] [blame]
Ben Cheng25b3c042013-11-20 14:45:36 -08001/* Print the strings of printable characters in files.
2 Copyright (C) 2005-2010, 2012 Red Hat, Inc.
3 This file is part of Red Hat elfutils.
4 Written by Ulrich Drepper <drepper@redhat.com>, 2005.
5
6 Red Hat elfutils is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by the
8 Free Software Foundation; version 2 of the License.
9
10 Red Hat elfutils is distributed in the hope that it will be useful, but
11 WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 General Public License for more details.
14
15 You should have received a copy of the GNU General Public License along
16 with Red Hat elfutils; if not, write to the Free Software Foundation,
17 Inc., 51 Franklin Street, Fifth Floor, Boston MA 02110-1301 USA.
18
19 Red Hat elfutils is an included package of the Open Invention Network.
20 An included package of the Open Invention Network is a package for which
21 Open Invention Network licensees cross-license their patents. No patent
22 license is granted, either expressly or impliedly, by designation as an
23 included package. Should you wish to participate in the Open Invention
24 Network licensing program, please visit www.openinventionnetwork.com
25 <http://www.openinventionnetwork.com>. */
26
27#ifdef HAVE_CONFIG_H
28# include <config.h>
29#endif
30
31#include <argp.h>
32#include <assert.h>
33#include <ctype.h>
34#include <endian.h>
35#include <errno.h>
36#include <error.h>
37#include <fcntl.h>
38#include <gelf.h>
39#include <inttypes.h>
40#include <libintl.h>
41#include <locale.h>
42#include <stdbool.h>
43#include <stdio.h>
44#include <stdio_ext.h>
45#include <stdlib.h>
46#include <string.h>
47#include <unistd.h>
48#include <sys/mman.h>
49#include <sys/param.h>
50#include <sys/stat.h>
51
52#include <system.h>
53
54
55/* Prototypes of local functions. */
56static int read_fd (int fd, const char *fname, off64_t fdlen);
57static int read_elf (Elf *elf, int fd, const char *fname, off64_t fdlen);
58
59
60/* Name and version of program. */
61static void print_version (FILE *stream, struct argp_state *state);
62ARGP_PROGRAM_VERSION_HOOK_DEF = print_version;
63
64/* Bug report address. */
65ARGP_PROGRAM_BUG_ADDRESS_DEF = PACKAGE_BUGREPORT;
66
67/* Definitions of arguments for argp functions. */
68static const struct argp_option options[] =
69{
70 { NULL, 0, NULL, 0, N_("Output Selection:"), 0 },
71 { "all", 'a', NULL, 0, N_("Scan entire file, not only loaded sections"), 0 },
72 { "bytes", 'n', "MIN-LEN", 0,
73 N_("Only NUL-terminated sequences of MIN-LEN characters or more are printed"), 0 },
74 { "encoding", 'e', "SELECTOR", 0, N_("\
75Select character size and endianess: s = 7-bit, S = 8-bit, {b,l} = 16-bit, {B,L} = 32-bit"),
76 0},
77 { "print-file-name", 'f', NULL, 0,
78 N_("Print name of the file before each string."), 0 },
79 { "radix", 't', "{o,d,x}", 0,
80 N_("Print location of the string in base 8, 10, or 16 respectively."), 0 },
81 { NULL, 'o', NULL, 0, N_("Alias for --radix=o"), 0 },
82
83 { NULL, 0, NULL, 0, N_("Miscellaneous:"), 0 },
84 { NULL, 0, NULL, 0, NULL, 0 }
85};
86
87/* Short description of program. */
88static const char doc[] = N_("\
89Print the strings of printable characters in files.");
90
91/* Strings for arguments in help texts. */
92static const char args_doc[] = N_("[FILE...]");
93
94/* Prototype for option handler. */
95static error_t parse_opt (int key, char *arg, struct argp_state *state);
96
97/* Data structure to communicate with argp functions. */
98static struct argp argp =
99{
100 options, parse_opt, args_doc, doc, NULL, NULL, NULL
101};
102
103
104/* Global variables. */
105
106/* True if whole file and not only loaded sections are looked at. */
107static bool entire_file;
108
109/* Minimum length of any sequence reported. */
110static size_t min_len = 4;
111
112/* Number of bytes per character. */
113static size_t bytes_per_char = 1;
114
115/* Minimum length of any sequence reported in bytes. */
116static size_t min_len_bytes;
117
118/* True if multibyte characters are in big-endian order. */
119static bool big_endian;
120
121/* True unless 7-bit ASCII are expected. */
122static bool char_7bit;
123
124/* True if file names should be printed before strings. */
125static bool print_file_name;
126
127/* Location print format string. */
128static const char *locfmt;
129
130/* Page size in use. */
131static size_t ps;
132
133
134/* Mapped parts of the ELF file. */
135static unsigned char *elfmap;
136static unsigned char *elfmap_base;
137static size_t elfmap_size;
138static off64_t elfmap_off;
139
140
141int
142main (int argc, char *argv[])
143{
144 /* We use no threads. */
145 __fsetlocking (stdin, FSETLOCKING_BYCALLER);
146 __fsetlocking (stdout, FSETLOCKING_BYCALLER);
147
148 /* Set locale. */
149 (void) setlocale (LC_ALL, "");
150
151 /* Make sure the message catalog can be found. */
152 (void) bindtextdomain (PACKAGE_TARNAME, LOCALEDIR);
153
154 /* Initialize the message catalog. */
155 (void) textdomain (PACKAGE_TARNAME);
156
157 /* Parse and process arguments. */
158 int remaining;
159 (void) argp_parse (&argp, argc, argv, 0, &remaining, NULL);
160
161 /* Tell the library which version we are expecting. */
162 elf_version (EV_CURRENT);
163
164 /* Determine the page size. We will likely need it a couple of times. */
165 ps = sysconf (_SC_PAGESIZE);
166
167 struct stat64 st;
168 int result = 0;
169 if (remaining == argc)
170 /* We read from standard input. This we cannot do for a
171 structured file. */
172 result = read_fd (STDIN_FILENO,
173 print_file_name ? "{standard input}" : NULL,
174 (fstat64 (STDIN_FILENO, &st) == 0 && S_ISREG (st.st_mode))
175 ? st.st_size : INT64_C (0x7fffffffffffffff));
176 else
177 do
178 {
179 int fd = (strcmp (argv[remaining], "-") == 0
180 ? STDIN_FILENO : open (argv[remaining], O_RDONLY));
181 if (unlikely (fd == -1))
182 {
183 error (0, errno, gettext ("cannot open '%s'"), argv[remaining]);
184 result = 1;
185 }
186 else
187 {
188 const char *fname = print_file_name ? argv[remaining] : NULL;
189 int fstat_fail = fstat64 (fd, &st);
190 off64_t fdlen = (fstat_fail
191 ? INT64_C (0x7fffffffffffffff) : st.st_size);
192 if (fdlen > (off64_t) min_len_bytes)
193 {
194 Elf *elf = NULL;
195 if (entire_file
196 || fstat_fail
197 || !S_ISREG (st.st_mode)
198 || (elf = elf_begin (fd, ELF_C_READ, NULL)) == NULL
199 || elf_kind (elf) != ELF_K_ELF)
200 result |= read_fd (fd, fname, fdlen);
201 else
202 result |= read_elf (elf, fd, fname, fdlen);
203
204 /* This call will succeed even if ELF is NULL. */
205 elf_end (elf);
206 }
207
208 if (strcmp (argv[remaining], "-") != 0)
209 close (fd);
210 }
211
212 if (elfmap != NULL && elfmap != MAP_FAILED)
213 munmap (elfmap, elfmap_size);
214 elfmap = NULL;
215 }
216 while (++remaining < argc);
217
218 return result;
219}
220
221
222/* Print the version information. */
223static void
224print_version (FILE *stream, struct argp_state *state __attribute__ ((unused)))
225{
226 fprintf (stream, "strings (%s) %s\n", PACKAGE_NAME, PACKAGE_VERSION);
227 fprintf (stream, gettext ("\
228Copyright (C) %s Red Hat, Inc.\n\
229This is free software; see the source for copying conditions. There is NO\n\
230warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
231"), "2012");
232 fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
233}
234
235
236/* Handle program arguments. */
237static error_t
238parse_opt (int key, char *arg,
239 struct argp_state *state __attribute__ ((unused)))
240{
241 switch (key)
242 {
243 case 'a':
244 entire_file = true;
245 break;
246
247 case 'e':
248 /* We expect a string of one character. */
249 switch (arg[1] != '\0' ? '\0' : arg[0])
250 {
251 case 's':
252 case 'S':
253 char_7bit = arg[0] == 's';
254 bytes_per_char = 1;
255 break;
256
257 case 'b':
258 case 'B':
259 big_endian = true;
260 /* FALLTHROUGH */
261
262 case 'l':
263 case 'L':
264 bytes_per_char = isupper (arg[0]) ? 4 : 2;
265 break;
266
267 default:
268 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
269 arg, "-e");
270 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
271 return ARGP_ERR_UNKNOWN;
272 }
273 break;
274
275 case 'f':
276 print_file_name = true;
277 break;
278
279 case 'n':
280 min_len = atoi (arg);
281 break;
282
283 case 'o':
284 goto octfmt;
285
286 case 't':
287 switch (arg[0])
288 {
289 case 'd':
290 locfmt = "%7" PRId64 " ";
291 break;
292
293 case 'o':
294 octfmt:
295 locfmt = "%7" PRIo64 " ";
296 break;
297
298 case 'x':
299 locfmt = "%7" PRIx64 " ";
300 break;
301
302 default:
303 error (0, 0, gettext ("invalid value '%s' for %s parameter"),
304 arg, "-t");
305 argp_help (&argp, stderr, ARGP_HELP_SEE, "strings");
306 return ARGP_ERR_UNKNOWN;
307 }
308 break;
309
310 case ARGP_KEY_FINI:
311 /* Compute the length in bytes of any match. */
312 if (min_len <= 0 || min_len > INT_MAX / bytes_per_char)
313 error (EXIT_FAILURE, 0,
314 gettext ("invalid minimum length of matched string size"));
315 min_len_bytes = min_len * bytes_per_char;
316 break;
317
318 default:
319 return ARGP_ERR_UNKNOWN;
320 }
321 return 0;
322}
323
324
325static void
326process_chunk_mb (const char *fname, const unsigned char *buf, off64_t to,
327 size_t len, char **unprinted)
328{
329 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
330 const unsigned char *start = buf;
331 while (len >= bytes_per_char)
332 {
333 uint32_t ch;
334
335 if (bytes_per_char == 2)
336 {
337 if (big_endian)
338 ch = buf[0] << 8 | buf[1];
339 else
340 ch = buf[1] << 8 | buf[0];
341 }
342 else
343 {
344 if (big_endian)
345 ch = buf[0] << 24 | buf[1] << 16 | buf[2] << 8 | buf[3];
346 else
347 ch = buf[3] << 24 | buf[2] << 16 | buf[1] << 8 | buf[0];
348 }
349
350 if (ch <= 255 && (isprint (ch) || ch == '\t'))
351 {
352 ++buf;
353 ++curlen;
354 }
355 else
356 {
357 if (curlen >= min_len)
358 {
359 /* We found a match. */
360 if (unlikely (fname != NULL))
361 {
362 fputs_unlocked (fname, stdout);
363 fputs_unlocked (": ", stdout);
364 }
365
366 if (unlikely (locfmt != NULL))
367 printf (locfmt, (int64_t) to - len - (buf - start));
368
369 if (unlikely (*unprinted != NULL))
370 {
371 fputs_unlocked (*unprinted, stdout);
372 free (*unprinted);
373 *unprinted = NULL;
374 }
375
376 /* There is no sane way of printing the string. If we
377 assume the file data is encoded in UCS-2/UTF-16 or
378 UCS-4/UTF-32 respectively we could covert the string.
379 But there is no such guarantee. */
380 fwrite_unlocked (start, 1, buf - start, stdout);
381 putc_unlocked ('\n', stdout);
382 }
383
384 start = ++buf;
385 curlen = 0;
386
387 if (len <= min_len)
388 break;
389 }
390
391 --len;
392 }
393
394 if (curlen != 0)
395 *unprinted = xstrndup ((const char *) start, curlen);
396}
397
398
399static void
400process_chunk (const char *fname, const unsigned char *buf, off64_t to,
401 size_t len, char **unprinted)
402{
403 /* We are not going to slow the check down for the 2- and 4-byte
404 encodings. Handle them special. */
405 if (unlikely (bytes_per_char != 1))
406 {
407 process_chunk_mb (fname, buf, to, len, unprinted);
408 return;
409 }
410
411 size_t curlen = *unprinted == NULL ? 0 : strlen (*unprinted);
412 const unsigned char *start = buf;
413 while (len > 0)
414 {
415 if ((isprint (*buf) || *buf == '\t') && (! char_7bit || *buf <= 127))
416 {
417 ++buf;
418 ++curlen;
419 }
420 else
421 {
422 if (curlen >= min_len)
423 {
424 /* We found a match. */
425 if (likely (fname != NULL))
426 {
427 fputs_unlocked (fname, stdout);
428 fputs_unlocked (": ", stdout);
429 }
430
431 if (likely (locfmt != NULL))
432 printf (locfmt, (int64_t) to - len - (buf - start));
433
434 if (unlikely (*unprinted != NULL))
435 {
436 fputs_unlocked (*unprinted, stdout);
437 free (*unprinted);
438 *unprinted = NULL;
439 }
440 fwrite_unlocked (start, 1, buf - start, stdout);
441 putc_unlocked ('\n', stdout);
442 }
443
444 start = ++buf;
445 curlen = 0;
446
447 if (len <= min_len)
448 break;
449 }
450
451 --len;
452 }
453
454 if (curlen != 0)
455 *unprinted = xstrndup ((const char *) start, curlen);
456}
457
458
459/* Map a file in as large chunks as possible. */
460static void *
461map_file (int fd, off64_t start_off, off64_t fdlen, size_t *map_sizep)
462{
463#if _MUDFLAP
464 (void) fd;
465 (void) start_off;
466 (void) fdlen;
467 (void) map_sizep;
468 return MAP_FAILED;
469#else
470 /* Maximum size we mmap. We use an #ifdef to avoid overflows on
471 32-bit machines. 64-bit machines these days do not have usable
472 address spaces larger than about 43 bits. Not that any file
473 should be that large. */
474# if SIZE_MAX > 0xffffffff
475 const size_t mmap_max = 0x4000000000lu;
476# else
477 const size_t mmap_max = 0x40000000lu;
478# endif
479
480 /* Try to mmap the file. */
481 size_t map_size = MIN ((off64_t) mmap_max, fdlen);
482 const size_t map_size_min = MAX (MAX (SIZE_MAX / 16, 2 * ps),
483 roundup (2 * min_len_bytes + 1, ps));
484 void *mem;
485 while (1)
486 {
487 /* We map the memory for reading only here. Since we will
488 always look at every byte of the file it makes sense to
489 use MAP_POPULATE. */
490 mem = mmap64 (NULL, map_size, PROT_READ, MAP_PRIVATE | MAP_POPULATE,
491 fd, start_off);
492 if (mem != MAP_FAILED)
493 {
494 /* We will go through the mapping sequentially. */
495 (void) posix_madvise (mem, map_size, POSIX_MADV_SEQUENTIAL);
496 break;
497 }
498 if (errno != EINVAL && errno != ENOMEM)
499 /* This is an error other than the lack of address space. */
500 break;
501
502 /* Maybe the size of the mapping is too big. Try again. */
503 map_size /= 2;
504 if (map_size < map_size_min)
505 /* That size should have fit. */
506 break;
507 }
508
509 *map_sizep = map_size;
510 return mem;
511#endif
512}
513
514
515/* Read the file without mapping. */
516static int
517read_block_no_mmap (int fd, const char *fname, off64_t from, off64_t fdlen)
518{
519 char *unprinted = NULL;
520#define CHUNKSIZE 65536
521 unsigned char *buf = xmalloc (CHUNKSIZE + min_len_bytes
522 + bytes_per_char - 1);
523 size_t ntrailer = 0;
524 int result = 0;
525 while (fdlen > 0)
526 {
527 ssize_t n = TEMP_FAILURE_RETRY (read (fd, buf + ntrailer,
528 MIN (fdlen, CHUNKSIZE)));
529 if (n == 0)
530 {
531 /* There are less than MIN_LEN+1 bytes left so there cannot be
532 another match. */
533 assert (unprinted == NULL || ntrailer == 0);
534 break;
535 }
536 if (unlikely (n < 0))
537 {
538 /* Something went wrong. */
539 result = 1;
540 break;
541 }
542
543 /* Account for the number of bytes read in this round. */
544 fdlen -= n;
545
546 /* Do not use the signed N value. Note that the addition cannot
547 overflow. */
548 size_t nb = (size_t) n + ntrailer;
549 if (nb >= min_len_bytes)
550 {
551 /* We only use complete characters. */
552 nb &= ~(bytes_per_char - 1);
553
554 process_chunk (fname, buf, from + nb, nb, &unprinted);
555
556 /* If the last bytes of the buffer (modulo the character
557 size) have been printed we are not copying them. */
558 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
559
560 memmove (buf, buf + nb - to_keep, to_keep);
561 ntrailer = to_keep;
562 from += nb;
563 }
564 else
565 ntrailer = nb;
566 }
567
568 free (buf);
569
570 /* Don't print anything we collected so far. There is no
571 terminating NUL byte. */
572 free (unprinted);
573
574 return result;
575}
576
577
578static int
579read_block (int fd, const char *fname, off64_t fdlen, off64_t from, off64_t to)
580{
581 if (elfmap == NULL)
582 {
583 /* We need a completely new mapping. */
584 elfmap_off = from & ~(ps - 1);
585 elfmap_base = elfmap = map_file (fd, elfmap_off, fdlen, &elfmap_size);
586
587 if (unlikely (elfmap == MAP_FAILED))
588 /* Let the kernel know we are going to read everything in sequence. */
589 (void) posix_fadvise (fd, 0, 0, POSIX_FADV_SEQUENTIAL);
590 }
591
592 if (unlikely (elfmap == MAP_FAILED))
593 {
594 /* Read from the file descriptor. For this we must position the
595 read pointer. */
596 // XXX Eventually add flag which avoids this if the position
597 // XXX is known to match.
598 if (from != 0 && lseek64 (fd, from, SEEK_SET) != from)
599 error (EXIT_FAILURE, errno, gettext ("lseek64 failed"));
600
601 return read_block_no_mmap (fd, fname, from, to - from);
602 }
603
604 assert ((off64_t) min_len_bytes < fdlen);
605
606 if (to < (off64_t) elfmap_off || from > (off64_t) (elfmap_off + elfmap_size))
607 {
608 /* The existing mapping cannot fit at all. Map the new area.
609 We always map the full range of ELFMAP_SIZE bytes even if
610 this extend beyond the end of the file. The Linux kernel
611 handles this OK if the access pages are not touched. */
612 elfmap_off = from & ~(ps - 1);
613 if (mmap64 (elfmap, elfmap_size, PROT_READ,
614 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, from)
615 == MAP_FAILED)
616 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
617 elfmap_base = elfmap;
618 }
619
620 char *unprinted = NULL;
621
622 /* Use the existing mapping as much as possible. If necessary, map
623 new pages. */
624 if (from >= (off64_t) elfmap_off
625 && from < (off64_t) (elfmap_off + elfmap_size))
626 /* There are at least a few bytes in this mapping which we can
627 use. */
628 process_chunk (fname, elfmap_base + (from - elfmap_off),
629 MIN (to, (off64_t) (elfmap_off + elfmap_size)),
630 MIN (to, (off64_t) (elfmap_off + elfmap_size)) - from,
631 &unprinted);
632
633 if (to > (off64_t) (elfmap_off + elfmap_size))
634 {
635 unsigned char *remap_base = elfmap_base;
636 size_t read_now = elfmap_size - (elfmap_base - elfmap);
637
638 assert (from >= (off64_t) elfmap_off
639 && from < (off64_t) (elfmap_off + elfmap_size));
640 off64_t handled_to = elfmap_off + elfmap_size;
641 assert (elfmap == elfmap_base
642 || (elfmap_base - elfmap
643 == (ptrdiff_t) ((min_len_bytes + ps - 1) & ~(ps - 1))));
644 if (elfmap == elfmap_base)
645 {
646 size_t keep_area = (min_len_bytes + ps - 1) & ~(ps - 1);
647 assert (elfmap_size >= keep_area + ps);
648 /* The keep area is used for the content of the previous
649 buffer we have to keep. This means copying those bytes
650 and for this we have to make the data writable. */
651 if (unlikely (mprotect (elfmap, keep_area, PROT_READ | PROT_WRITE)
652 != 0))
653 error (EXIT_FAILURE, errno, gettext ("mprotect failed"));
654
655 elfmap_base = elfmap + keep_area;
656 }
657
658 while (1)
659 {
660 /* Map the rest of the file, eventually again in pieces.
661 We speed things up with a nice Linux feature. Note
662 that we have at least two pages mapped. */
663 size_t to_keep = unprinted != NULL ? 0 : min_len_bytes;
664
665 assert (read_now >= to_keep);
666 memmove (elfmap_base - to_keep,
667 remap_base + read_now - to_keep, to_keep);
668 remap_base = elfmap_base;
669
670 assert ((elfmap_size - (elfmap_base - elfmap)) % bytes_per_char
671 == 0);
672 read_now = MIN (to - handled_to,
673 (ptrdiff_t) elfmap_size - (elfmap_base - elfmap));
674
675 assert (handled_to % ps == 0);
676 assert (handled_to % bytes_per_char == 0);
677 if (mmap64 (remap_base, read_now, PROT_READ,
678 MAP_PRIVATE | MAP_POPULATE | MAP_FIXED, fd, handled_to)
679 == MAP_FAILED)
680 error (EXIT_FAILURE, errno, gettext ("re-mmap failed"));
681 elfmap_off = handled_to;
682
683 process_chunk (fname, remap_base - to_keep,
684 elfmap_off + (read_now & ~(bytes_per_char - 1)),
685 to_keep + (read_now & ~(bytes_per_char - 1)),
686 &unprinted);
687 handled_to += read_now;
688 if (handled_to >= to)
689 break;
690 }
691 }
692
693 /* Don't print anything we collected so far. There is no
694 terminating NUL byte. */
695 free (unprinted);
696
697 return 0;
698}
699
700
701static int
702read_fd (int fd, const char *fname, off64_t fdlen)
703{
704 return read_block (fd, fname, fdlen, 0, fdlen);
705}
706
707
708static int
709read_elf (Elf *elf, int fd, const char *fname, off64_t fdlen)
710{
711 assert (fdlen >= 0);
712
713 /* We will look at each section separately. The ELF file is not
714 mmapped. The libelf implementation will load the needed parts on
715 demand. Since we only interate over the section header table the
716 memory consumption at this stage is kept minimal. */
717 Elf_Scn *scn = elf_nextscn (elf, NULL);
718 if (scn == NULL)
719 return read_fd (fd, fname, fdlen);
720
721 int result = 0;
722 do
723 {
724 GElf_Shdr shdr_mem;
725 GElf_Shdr *shdr = gelf_getshdr (scn, &shdr_mem);
726
727 /* Only look in sections which are loaded at runtime and
728 actually have content. */
729 if (shdr != NULL && shdr->sh_type != SHT_NOBITS
730 && (shdr->sh_flags & SHF_ALLOC) != 0)
731 result |= read_block (fd, fname, fdlen, shdr->sh_offset,
732 shdr->sh_offset + shdr->sh_size);
733 }
734 while ((scn = elf_nextscn (elf, scn)) != NULL);
735
736 if (elfmap != NULL && elfmap != MAP_FAILED)
737 munmap (elfmap, elfmap_size);
738 elfmap = NULL;
739
740 return result;
741}
742
743
744#include "debugpred.h"